In [0]:
# Mount Google Drive at /content/drive; later cells read their inputs from paths under it.
# This cell only works inside Google Colab.
from google.colab import drive
drive.mount('/content/drive')

In [0]:
# Copy the CSC420P/data folder from the mounted Drive into the current working directory.
# NOTE(review): `files` is imported here but not used in any visible cell.
from google.colab import files
!cp -r "/content/drive/My Drive/CSC420P/data" .

In [0]:
# pyts supplies RecurrencePlot / GramianAngularField, which the next cell imports.
# NOTE(review): the version is not pinned; consider pinning it so re-runs are reproducible.
!pip install pyts

In [0]:
from PIL import Image
import glob
import numpy as np
import torch
from torch.utils.data.dataset import Dataset
import pandas as pd
import matplotlib.pyplot as plt
import os
import cv2
from pyts.image import RecurrencePlot, GramianAngularField
from mpl_toolkits.axes_grid1 import ImageGrid
from PIL import Image

class GetData(Dataset):
    """Dataset of chart images, each paired with a single numeric label.

    Args:
        image_path: Directory whose entries are the image files.
        masks: Indexable rows where ``row[0]`` is the image's base name
            (file name without extension) and ``row[1]`` is its label.

    Each item is ``(image, label)``: the image is resized to 128x128, moved to
    channel-first layout and inverted to ``1 - pixel / 255``; the label is a
    float tensor of shape ``(1,)``. Tensors are placed on the GPU when one is
    available, otherwise on the CPU.
    """

    def __init__(self, image_path, masks):
        self.masks = masks
        # glob returns files in arbitrary, filesystem-dependent order; sorting
        # makes index -> file stable between runs.
        self.images_path = sorted(glob.glob(str(image_path) + str("/*")))
        self.data_len = len(self.images_path)
        # Labels are looked up by file name so that they cannot be paired
        # with the wrong image when directory order differs from row order.
        self._label_by_name = {str(row[0]): row[1] for row in masks}

    def _label_for(self, index):
        """Return the integer label for the image at ``index``.

        Matches on the file's base name; falls back to the row at the same
        position when the name is not present in ``masks``.
        """
        stem = os.path.splitext(os.path.basename(self.images_path[index]))[0]
        if stem in self._label_by_name:
            return int(self._label_by_name[stem])
        return int(self.masks[index][1])

    def __getitem__(self, index):
        # The context manager closes the file handle once pixels are copied.
        with Image.open(self.images_path[index]) as img:  # .convert('L')
            # np.float was removed from NumPy; float64 is the type it aliased.
            img_np = np.array(img, dtype=np.float64)
        img_np = cv2.resize(img_np, (128, 128))
        # HWC -> CHW, then invert intensities into [0, 1].
        img_np = 1 - img_np.transpose((2, 0, 1)) / 255

        device = "cuda" if torch.cuda.is_available() else "cpu"
        img_tensors = torch.from_numpy(np.ascontiguousarray(img_np)).float().to(device)
        mask_tensors = torch.tensor([self._label_for(index)]).float().to(device)

        return (img_tensors, mask_tensors)

    def __len__(self):
        return self.data_len

import pandas as pd


def prep_data(img_path, label_path):
    """Build train / validation / test loaders over one image folder.

    The label CSV is read with pandas and handed to ``GetData`` as a plain
    array. Indices are shuffled with a fixed NumPy seed (420) and split so
    that validation and test each receive ``floor(0.15 * n)`` samples and
    training receives the remainder.

    Returns:
        ``(trainLoad, validLoad, testLoad)`` with batch sizes 100, 75 and 75.
    """
    label_rows = pd.read_csv(label_path).values
    dataset = GetData(img_path, label_rows)

    n_total = len(dataset)
    order = list(range(n_total))
    holdout = int(np.floor(0.15 * n_total))

    # Fixed seed: the same partition is produced on every call.
    np.random.seed(420)
    np.random.shuffle(order)

    val_start = n_total - 2 * holdout
    test_start = n_total - holdout
    partitions = (order[:val_start], order[val_start:test_start], order[test_start:])
    batch_sizes = (100, 75, 75)

    samplers = [torch.utils.data.SubsetRandomSampler(part) for part in partitions]
    loaders = [
        torch.utils.data.DataLoader(dataset=dataset, num_workers=0, batch_size=size, sampler=sampler)
        for size, sampler in zip(batch_sizes, samplers)
    ]

    return loaders[0], loaders[1], loaders[2]

def feature_points(img, start_sigma=0.5, k=1.414):
    """Find difference-of-Gaussians peaks in the first channel of ``img``.

    Channel 0 is blurred at six sigmas ``start_sigma * k**i`` and adjacent
    blurs are subtracted, giving five response maps. Responses are made
    absolute and normalised by their global maximum. A point is kept when its
    normalised response exceeds 0.7, the scale is the strongest one at that
    pixel, and the response is the maximum of its 3x3 spatial neighbourhood
    (clipped at the image border).

    Args:
        img: Array indexed as ``img[row, col, channel]``.
        start_sigma: Sigma of the finest blur.
        k: Multiplicative step between consecutive sigmas.

    Returns:
        List of ``(y, x, sigma)`` tuples; empty when the image has no
        non-zero response.
    """
    scales = 5
    N, M = img.shape[:2]
    img = np.array(img[:, :, 0], np.double)
    dog = np.zeros((N, M, scales + 1), dtype=np.double)
    for i in range(scales + 1):
        dog[:, :, i] = cv2.GaussianBlur(img, (11, 11), (k**i) * start_sigma)
    for i in range(scales):
        dog[:, :, i] = dog[:, :, i + 1] - dog[:, :, i]
    # Only the first `scales` planes hold differences; the last is a raw blur.
    dog = np.abs(dog[:, :, :scales])

    peak = np.max(dog) if dog.size else 0.0
    if not peak > 0:
        # Flat image: nothing to normalise by and no features to report.
        return []
    dog = dog / peak

    ys, xs, found_scales = np.where(dog > 0.7)
    strongest_scale = np.argmax(dog, axis=2)
    features = []
    for y, x, s in zip(ys, xs, found_scales):
        if strongest_scale[y, x] != s:
            continue
        # Full 3x3 window (y-1..y+1, x-1..x+1). The lower bound is clamped so
        # that border pixels do not produce a negative start index, which
        # would yield an empty slice.
        patch = dog[max(y - 1, 0):y + 2, max(x - 1, 0):x + 2, s]
        if np.max(patch) == dog[y, x, s]:
            features.append((y, x, (k**s) * start_sigma))
    return features


In [0]:


# data creation
data = pd.read_csv("/content/drive/My Drive/data_stocks.csv")
# Drop the two columns that are not plotted per stock. `columns=` is used
# because the positional axis argument of DataFrame.drop is no longer
# accepted by pandas 2.x.
data = data.drop(columns=['DATE', 'SP500'])

# r = number of rows, c = number of remaining columns (one per series).
r, c = data.values.shape
# print(data.columns.values[0])
# print(data.values[:, 0][:-30])
# print(data.values[:, 0][-32])
# print(data.values[:, 0][-31])


# Make sure each output folder under ./data exists before anything is saved.
for _folder in ("original", "change1", "change2", "RCplot", "RCplot1", "RCplot2"):
    _target = "./data/" + _folder
    if not os.path.exists(_target):
        os.makedirs(_target)

# For every column of `data`: save a line chart and two recurrence plots, and
# derive one binary label. The pyplot calls below all act on the implicit
# current figure, which is cleared with plt.clf() after each save.
prediction = []
for i in range(c):  # one iteration per column of `data`
    # Line chart of the series with its last 30 rows left out.
    plt.plot(data.values[:, i][:-30])
    plt.axis('off')
    plt.savefig("./data/original/{}.jpg".format(data.columns.values[i]))
    plt.clf()
    # Label is 0 when the row just before the last-30 window is above the
    # window's mean, and 1 otherwise.
    avg_30 = np.sum(data.values[:, i][-30:])/30
    if data.values[:, i][-31] > avg_30:  # TODO(author): original note asks "why?" about this rule
        prediction.append(0)
    else:
        prediction.append(1)

    rp = RecurrencePlot()
    # Two rows are passed to fit_transform (row indices, then the values);
    # only the second result, X_rp[1], is drawn. This one covers the first
    # quarter of the rows.
    X_rp = rp.fit_transform(np.array([list(range(0,r//4)), data.values[:, i][:r//4]]))    # TODO(author): use more or less data?
    plt.imshow(X_rp[1], cmap='gist_ncar', origin='lower')
    plt.tight_layout()
    plt.axis('off')
    plt.savefig("./data/RCplot/{}.jpg".format(data.columns.values[i]))
    plt.clf()
    # Same again for rows from 3*r//4 up to (not including) r-30, saved to RCplot2.
    # NOTE(review): nothing in this loop writes to ./data/RCplot1, although that
    # folder is created above — confirm which folder the training step expects.
    X_rp = rp.fit_transform(np.array([list(range(3*r//4,r-30)), data.values[:, i][3*r//4:r-30]]))    # TODO(author): use more or less data?
    plt.imshow(X_rp[1], cmap='gist_ncar', origin='lower')
    plt.tight_layout()
    plt.axis('off')
    plt.savefig("./data/RCplot2/{}.jpg".format(data.columns.values[i]))
    plt.clf()
    del X_rp, rp

# Two-column table (column name, label) written to ./labels.csv.
labels = np.array([data.columns.values, prediction])
labels = labels.T
df = pd.DataFrame(labels, columns=["Name", "Prediction"])
df.to_csv("./labels.csv", index=False)

# image manipulation
# Draw the detected feature points on every saved line chart and store the
# results in ./data/change1 (features found on the PIL-decoded array) and
# ./data/change2 (features found on the cv2-decoded array).
labels = pd.read_csv("./labels.csv")
labels = labels.values
image_path = glob.glob(str('./data/original') + str("/*"))
for single_img in image_path:
    # Name the outputs after the source file itself. glob order is arbitrary,
    # so indexing `labels` by loop position could attach another series' name
    # to this image.
    stock_name = os.path.splitext(os.path.basename(single_img))[0]

    with Image.open(single_img) as pil_img:
        img = np.array(pil_img)
    img1 = cv2.imread(single_img)
    result = feature_points(img)
    result1 = feature_points(img1)

    # cv2.KeyPoint takes (x, y, size) as floats; feature_points yields
    # (y, x, sigma) with NumPy integer coordinates.
    keypoints = [cv2.KeyPoint(float(px), float(py), float(size)) for py, px, size in result]
    keypoints1 = [cv2.KeyPoint(float(px), float(py), float(size)) for py, px, size in result1]

    # drawKeypoints expects a sequence of KeyPoint objects, so plain lists
    # are passed rather than NumPy object arrays.
    x = cv2.drawKeypoints(img1, keypoints, None)
    x1 = cv2.drawKeypoints(img1, keypoints1, None)
    location = "./data/change1/{}.jpg".format(stock_name)
    location1 = "./data/change2/{}.jpg".format(stock_name)
    cv2.imwrite(location, x)
    cv2.imwrite(location1, x1)




In [0]:
import torch
from torch import nn, optim
import torch.nn.functional as F
import numpy as np
import cv2 as cv
import os
import time
import matplotlib
import matplotlib.pyplot as plt


def CrossEntropyLoss(outputs, targets, alpha=1, gamma=5, eps=1e-7):
  """Mean binary cross-entropy between probabilities and 0/1 targets.

  Args:
    outputs: Predicted probabilities.
    targets: Targets, broadcastable against ``outputs``.
    alpha, gamma: Unused; accepted so the signature matches ``FocalLoss``.
    eps: Probabilities are clamped to ``[eps, 1 - eps]`` before the log so
      that a saturated prediction cannot produce ``0 * log(0) = nan``.

  Returns:
    Scalar tensor with the mean loss.
  """
  probs = torch.clamp(outputs, eps, 1 - eps)
  cross = -(targets*torch.log(probs) + (1 - targets)*torch.log(1 - probs))
  return torch.mean(cross)


def FocalLoss(outputs, targets, alpha=1, gamma=5, eps=1e-7):
  """Mean binary focal loss: ``alpha * (1 - p_t)**gamma * CE``.

  Args:
    outputs: Predicted probabilities.
    targets: Targets, broadcastable against ``outputs``.
    alpha: Constant multiplier on the loss.
    gamma: Focusing exponent; larger values down-weight confident predictions.
    eps: Probabilities are clamped to ``[eps, 1 - eps]`` before the log.

  Returns:
    Scalar tensor with the mean loss (non-negative for 0/1 targets).
  """
  probs = torch.clamp(outputs, eps, 1 - eps)
  cross = -(targets*torch.log(probs) + (1 - targets)*torch.log(1 - probs))
  # cross is -log(p_t), so p_t is exp(-cross). Using exp(+cross) gives a
  # value >= 1 and turns the modulating factor (and the loss) negative.
  pt = torch.exp(-cross)
  loss = (alpha*(1 - pt)**gamma)*cross
  return torch.mean(loss)

def weights_init(m):
    """Re-initialise one module in place; intended for ``net.apply``.

    ``nn.Linear`` weights are drawn from N(0, 1/sqrt(in_features)) and
    ``nn.Conv2d`` weights from N(0, 2/sqrt(in_channels)); in both cases the
    bias is set to zero. Modules of any other exact type are left alone.
    """
    layer_kind = type(m)
    if layer_kind == nn.Linear:
        std = 1/np.sqrt(m.in_features)
    elif layer_kind == nn.Conv2d:
        std = 2/np.sqrt(m.in_channels)
    else:
        return

    m.weight.data.normal_(0.0, std)
    m.bias.data.fill_(0)

class ConvCoords(nn.Module):
    """Conv -> BatchNorm -> ReLU applied to the input plus two coordinate maps.

    Two extra channels are appended to the input before the convolution. Each
    holds the position along one of the two trailing dimensions, scaled
    linearly to the range [-1, 1].
    """

    def __init__(self, in_channels, out_channels=64, kernel=7, padding=4, stride=2):
        super().__init__()
        # The convolution sees the original channels plus the two maps.
        self.conv = nn.Sequential(
            nn.Conv2d(in_channels + 2, out_channels, kernel_size=kernel, padding=padding, stride=stride),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(),
        )

    def forward(self, input_):
        b, _, w, h = input_.size()

        # Positions along dim 2 and dim 3, rescaled from [0, size-1] to [-1, 1].
        along_dim2 = 2*(torch.arange(w).float() / (w - 1)) - 1
        along_dim3 = 2*(torch.arange(h).float() / (h - 1)) - 1

        # Broadcast each ramp to a (b, 1, w, h) map.
        x_coords = along_dim2.view(1, 1, w, 1).expand(b, 1, w, h)
        y_coords = along_dim3.view(1, 1, 1, h).expand(b, 1, w, h)

        coord = torch.cat((input_, x_coords.type_as(input_), y_coords.type_as(input_)), dim=1)

        return self.conv(coord)

class ResBlock(nn.Module):
  """Two Conv-BatchNorm-ReLU layers with a skip connection.

  Args:
    num_in_channels: Channels of the input.
    out_channels: Channels produced by both convolutions.
    kernel: Kernel size; padding is ``kernel // 2``.
    stride: Stride of the first convolution. With stride 1 the input is
      added to the output directly; with stride 2 it first goes through a
      1x1 stride-2 convolution. Any other stride adds no skip connection.
  """

  def __init__(self, num_in_channels, out_channels, kernel=3, stride=1):
    super(ResBlock, self).__init__()

    padding = kernel // 2
    self.stride = stride
    self.conv1 = nn.Sequential(
        nn.Conv2d(num_in_channels, out_channels, kernel_size=kernel, padding=padding, stride=stride),
        nn.BatchNorm2d(out_channels),
        nn.ReLU(),)

    self.conv2 = nn.Sequential(
        nn.Conv2d(out_channels, out_channels, kernel_size=kernel, padding=padding),
        nn.BatchNorm2d(out_channels),
        nn.ReLU(),)

    if (stride == 2):
      # Projects the input to the shape of the main path for the skip add.
      self.conv_reshape = nn.Conv2d(num_in_channels, out_channels, kernel_size=1, stride=2)

  def forward(self, x):
    out = self.conv1(x)
    out = self.conv2(out)
    # Out-of-place add: `out` is the output of a ReLU, which autograd keeps
    # for the backward pass. Modifying it with `+=` invalidates that saved
    # tensor and makes backward() raise.
    if self.stride == 1:
      out = out + x
    elif self.stride == 2:
      out = out + self.conv_reshape(x)
    return out


class Percep(nn.Module):
  """Fully connected baseline: flatten -> Linear(128*128*3, 512) -> ReLU -> Linear(512, 1) -> Sigmoid."""

  def __init__(self):
    super().__init__()

    layers = [
        nn.Linear(128*128*3, 512),
        nn.ReLU(),
        nn.Linear(512, 1),
        nn.Sigmoid(),
    ]
    self.linear_reg = nn.Sequential(*layers)

  def forward(self, x):
    # Collapse everything except the batch dimension.
    flat = x.view(x.size(0), -1)
    return self.linear_reg(flat)


class ResNet(nn.Module):
  """Residual classifier: coordinate-conv stem, five residual stages, sigmoid head.

  Args:
    in_channels: Channels of the input images.
    num_filters: Width of the stem; the stride-2 stages widen to 2x, 4x, 8x
      and 16x this value.
    kernel: Accepted but not used by this class.

  The head is ``Linear(num_filters * 64, 1)``, so the flattened feature map
  after the last stage must have that many elements per sample. The sigmoid
  output is rescaled to ``0.99 * p + 0.001``.
  """

  def __init__(self, in_channels, num_filters=64, kernel=3):
    super().__init__()
    nf = num_filters

    self.conv_coords = nn.Sequential(
        ConvCoords(in_channels, out_channels=nf),
        nn.MaxPool2d(3, stride=2))
    self.res1 = ResBlock(nf, nf)
    self.res2 = self._stage(nf, nf*2)
    self.res3 = self._stage(nf*2, nf*4)
    self.res4 = self._stage(nf*4, nf*8)
    self.res5 = self._stage(nf*8, nf*16)

    self.linear_reg = nn.Sequential(nn.Linear(nf*64, 1),
                                    nn.Sigmoid())

  @staticmethod
  def _stage(c_in, c_out):
    """A stride-2 block that widens to ``c_out``, followed by a stride-1 block."""
    return nn.Sequential(
        ResBlock(c_in, c_out, stride=2),
        ResBlock(c_out, c_out))

  def forward(self, x):
    out = self.conv_coords(x)
    for stage in (self.res1, self.res2, self.res3, self.res4, self.res5):
      out = stage(out)
    out = out.view(x.size(0), -1)
    # Rescale so the output never reaches exactly 0 or 1.
    return 0.99*self.linear_reg(out)+0.001

    

In [0]:
def _mean_loss(net, loader):
    """Return the per-batch mean of CrossEntropyLoss over ``loader`` as a float.

    Runs under ``torch.no_grad()`` so that no autograd graph is kept for the
    evaluated batches. Returns NaN when the loader yields no batches.
    """
    device = next(net.parameters()).device
    total = 0.0
    with torch.no_grad():
        for images, masks in loader:
            output = net(images.to(device))
            total += CrossEntropyLoss(output, masks.to(device)).item()
    n_batches = len(loader)
    return total / n_batches if n_batches else float("nan")


def train(type, dataset, in_channels=3, epochs=10, lr=0.001, wd=1, clip=None):
    """Train a model on one of the image sets and return it.

    Args:
        type: ``'res'`` builds ``ResNet(in_channels)``; anything else builds
            ``Percep()``.
        dataset: 0 selects the ``original`` folder, 1 selects ``change1``,
            any other value selects ``RCplot1``.
        in_channels: Input channels for the ResNet variant.
        epochs: Number of passes over the training loader.
        lr: Adam learning rate.
        wd: Adam ``weight_decay``.
        clip: When not None, gradients are clipped element-wise to
            ``[-clip, clip]`` before each optimizer step.

    Side effects:
        Prints per-epoch train/validation loss and the final test loss,
        writes ``training_curve.png`` and saves the weights to ``net.pth``.

    Returns:
        The trained network, left in eval mode.
    """
    if type == 'res':
        net = ResNet(in_channels)
    else:
        net = Percep()

    net.apply(weights_init)

    # Use the GPU when present; otherwise stay on the CPU instead of failing.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    torch.cuda.empty_cache()
    net.float()
    net.to(device)
    optimizer = torch.optim.Adam(net.parameters(), lr=lr, weight_decay=wd)

    # DATA
    print("Loading data...")
    if dataset == 0:
        # Normal charts
        path = "/content/drive/My Drive/CSC420P/data/original/"
    elif dataset == 1:
        # Charts with feature points drawn on them (original note: "SIFT")
        path = "/content/drive/My Drive/CSC420P/data/change1/"
    else:
        # Recurrence plots
        path = "/content/drive/My Drive/CSC420P/data/RCplot1/"

    trainLoad, validLoad, testLoad = prep_data(path, "/content/drive/My Drive/CSC420P/labels.csv")

    start = time.time()

    train_losses = []
    valid_losses = []
    for epoch in range(epochs):
        # Train the model
        net.train()  # BatchNorm uses batch statistics
        losses = []
        for images, masks in trainLoad:
            # Forward + backward + optimize
            optimizer.zero_grad()
            outputs = net(images.to(device))

            loss = CrossEntropyLoss(outputs, masks.to(device))
            loss.backward()
            if clip is not None:
                nn.utils.clip_grad_value_(net.parameters(), clip)

            optimizer.step()
            losses.append(loss.item())
            del loss

        avg_loss = np.mean(losses)
        train_losses.append(avg_loss)
        time_elapsed = time.time() - start
        print('Epoch [%d/%d], Loss: %.4f, Time (s): %d' % (
            epoch+1, epochs, avg_loss, time_elapsed))

        # Evaluate the model
        net.eval()  # BatchNorm uses its running mean/var
        val_loss = _mean_loss(net, validLoad)
        time_elapsed = time.time() - start
        valid_losses.append(val_loss)
        print('Epoch [%d/%d], Val Loss: %.4f, Time(s): %d' % (
            epoch+1, epochs, val_loss, time_elapsed))
        torch.cuda.empty_cache()

    # Make sure the test pass runs in eval mode even when epochs == 0.
    net.eval()
    test_loss = _mean_loss(net, testLoad)
    print("Test Loss: ", test_loss)

    # Plot training curve
    plt.figure()
    plt.plot(train_losses, "ro-", label="Train")
    plt.plot(valid_losses, "go-", label="Validation")
    plt.legend()
    plt.title("Loss")
    plt.xlabel("Epochs")
    plt.savefig("training_curve.png")

    print('Saving model...')
    torch.save(net.state_dict(), "net.pth")

    return net

In [0]:
# Train the 'res' model once per image set (0, 1, 2), reseeding torch before
# each run. The positional arguments 3, 11 are in_channels and epochs; wd is
# passed to Adam as weight_decay and clip bounds each gradient element.
# NOTE(review): `r` rebinds the name the data-creation cell uses for the row
# count of `data`; re-running cells out of order may pick up the wrong value.
torch.manual_seed(693)
r = train("res", 0, 3,11,clip=1,wd=100)
torch.manual_seed(693)
r1 = train("res", 1, 3,11,clip=1,wd=100)
torch.manual_seed(693)
r2 = train("res", 2, 3,11,clip=1,wd=100)

In [0]:
# Train the non-'res' model (Percep) once per image set, reseeding torch before
# each run. Set 0 trains for 11 epochs, sets 1 and 2 for 9; wd keeps train()'s
# default.
# NOTE(review): these assignments overwrite r, r1 and r2 from the previous
# cell, and each train() call rewrites net.pth and training_curve.png.
torch.manual_seed(693)
r = train("percep", 0, epochs=11,lr=0.0001,clip=1)
torch.manual_seed(693)
r1 = train("percep", 1, epochs=9,lr=0.0001,clip=1)
torch.manual_seed(693)
r2 = train("percep", 2, epochs=9,lr=0.0001,clip=1)

In [0]:
def _count_correct(net, loader):
    """Return ``(correct, total)`` for ``net`` over ``loader``.

    A sample is correct when the first output and the label fall on the same
    side of 0.5; an output of exactly 0.5 is never counted as correct.
    """
    correct = 0
    total = 0
    with torch.no_grad():
        for img, mask in loader:
            scores = net(img)[:, 0]
            truth = mask.reshape(img.size(0), -1)[:, 0]
            hits = ((scores > 0.5) & (truth > 0.5)) | ((scores < 0.5) & (truth < 0.5))
            correct += int(hits.sum().item())
            total += img.size(0)
    return correct, total


def accuracy(net, dataset):
    """Print validation accuracy followed by test accuracy for ``net``.

    Args:
        net: Model whose first output column is a probability.
        dataset: 0 selects the ``original`` folder, 1 selects ``change1``,
            any other value selects ``RCplot1``.

    Accuracy is the number of correct predictions divided by the number of
    samples actually evaluated in each split. The model is put in eval mode
    for the measurement and returned to its previous mode afterwards.
    """
    if dataset == 0:
        # Normal charts
        path = "/content/drive/My Drive/CSC420P/data/original/"
    elif dataset == 1:
        # Charts with feature points drawn on them (original note: "SIFT")
        path = "/content/drive/My Drive/CSC420P/data/change1/"
    else:
        # Recurrence plots
        path = "/content/drive/My Drive/CSC420P/data/RCplot1/"
    trainLoad, validLoad, testLoad = prep_data(path, "/content/drive/My Drive/CSC420P/labels.csv")

    was_training = net.training
    net.eval()
    try:
        test_hits, test_total = _count_correct(net, testLoad)
        val_hits, val_total = _count_correct(net, validLoad)
    finally:
        net.train(was_training)

    # max(..., 1) avoids dividing by zero when a split is empty.
    print(val_hits / max(val_total, 1), test_hits / max(test_total, 1))