In [8]:
import pandas
import numpy as np
import matplotlib.pyplot as plt
from google.colab import drive
!pip install patool
import patoolib
import scipy.io
import glob
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
drive.mount('/content/gdrive')
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") 

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [2]:
sim_dir =  '/content/gdrive/My Drive/פרויקט מסכם/simulations/'

# for rar in glob.glob(sim_dir + '*.rar'):
#   patoolib.extract_archive(rar, outdir=sim_dir)

In [13]:
# dataset = []
# labels = []
# for file in glob.glob(sim_dir + '*/*.mat'):
#   mat = scipy.io.loadmat(file)['logY'].toarray()
#   dataset.append(mat)
#   if "homogenous" in file:
#     labels.append(0)
#   else: 
#     labels.append(1)
# dataset = np.array(dataset)
# labels = np.array(labels)
# np.save(sim_dir + "dataset.npy", dataset)
# np.save(sim_dir + "labels.npy", labels)

dataset = np.load(sim_dir + "dataset.npy")
labels = np.load(sim_dir + "labels.npy")

In [14]:
print(dataset.shape)
print(labels.shape)

(10000, 1, 9)
(10000,)


In [30]:
def norm_data(data):
  output = []
  for sample in data: 
    sample = (sample - np.mean(sample))/np.std(sample)
    output.append(np.array(sample))
  return np.array(output)

input_size = 9
num_of_samples = dataset.shape[0]

perm = torch.randperm(num_of_samples)
dataset = dataset[perm]
labels = labels[perm]

x_train, x_val, x_test = dataset[:round(0.7*num_of_samples)], dataset[round(0.7*num_of_samples):round(0.9*num_of_samples)], dataset[round(0.9*num_of_samples):]
t_train, t_val, t_test = labels[:round(0.7*num_of_samples)],  labels[round(0.7*num_of_samples):round(0.9*num_of_samples)],  labels[round(0.9*num_of_samples):]

x_train_norm = norm_data(x_train)
x_val_norm = norm_data(x_val)
x_test_norm = norm_data(x_test)

x_train_norm = np.reshape(x_train_norm, (x_train_norm.shape[0], x_train_norm.shape[1]*x_train_norm.shape[2]))
x_val_norm = np.reshape(x_val_norm, (x_val_norm.shape[0], x_val_norm.shape[1]*x_val_norm.shape[2]))
x_test_norm = np.reshape(x_test_norm, (x_test_norm.shape[0], x_test_norm.shape[1]*x_test_norm.shape[2]))

train_loader = torch.utils.data.DataLoader(
    np.concatenate((t_train[:,None], x_train_norm),axis=1),
    batch_size=64, shuffle=True)

val_loader = torch.utils.data.DataLoader(
    np.concatenate((t_val[:,None], x_val_norm),axis=1),
    batch_size=64, shuffle=True)

test_loader = torch.utils.data.DataLoader(
    np.concatenate((t_test[:,None], x_test_norm),axis=1),
    batch_size=64, shuffle=True)

(7000, 9)


In [76]:
class NeuralNetwork(nn.Module):
    def __init__(self, input_size=9, num_hidden=50):
        super(NeuralNetwork, self).__init__()
        self.layer1 = nn.Linear(input_size, num_hidden)
        self.layer2 = nn.Linear(num_hidden, num_hidden//2)
        self.layer3 = nn.Linear(num_hidden//2, 2)
        self.num_hidden = num_hidden
        self.input_size = input_size
    def forward(self, x):
        x = x.reshape([-1, input_size])
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = F.log_softmax(x, dim=1)
        return x 

In [68]:
def get_accuracy(model, loader=train_loader):
    model.eval()
    loss = 0
    correct = 0
    pred_list = []
    true_list = []
    for ar in loader:
        data = ar[:,1:]
        label = ar[:,0]
        # send to device
        data, label = data.to(device), label.to(device)
        
        data = data.view(-1, 9)
        data = data.view(-1, 1, 9, 1)
        pred = model(data)
        loss += F.nll_loss(pred, label.long(), reduction='sum').item() # sum up batch loss                                                               
        pred = pred.data.max(1, keepdim=True)[1] # get the index of the max log-probability                                                                 
        correct += pred.eq(label.data.view_as(pred)).cpu().sum().item()
    # print(f"correct = {correct}")
    # print(f"num_of_ars = {num_of_ars}")
    loss /= len(loader.dataset)
    accuracy = 100. * correct / len(loader.dataset)
    
    return loss, accuracy

def train(model, lr=0.01, max_iters=1000,num_epochs=6):
    model.train()
    train_accs, valid_accs = [], []
    epochs = []
    optimizer = optim.Adam(model.parameters(), lr=lr)
    n = 0 # the number of iterations
    iters, losses = [], []
    iters_sub = []

    for epoch in range(0, num_epochs):

        for batch_idx, ar in enumerate(train_loader):           
            data = ar[:,1:]
            label = ar[:,0]
            # send to device
            data, label = data.to(device), label.to(device)
            # data = data.view(-1, 28*28)
            # data = data.view(-1, 1, 28, 28)

            optimizer.zero_grad()
            pred = model(data)
            loss = F.nll_loss(pred, label.long())
            loss.backward()
            optimizer.step()

            iters.append(n)
            losses.append(loss)

            if batch_idx % 64 == 0: 
                
                iters_sub.append(n)
                train_loss, train_acc = get_accuracy(model, loader=train_loader)
                train_accs.append(train_acc)

                valid_loss, valid_acc = get_accuracy(model, loader=val_loader)
                valid_accs.append(valid_acc)
      
                print("Iter %d. [Val Acc %.0f%%] [Train Acc %.0f%%, Loss %f]" % (n, valid_acc, train_acc, train_loss))

            # increment the iteration number
            n += 1
            if n > max_iters:
                return iters, losses, iters_sub, train_accs, valid_accs
    return iters, losses, iters_sub, train_accs, valid_accs

In [75]:

model = NeuralNetwork(input_size, 20)
model.to(device)

lr = 0.0001

iters, losses, iters_sub, train_accs, valid_accs = train(model.double(), lr=lr, max_iters=10000, num_epochs=15)

Iter 0. [Val Acc 20%] [Train Acc 20%, Loss -0.411191]
Iter 64. [Val Acc 20%] [Train Acc 20%, Loss -0.478383]
Iter 110. [Val Acc 80%] [Train Acc 80%, Loss -0.532014]
Iter 174. [Val Acc 80%] [Train Acc 80%, Loss -0.604607]
Iter 220. [Val Acc 80%] [Train Acc 80%, Loss -0.652987]
Iter 284. [Val Acc 80%] [Train Acc 80%, Loss -0.703607]
Iter 330. [Val Acc 80%] [Train Acc 80%, Loss -0.728165]
Iter 394. [Val Acc 80%] [Train Acc 80%, Loss -0.751346]
Iter 440. [Val Acc 80%] [Train Acc 80%, Loss -0.762710]
Iter 504. [Val Acc 80%] [Train Acc 80%, Loss -0.773093]
Iter 550. [Val Acc 80%] [Train Acc 80%, Loss -0.778350]
Iter 614. [Val Acc 80%] [Train Acc 80%, Loss -0.783341]
Iter 660. [Val Acc 80%] [Train Acc 80%, Loss -0.786021]
Iter 724. [Val Acc 80%] [Train Acc 80%, Loss -0.788705]
Iter 770. [Val Acc 80%] [Train Acc 80%, Loss -0.790226]
Iter 834. [Val Acc 80%] [Train Acc 80%, Loss -0.791888]
Iter 880. [Val Acc 80%] [Train Acc 80%, Loss -0.792772]
Iter 944. [Val Acc 80%] [Train Acc 80%, Loss -0.793