In [1]:
import torch  # to use PyTorch (optimized tensor library for deep learning using GPU and CPU)
from torch import nn  #module torch. nn: diff classes, help  build nns
from torch.utils.data import DataLoader, Dataset
import numpy as np   # NumPy: Python library used for working w/arrays
                     #np.arange, np.random.shuffle
import pandas as pd  #pandas: software library for data manipulation + analysis
                     #pd.read_csv

from sklearn.metrics import r2_score
import matplotlib.pyplot as plt
import torch.optim as optim

import torchvision
import math

In [2]:
# Report whether a CUDA device is visible; the cells below move tensors and the model to the GPU with .cuda().
torch.cuda.is_available()

True

In [3]:
# Directory prefix for the CSV files read below (parameters.csv and real_t.csv).
# NOTE(review): absolute, machine-specific path — must be edited before running on another machine.
path = 'C:/Users/misha/Desktop/Python codes/Julia/training data/'

In [4]:
# Load the input parameters. header=None keeps the first CSV row as data and
# labels the columns with integers (0, 1, 2, 3) instead.
df1 = pd.read_csv(path + "parameters.csv", header=None)

In [5]:
# Sanity check of the loaded parameters: first five rows, then the row count.
print(df1.head())
print(len(df1.index))

       0     1     2     3
0  22.01  0.72  1.21  0.61
1  19.77  0.14  1.29  0.95
2  24.64  1.32  1.21  0.66
3  24.55  0.87  1.42  0.99
4  21.79  0.51  1.26  0.76
65611


In [6]:
# !gdown --id 1zGRT4aNV71ASxFXsG4MTZb0ZaqNslZSf #downloads real_t.csv file 

In [7]:
# Load the target values. header=None keeps the first CSV row as data and
# labels the columns with integers (0 ... 300 in the preview below).
df2 = pd.read_csv(path + "real_t.csv", header=None)

In [8]:
# Sanity check of the loaded targets: first five rows, then the row count.
print(df2.head())
print(len(df2.index))

        0         1         2         3         4         5         6    \
0 -0.923212 -0.913157 -0.901995 -0.889732 -0.876373 -0.861930 -0.846413   
1 -0.150980 -0.107935 -0.064665 -0.021280  0.022109  0.065395  0.108474   
2 -0.949474 -0.942679 -0.934842 -0.925965 -0.916052 -0.905110 -0.893145   
3 -0.015974  0.031830  0.077765  0.121477  0.162622  0.200877  0.235939   
4 -0.762562 -0.739172 -0.714570 -0.688787 -0.661854 -0.633810 -0.604694   

        7         8         9    ...       291       292       293       294  \
0 -0.829833 -0.812206 -0.793548  ...  0.245705  0.230979  0.215836  0.200349   
1  0.151243  0.193603  0.235461  ...  0.087291  0.242819  0.391866  0.527432   
2 -0.880165 -0.866178 -0.851196  ...  0.825729  0.855296  0.881989  0.905727   
3  0.267534  0.295418  0.319384  ...  0.435294  0.451729  0.466411  0.479366   
4 -0.574548 -0.543418 -0.511353  ... -0.068997 -0.047779 -0.019230  0.016327   

        295       296       297       298       299       300  
0  0

In [9]:
#parameters
# Hyperparameters shared by the cells below.
lr = 1e-6  # learning rate passed to the Adam optimizer
# momentum = 0.9
dropout_rate = 0.3  # used below both as the held-out (test) fraction and as the nn.Dropout probability
td_ratio = 1-dropout_rate  # fraction of the data used for training
batch_size = 200  # samples per DataLoader batch; the training-set size is rounded to a multiple of this

In [10]:
torch.manual_seed(0)  # seed PyTorch's RNG so random tensor creation is repeatable

# Move the complete data set onto the GPU as float32 tensors.
x_full = torch.tensor(df1.values).float().cuda()   # inputs from parameters.csv
y_full = torch.tensor(df2.values).float().cuda()   # targets from real_t.csv

# Shuffle the sample indices with a seeded NumPy generator. torch.manual_seed
# does not seed NumPy, so an unseeded np.random.shuffle here produced a
# different train/test split on every kernel restart.
inx = np.random.default_rng(0).permutation(len(df1))

td_ratio = 1-dropout_rate  # fraction of the data used for training
p = td_ratio
# Round the training-set size up to a whole number of batches, which
# prevents cases like 4 batches of 200 and one of 156.
train_size = math.ceil(int(td_ratio*(len(df1)))/batch_size)*batch_size
training_idx, test_idx = inx[:train_size], inx[train_size:]  # shuffled indices for each split
x_train, x_test = x_full[training_idx,:], x_full[test_idx,:]
y_train, y_test = y_full[training_idx,:], y_full[test_idx,:]

# Insert a singleton middle axis: inputs become N*1*4, targets N*1*301.
x_train = x_train.reshape(len(x_train),1,4)
x_test = x_test.reshape(len(x_test),1,4)
y_train = y_train.reshape(len(y_train),1,301)
y_test = y_test.reshape(len(y_test),1,301)


# Report shapes and devices of every split.
print('full x data shape:',x_full.shape, x_full.device)
print('x train data shape:', x_train.shape, x_train.device)
print('x test data shape:', x_test.shape, x_test.device)
print('y full data shape:',  y_full.shape, y_full.device)
print('y train data shape:', y_train.shape, y_train.device)
print('y test data shape:', y_test.shape, y_test.device)

full x data shape: torch.Size([65611, 4]) cuda:0
x train data shape: torch.Size([46000, 1, 4]) cuda:0
x test data shape: torch.Size([19611, 1, 4]) cuda:0
y full data shape: torch.Size([65611, 301]) cuda:0
y train data shape: torch.Size([46000, 1, 301]) cuda:0
y test data shape: torch.Size([19611, 1, 301]) cuda:0


In [11]:
# define dataset class
class SpectroData(Dataset):
    """Thin Dataset wrapper that forwards indexing and length to the wrapped object."""

    def __init__(self, data):
        # Only a reference is stored; nothing is copied or converted here.
        self.data = data

    def __len__(self):
        """Number of items reported by the wrapped object."""
        return len(self.data)

    def __getitem__(self, index):
        """Item at ``index``, exactly as the wrapped object returns it."""
        return self.data[index]
# Pair every input with its target so one loader yields (x, y) batches.
train_dataset1 = torch.utils.data.TensorDataset(x_train, y_train)
test_dataset1 = torch.utils.data.TensorDataset(x_test, y_test)

# Wrap the paired tensors in SpectroData (a pass-through Dataset).
train_dataset = SpectroData(train_dataset1)
test_dataset = SpectroData(test_dataset1)

# Batches are served in stored order (shuffle=False): the training and
# evaluation cells concatenate per-batch predictions and compare them with
# y_train / y_test row by row.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
# optional DataLoader arguments not in use: num_workers=0, pin_memory=True
 

In [12]:
def f(x, W1, W2, V, b):
    """Bilinear feature map applied to every sample of a batch.

    For sample ``e = x[n, 0, :]`` and feature index ``k`` the output is

        Q[n, 0, k] = e^T W1[k] e  +  e^T W2[k] (e*e)  +  V[k] . [e ; e*e]  +  b[k]

    where ``e*e`` is the element-wise square and ``[e ; e*e]`` their concatenation.

    Args:
        x:  tensor of shape (N, 1, d) -- one row vector per sample.
        W1: tensor of shape (K, d, d) -- quadratic form applied to (e, e).
        W2: tensor of shape (K, d, d) -- quadratic form applied to (e, e*e).
        V:  tensor of shape (K, 2*d)  -- linear weights on [e ; e*e].
        b:  tensor of shape (K, 1)    -- bias.

    Returns:
        Tensor of shape (N, 1, K), on the same device as the inputs.

    The whole batch is computed with tensor operations instead of a Python
    loop over samples and features, so no scratch tensors are written in
    place and nothing is forced onto a particular device.
    """
    e = x[:, 0, :]          # (N, d): drop the singleton row axis
    esq = e * e             # (N, d): element-wise squares

    # Quadratic terms: sum_ij e[n,i] * W[k,i,j] * (e or esq)[n,j]
    A = torch.einsum('ni,kij,nj->nk', e, W1, e)
    F = torch.einsum('ni,kij,nj->nk', e, W2, esq)

    # Linear term on the concatenated [e ; e*e] vector.
    L = torch.cat((e, esq), dim=1) @ V.t()

    # b is (K, 1); its transpose broadcasts over the batch axis.
    Q = A + F + L + b.t()
    return Q.unsqueeze(1)
 

In [13]:
class NeuralNetwork(nn.Module):   #https://pytorch.org/tutorials/beginner/basics/buildmodel_tutorial.html
    """Bilinear feature layer (computed by ``f``) followed by a ReLU/Linear stack.

    ``forward`` maps the input through ``f`` with the trainable tensors
    W1, W2, V and b, then through ``linear_relu_stack`` (50 -> 500 -> 500 ->
    500 -> 301). The ``flatten`` and ``dropout`` modules are registered but
    are not called by ``forward``.
    """

    def __init__(self):
        super().__init__()
        self.flatten = nn.Flatten()

        # Trainable tensors consumed by f(): two stacks of fifty 4x4 matrices,
        # a 50x8 linear weight and a 50x1 bias.
        self.W1 = nn.Parameter(torch.randn(50, 4, 4))
        self.W2 = nn.Parameter(torch.randn(50, 4, 4))
        self.V = nn.Parameter(torch.randn(50, 8))
        self.b = nn.Parameter(torch.randn(50, 1))

        # nn.Sequential feeds each layer's output into the next one; the
        # stack starts with a ReLU applied to the bilinear features.
        layers = [
            nn.ReLU(),
            nn.Linear(50, 500),
            nn.ReLU(),
            nn.Linear(500, 500),
            nn.ReLU(),
            nn.Linear(500, 500),
            nn.ReLU(),
            nn.Linear(500, 301),
        ]
        self.linear_relu_stack = nn.Sequential(*layers)
        self.dropout = nn.Dropout(dropout_rate)

    def forward(self, x):
        """Return the network output for a batch ``x`` passed through ``f`` and the stack."""
        features = f(x, self.W1, self.W2, self.V, self.b)
        return self.linear_relu_stack(features)

In [14]:
# Build the network and move its parameters to the GPU.
model = NeuralNetwork().cuda() 

In [None]:
#training the model
print(x_train.shape)
from torch.autograd import Variable  # Variable wraps tensor, gives way to perform backpropagation
#x_train , y_train =(Variable(x_train),Variable(y_train))

n_batches=math.ceil(len(x_train)/batch_size) # number of batches in train data
print("number of batches: ", n_batches)

# optimizer = torch.optim.SGD(model.parameters(), lr=lr, momentum=momentum)
optimizer = optim.Adam(model.parameters(), lr = lr)

loss_func = nn.MSELoss()
loss = Variable(torch.FloatTensor(1)).zero_()
loss.requires_grad = True
 

Trainloss = [] # loss of every epoch in iteration for training data
Testloss = [] #loss of every epoch in iteration for test data
r_squared = []
indices = []  #epochs

y_train1=y_train.reshape(len(y_train),301).cuda()
y_train2 = y_train1.detach().cuda()
y_pred = []

num_epoch = 100
for epoch in range(num_epoch):
    # Forward pass: Compute predicted y by passing x to the model
    # Zero gradients, perform a backward pass, and update the weights.


    i=0 # counter
    for x_train_batch, y_train_batch in train_loader:
        x_train_batch, y_train_batch =(Variable(x_train_batch),Variable(y_train_batch))
        y_pred_batch = model(x_train_batch)
       #combine all y_pred_batch into y_pred
        if i==0: 
            y_pred=  y_pred_batch
        else:
            y_pred = torch.cat((y_pred, y_pred_batch),0)
        i=i+1
        loss =loss_func(y_train_batch, y_pred_batch)
        if i%10==1:
              print("loss at batch",i ,loss)
        optimizer.zero_grad()
#     loss.requires_grad = True
        loss.backward(retain_graph=True)
        optimizer.step()
    loss_sum =loss_func(y_pred, y_train.cuda())
    if epoch%5==1:
        print("epoch", epoch,  " loss_sum:", loss_sum)
#r-squared score
    y_pred1=y_pred.reshape(len(y_train),301)
    y_pred2 = y_pred1.detach()
    r2=r2_score(y_train2.cpu(), y_pred2.cpu())
    r_squared.append(r2)
    print("r2:", r_squared)
    
# Compute and print loss
    Trainloss.append(loss_sum.item())
    print("trainloss:", Trainloss)
    indices.append(epoch+1)


torch.Size([46000, 1, 4])
number of batches:  230
loss at batch 1 tensor(160778.9531, device='cuda:0', grad_fn=<MseLossBackward0>)
loss at batch 11 tensor(138617.1875, device='cuda:0', grad_fn=<MseLossBackward0>)
loss at batch 21 tensor(142792.4062, device='cuda:0', grad_fn=<MseLossBackward0>)
loss at batch 31 tensor(132587.3750, device='cuda:0', grad_fn=<MseLossBackward0>)
loss at batch 41 tensor(125679.5781, device='cuda:0', grad_fn=<MseLossBackward0>)
loss at batch 51 tensor(125273.5156, device='cuda:0', grad_fn=<MseLossBackward0>)
loss at batch 61 tensor(133288.5312, device='cuda:0', grad_fn=<MseLossBackward0>)
loss at batch 71 tensor(119682.4297, device='cuda:0', grad_fn=<MseLossBackward0>)
loss at batch 81 tensor(130596.2891, device='cuda:0', grad_fn=<MseLossBackward0>)
loss at batch 91 tensor(109719.6562, device='cuda:0', grad_fn=<MseLossBackward0>)
loss at batch 101 tensor(106692.6562, device='cuda:0', grad_fn=<MseLossBackward0>)
loss at batch 111 tensor(111921.4453, device='cu

loss at batch 21 tensor(10052.2344, device='cuda:0', grad_fn=<MseLossBackward0>)
loss at batch 31 tensor(9331.8682, device='cuda:0', grad_fn=<MseLossBackward0>)
loss at batch 41 tensor(8869.4082, device='cuda:0', grad_fn=<MseLossBackward0>)
loss at batch 51 tensor(8843.5322, device='cuda:0', grad_fn=<MseLossBackward0>)
loss at batch 61 tensor(9411.2627, device='cuda:0', grad_fn=<MseLossBackward0>)
loss at batch 71 tensor(8464.6270, device='cuda:0', grad_fn=<MseLossBackward0>)
loss at batch 81 tensor(9254.5605, device='cuda:0', grad_fn=<MseLossBackward0>)
loss at batch 91 tensor(7782.6328, device='cuda:0', grad_fn=<MseLossBackward0>)
loss at batch 101 tensor(7577.3179, device='cuda:0', grad_fn=<MseLossBackward0>)
loss at batch 111 tensor(7964.2651, device='cuda:0', grad_fn=<MseLossBackward0>)
loss at batch 121 tensor(8149.8330, device='cuda:0', grad_fn=<MseLossBackward0>)
loss at batch 131 tensor(6620.4375, device='cuda:0', grad_fn=<MseLossBackward0>)


In [None]:
# Pickle the entire model object to disk, load it back, switch the loaded copy
# to eval mode and print each of its parameter tensors.
# NOTE(review): loading a fully pickled module presumably requires the
# NeuralNetwork class to be defined in the loading session — verify before
# using this file elsewhere.
FILE="model_feb27.pth"
torch.save(model, FILE)
model_save=torch.load(FILE)
model_save.eval()
for param in model_save.parameters():
    print(param)

In [None]:
# Save the trained weights, load them into a fresh NeuralNetwork and report
# the MSE of that network on the training and test sets.
save_path="./savedmodel.pth"           # save the trained model
torch.save(model.state_dict(),save_path )
device=torch.device("cuda")
# NOTE: `model` is rebound to a new object here. An optimizer created earlier
# still refers to the previous object's parameters.
model = NeuralNetwork()
model.load_state_dict(torch.load(save_path))# load the trained model
model.to(device)
model.eval()

# Evaluation only: disable autograd so no graph is built or kept while the
# per-batch predictions are collected.
with torch.no_grad():
    y_pred_train = torch.cat([model(x_batch) for x_batch, _ in train_loader], 0)
    loss_train = loss_func(y_pred_train, y_train.cuda())
print(loss_train)

with torch.no_grad():
    y_pred_test = torch.cat([model(x_batch) for x_batch, _ in test_loader], 0)
    loss_test = loss_func(y_pred_test, y_test.cuda())
print(loss_test)

In [None]:

y_pred_test=[]

# `model` may have been replaced by a reload cell since `optimizer` was
# created. The optimizer would then keep stepping the old object's parameters
# and this loop would never update the current model. Re-bind Adam to the
# current parameters and carry over its state (step counts, moment estimates).
_adam_state = optimizer.state_dict()
optimizer = optim.Adam(model.parameters(), lr=lr)
optimizer.load_state_dict(_adam_state)

for epoch in range(100,102):
    # Per batch: forward pass, loss, zero gradients, backward pass, weight update.
    model.train()
    i=0 # counter
    train_chunks = []
    for x_train_batch, y_train_batch in train_loader:
        y_pred_batch = model(x_train_batch)
        # Detached copy for the epoch metrics, so no graph is kept alive.
        train_chunks.append(y_pred_batch.detach())
        i=i+1
        loss = loss_func(y_pred_batch, y_train_batch)
        if i%10==1:
              print("train loss at batch",i ,loss)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    y_pred = torch.cat(train_chunks, 0)
    loss_sum = loss_func(y_pred, y_train.cuda())
    print( "epoch", epoch, " train loss: " , loss_sum)

    # Test pass: evaluation only, so autograd is switched off.
    model.eval()
    with torch.no_grad():
        y_pred_test = torch.cat([model(x_test_batch) for x_test_batch, _ in test_loader], 0)
        loss_test = loss_func(y_pred_test, y_test.cuda())
    print( "epoch", epoch, " test loss: " , loss_test)

    # r-squared score on the training set
    y_pred1 = y_pred.reshape(len(y_train),301)
    y_pred2 = y_pred1.detach()
    r2 = r2_score(y_train2.cpu(), y_pred2.cpu())
    r_squared.append(r2)
    print( "epoch", epoch, " r2: " , r_squared)

    # Record and print losses
    Trainloss.append(loss_sum.item())
    print("trainloss:", Trainloss)
    Testloss.append(loss_test.item())
    indices.append(epoch+1)

 
    

In [None]:
# Round-trip the current weights through disk into a fresh NeuralNetwork, then
# report its MSE on the training and test sets with autograd disabled.
save_path = "./savedmodel.pth"
torch.save(model.state_dict(), save_path)

device = torch.device("cuda")
model = NeuralNetwork()
model.load_state_dict(torch.load(save_path))
model.to(device)
model.eval()

y_pred_train = []
with torch.no_grad():
    # Training set: collect per-batch predictions, then join them once.
    train_chunks = []
    for x_train_batch, y_train_batch in train_loader:
        y_pred_batch = model(x_train_batch)
        train_chunks.append(y_pred_batch)
    i = len(train_chunks)
    y_pred_train = torch.cat(train_chunks, 0)
    loss_train = loss_func(y_pred_train, y_train.cuda())
    print(loss_train)

    # Test set: same procedure.
    y_pred_test = []
    test_chunks = []
    for x_test_batch, y_test_batch in test_loader:
        y_pred_batch = model(x_test_batch)
        test_chunks.append(y_pred_batch)
    i = len(test_chunks)
    y_pred_test = torch.cat(test_chunks, 0)
    loss_test = loss_func(y_pred_test, y_test.cuda())
    print(loss_test)

In [None]:
# Loss curves against the recorded epoch numbers. Previously `indices` was
# plotted as if it were the train loss and `Trainloss` was labelled as test loss.
plt.plot(indices, Trainloss, '-o', label='Train Loss')
if Testloss:
    # Test losses are appended in the same loop iteration as the most recent
    # train losses, so they line up with the last len(Testloss) epochs.
    plt.plot(indices[-len(Testloss):], Testloss, '-o', label='Test Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.title('Epochs vs Loss')

plt.show()



In [None]:
# Full R-squared history.
plt.plot(r_squared, '-o')
plt.xlabel('Epoch')
plt.ylabel('R-squared value')
plt.title('Epoch vs R-squared val')
# plt.legend(['p'])
# Finish this figure first: drawn on the same axes, the zoomed curve below
# would simply overlay the full one and keep its y-range.
plt.show()

# Zoomed view that skips the first epochs. The x values are derived from the
# slice itself so the two sequences always have the same length, even when
# fewer than 100 epochs were recorded.
start = 5
r2_value=r_squared[start:100]
x_epoch=list(range(start, start + len(r2_value)))
plt.plot(x_epoch,r2_value, '-o')
plt.xlabel('Epoch')
plt.ylabel('R-squared value')
plt.title('Epoch vs R-squared val')
# plt.legend(['p'])
plt.show()


In [None]:
# Loss curves from epoch index k onwards.
k=20
print(loss_test)  # most recent test-loss tensor from the evaluation cells

# Use the recorded history lists. `loss_test` is a 0-dim tensor and cannot be
# sliced, which made this cell fail.
train_curve = Trainloss[k:]
train_epochs = list(range(k, k + len(train_curve)))
plt.plot(train_epochs, train_curve, '-o', label='Train Loss')

# Test losses are appended together with the most recent train losses, so they
# belong to the last len(Testloss) epoch indices. Keep those at or after k.
first_test_epoch = len(Trainloss) - len(Testloss)
test_points = [(first_test_epoch + n, value)
               for n, value in enumerate(Testloss)
               if first_test_epoch + n >= k]
if test_points:
    test_epochs, test_curve = zip(*test_points)
    plt.plot(test_epochs, test_curve, '-o', label='Test Loss')

plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.title('Epochs vs Loss')

plt.show()


In [None]:
 

# R-squared for the late epochs. The x values are derived from the slice
# itself so plt.plot never receives sequences of different lengths when fewer
# than 100 epochs were recorded.
start = 80
r2_value=r_squared[start:100]
x_epoch=list(range(start, start + len(r2_value)))
plt.plot(x_epoch,r2_value, '-o')
plt.xlabel('Epoch')
plt.ylabel('R-squared value')
plt.title('Epoch vs R-squared val')
# plt.legend(['p'])
plt.show()