# Setup

In [1]:
import pandas as pd
import numpy as np
import random

In [4]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

In [5]:
# Toy k-mer -> embedding-index lookup.
# Every key must be unique: a dict literal silently keeps only the last value
# for a repeated key, which would drop indices from the mapping.
# NOTE(review): the k-mers for indices 1 and 2 are illustrative placeholders —
# replace them with the real k-mers from the dataset.
data = {"AAGGA" : 0,
        "AAGGC" : 1,
        "AAGGG" : 2,
        "AAGGT" : 3,
        "ATGGA" : 4, }
        # . . . 66

In [7]:
# Three consecutive integer indices (0, 1, 2) as an int64 tensor.
# NOTE(review): the name `input` shadows the Python builtin; it is kept because
# a later cell reads it.
input = torch.arange(3)
input

tensor([0, 1, 2])

In [9]:
# Lookup table with 66 rows, each a learnable 2-dimensional vector.
embeddings = nn.Embedding(num_embeddings=66, embedding_dim=2)
# Fetch the rows selected by the index tensor (one 2-d vector per index).
embeddings(input)

tensor([[-0.0235, -1.2632],
        [ 0.4791,  0.8433],
        [ 2.8003,  2.0574]], grad_fn=<EmbeddingBackward0>)

# Linear Regression Trial

In [10]:
import torch
# Import the neural network module from pytorch
import torch.nn as nn

# Linear regression
# f = w * x 
# here : f = 2 * x

# 0) Training samples, watch the shape! X is (8, 1): 8 observations of 1 feature each
X = torch.arange(1, 9, dtype=torch.float32).reshape(-1, 1)
# Targets follow the relation f = 2 * x exactly
Y = 2 * X

n_samples, n_features = X.shape
print(f'n_samples = {n_samples}, n_features = {n_features}')

# Single input value used to probe the model before and after training
X_test = torch.tensor([5], dtype=torch.float32)

n_samples = 8, n_features = 1


In [11]:
# 1) Design Model, the model has to implement the forward pass!

# Here we could simply use a built-in model from PyTorch
# model = nn.Linear(input_size, output_size)

# A PyTorch model class must ALWAYS inherit from nn.Module
class LinearRegression(nn.Module):
    """Linear regression as a one-layer network computing ``w * x + b``."""

    def __init__(self, input_dim, output_dim):
        """Create the single linear layer mapping ``input_dim`` to ``output_dim``."""
        # The parent initialiser has to run before any layer is attached
        super().__init__()
        # Only one layer is needed: nn.Linear applies w*x + b
        self.lin = nn.Linear(in_features=input_dim, out_features=output_dim)

    def forward(self, x):
        """Forward pass: feed ``x`` through the linear layer and return the result."""
        prediction = self.lin(x)
        return prediction

# Specifying the input and output dimensions (one feature in, one value out)
input_size, output_size = n_features, n_features
# Instantiate the linear regression model
model = LinearRegression(input_size, output_size)

print(f'Prediction before training: f({X_test.item()}) = {model(X_test).item():.3f}')

# 2) Define loss and optimizer
learning_rate = 0.01
n_epochs = 100
# Mean squared error between predictions and targets
loss = nn.MSELoss()
# Use Stochastic Gradient Descent. Need to supply the model parameters
# and a selected learning rate
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

# 3) Training loop
for epoch in range(n_epochs):
    # predict = forward pass with our model
    # Calling the model runs model.forward(X) and returns the predicted y values
    y_predicted = model(X)

    # loss: PyTorch criteria take (input, target), i.e. the prediction comes first.
    # MSE is symmetric so the value is unchanged, but the order matters for
    # asymmetric criteria such as BCELoss.
    l = loss(y_predicted, Y)

    # calculate gradients = backward pass
    l.backward()

    # update weights. This updates model.parameters()
    optimizer.step()

    # zero the gradients after updating so they do not accumulate across epochs
    optimizer.zero_grad()

    if (epoch+1) % 10 == 0:
        w, b = model.parameters() # unpack parameters. In this instance the weight & the bias
        print('epoch ', epoch+1, ': w = ', w[0][0].item(), ' loss = ', l.item())


print(f'Prediction after training: f({X_test.item()}) = {model(X_test).item():.3f}')

Prediction before training: f(5.0) = -2.208
epoch  10 : w =  1.9433797597885132  loss =  0.020263902842998505
epoch  20 : w =  1.9468997716903687  loss =  0.018501583486795425
epoch  30 : w =  1.9489827156066895  loss =  0.017078982666134834
epoch  40 : w =  1.9509832859039307  loss =  0.015765808522701263
epoch  50 : w =  1.9529054164886475  loss =  0.014553562738001347
epoch  60 : w =  1.9547522068023682  loss =  0.013434533029794693
epoch  70 : w =  1.956526517868042  loss =  0.012401577085256577
epoch  80 : w =  1.9582313299179077  loss =  0.011448007076978683
epoch  90 : w =  1.959869146347046  loss =  0.010567796416580677
epoch  100 : w =  1.9614428281784058  loss =  0.009755214676260948
Prediction after training: f(5.0) = 10.024


# Trial

## Get Data Helpers

In [95]:
# Make the project's helper scripts importable and locate the dataset.
# Both locations can be overridden through environment variables so the
# notebook is not tied to a single machine; when the variables are unset the
# original paths are used unchanged.
import os
import sys

pathname = os.environ.get(
    "M6A_SCRIPTS_DIR",
    "/Users/carelchay/Desktop/School/Modules/DSA4262/Project 2/DSA4262-frontasticfour/scripts",
)
# Only append once so re-running the cell does not grow sys.path
if pathname not in sys.path:
    sys.path.append(pathname)
path_to_data = os.environ.get(
    "M6A_DATA_PATH",
    "/Users/carelchay/Desktop/School/Modules/DSA4262/Project 2/data/data.json",
)

In [96]:
import getData as gd

In [97]:
# Build the project's data helper, pointing it at the file given by path_to_data.
# NOTE(review): gd.getData lives in the project's getData module, which is not
# visible here — confirm its constructor contract there.
getDat = gd.getData(path_to_data=path_to_data)

In [98]:
# Get K-mer dictionary
# NOTE(review): presumably maps each unique k-mer to an integer index usable
# by the embedding layer — confirm against getData.get_unique_kmers.
kmer_dct = getDat.get_unique_kmers()

# Training Dataset

In [99]:
# Load a small sample of the dataset for experimentation.
# NOTE(review): num_entries=10 presumably caps how many entries are read, and
# the result looks like a pandas DataFrame (it is previewed with .head()) —
# confirm against getData.get_data.
df = getDat.get_data(num_entries=10)

In [100]:
# Preview the first rows to check the loaded columns
df.head()

Unnamed: 0,transcript,position,k-mer bases,values
0,ENST00000000233,244,AAGACCA,"[[0.00299, 2.06, 125.0, 0.0177, 10.4, 122.0, 0..."
1,ENST00000000233,261,CAAACTG,"[[0.0126, 1.95, 111.0, 0.0125, 1.27, 108.0, 0...."
2,ENST00000000233,316,GAAACAG,"[[0.00432, 2.02, 104.0, 0.00299, 3.56, 99.3, 0..."
3,ENST00000000233,332,AGAACAT,"[[0.0134, 4.71, 132.0, 0.00447, 4.24, 98.8, 0...."
4,ENST00000000233,368,AGGACAA,"[[0.015, 6.97, 118.0, 0.0106, 3.04, 123.0, 0.0..."


# Build Neural Network

In [2]:
import torch
import torch.nn as nn

In [326]:
def gen_one(fixed=False, idx=1, vocab_size=66):
    """Generate one synthetic read: nine numeric features plus three k-mer indices.

    Args:
        fixed: When falsy, the three trailing indices are drawn at random.
            When truthy, they are the consecutive values ``idx, idx+1, idx+2``.
        idx: First of the three consecutive indices used when ``fixed`` is truthy.
        vocab_size: Number of rows in the embedding table. Random indices are
            drawn from ``0 .. vocab_size - 1`` so they are always valid lookups.

    Returns:
        A list of 12 values: ``0.1 .. 0.9`` followed by three integer indices.

    Note:
        ``random.randint(0, 66)`` is inclusive on both ends and can return 66,
        which is out of range for an embedding with 66 rows (valid: 0..65).
        ``random.randrange(vocab_size)`` excludes the upper bound.
        In the fixed branch the caller is responsible for keeping
        ``idx + 2 < vocab_size``.
    """
    numerics = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
    if fixed:
        return numerics + [idx, idx + 1, idx + 2]
    return numerics + [random.randrange(vocab_size) for _ in range(3)]
        

def gen_vect(size, fixed=False, idx = 1):
    """Stack ``size`` rows produced by ``gen_one`` into a NumPy array.

    Args:
        size: Number of rows to generate.
        fixed: Forwarded to ``gen_one`` as a plain boolean; selects fixed
            versus random trailing indices.
        idx: Forwarded to ``gen_one`` as the starting index.

    Returns:
        ``np.array`` built from the generated rows; each row has 12 values.
    """
    use_fixed = bool(fixed)
    rows = []
    for _ in range(size):
        rows.append(gen_one(use_fixed, idx=idx))
    return np.array(rows)

In [363]:
class m6aNet(nn.Module):
    """Scores each read, then combines the read scores into one probability per site.

    Every read is a 12-vector: 9 numeric features followed by 3 k-mer indices.
    The indices are embedded, concatenated with the numeric features and fed
    through a small feed-forward network ending in a sigmoid. The per-read
    probabilities ``p_i`` of one site are combined as ``1 - prod(1 - p_i)``.
    """

    N_NUMERIC = 9   # numeric feature columns per read (columns 0..8)
    N_BASES = 3     # k-mer index columns per read (columns 9..11)
    N_KMERS = 66    # rows in the embedding table; valid indices are 0..65
    EMBED_DIM = 2   # width of each k-mer embedding

    def __init__(self, batchsize, readsize):
        """Build the layers.

        Args:
            batchsize: Nominal number of sites per batch. Stored for callers;
                ``forward`` takes the real batch size from its input.
            readsize: Nominal number of reads per site. Stored for callers;
                ``forward`` takes the real read count from its input.
        """
        # Initialise nn.Module first so attribute registration is fully set up
        super().__init__()
        self.batchsize = batchsize
        self.readsize = readsize

        # Embedding Layer
        self.embed = nn.Embedding(self.N_KMERS, self.EMBED_DIM)

        # 9 numeric features + 3 embedded indices * 2 dims each = 15 inputs
        in_features = self.N_NUMERIC + self.N_BASES * self.EMBED_DIM

        ## First Layer ##
        self.read_level_prob_1 = nn.Linear(in_features, 150)
        # First Batch Norm Layer
        self.norm_1 = nn.BatchNorm1d(num_features=150)
        # First Activation Layer
        self.activ_1 = nn.ReLU()

        ## Second Layer ##
        self.read_level_prob_2 = nn.Linear(150, 32)
        # Second Activation Layer
        self.activ_2 = nn.ReLU()

        ## Third Layer ##
        self.read_level_prob_3 = nn.Linear(32, 1)
        # Sigmoid Activation
        self.sig_1 = nn.Sigmoid()

    def forward(self, x):
        """Compute one probability per site.

        Args:
            x: Tensor (or array-like convertible by ``torch.as_tensor``) of
                shape ``(batch, reads, 12)``; the last axis holds 9 numeric
                features followed by 3 k-mer indices in ``0..65``.

        Returns:
            Tensor of shape ``(batch,)`` with values in ``[0, 1]``.
        """
        # Accept NumPy input as well; tensors pass through unchanged
        x = torch.as_tensor(x)

        # Extract numeric features
        numerics = x[:, :, :self.N_NUMERIC]
        # Extract the k-mer indices; the embedding lookup needs integer indices
        bases = x[:, :, self.N_NUMERIC:].type(torch.int64)
        # Feed to embedding layer -> (batch, reads, 3, 2)
        bases = self.embed(bases)

        # Merge the per-index embeddings into one vector per read. Flattening
        # from dim 2 uses the input's own batch/read sizes instead of the
        # constructor values, so any batch size works (e.g. a short last batch).
        bases = bases.flatten(start_dim=2)
        # Combine embedded output with numeric features
        x = torch.cat((numerics, bases), 2).type(torch.float)

        #### Feed Forward ####

        ## First Layer ##
        x = self.read_level_prob_1(x)
        # BatchNorm1d expects channels on dim 1, so move features there and back
        x = x.transpose(dim0=1, dim1=2)
        x = self.norm_1(x)
        x = x.transpose(dim0=1, dim1=2)
        # First Activation Layer
        x = self.activ_1(x)

        ## Second Layer ##
        x = self.read_level_prob_2(x)
        # Second Activation Layer
        x = self.activ_2(x)

        ## Third Layer ##
        x = self.read_level_prob_3(x)
        # Sigmoid Activation -> per-read probability, shape (batch, reads, 1)
        x = self.sig_1(x)
        # Drop the trailing singleton axis -> (batch, reads)
        x = x.squeeze(-1)

        # Final Output: combine the read probabilities of each site
        r = 1 - torch.prod(1 - x, dim=1)
        return r

            
        
        
        
        
        

In [366]:
model = m6aNet(batchsize=2, readsize=20)
# NOTE(review): `combined` is not defined in any visible cell; it must exist
# before this cell runs (expected shape (2, 20, 12) given the arguments above —
# confirm where it is built).
# Call the module itself rather than .forward() so nn.Module hooks are honoured.
results = model(combined)

In [None]:
# Binary cross-entropy criterion for the model's probability output
loss = nn.BCELoss()

# Training hyperparameters
n_epochs = 10
learning_rate = 0.01

# Adam optimizer over the model parameters with the selected learning rate
optimizer = torch.optim.Adam(params=model.parameters(), lr=learning_rate)