In [10]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import torch.nn.functional as F
import torch.optim as optim

import numpy as np
import pandas as pd
import sys
sys.path.append("..")

In [11]:
# Run on the GPU when CUDA is usable, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [51]:
def one_hot(seqs):
    """One-hot encode RNA sequences over the alphabet A, C, G, U.

    Args:
        seqs: iterable of equal-length strings containing only the
            characters 'A', 'C', 'G' and 'U'.

    Returns:
        A float32 tensor of shape (len(seqs), 4, seq_len). Channel k is 1.0
        at every position holding the k-th letter of "ACGU" and 0.0 elsewhere.
        An empty ``seqs`` yields a tensor of shape (0, 4, 0).

    Raises:
        KeyError: if a sequence contains a character outside "ACGU".
        ValueError: if the sequences do not all have the same length.
    """
    alphabet = 'ACGU'
    channel_of = {base: channel for channel, base in enumerate(alphabet)}
    # Row k of the identity matrix is the one-hot vector of the k-th base.
    identity = np.eye(len(alphabet), dtype=np.float32)

    encoded = []
    for seq in seqs:
        # Map every base to its channel index; unknown characters raise KeyError.
        channels = np.asarray([channel_of[base] for base in seq], dtype=np.intp)
        # Fancy indexing builds the whole (seq_len, 4) matrix at once instead of
        # growing it row by row; the transpose gives the (4, seq_len) layout.
        # This also keeps a single-character sequence 2-D, i.e. (4, 1).
        encoded.append(identity[channels].T)

    if not encoded:
        return torch.empty((0, len(alphabet), 0), dtype=torch.float32)

    # np.stack refuses ragged input, so length mismatches surface as ValueError.
    return torch.tensor(np.stack(encoded), dtype=torch.float32)
        
    

In [52]:

# Load the training table: column "utr" holds the sequences, column "rl" the
# regression target (presumably the mean ribosome load -- confirm with the data source).
dataset1 = pd.read_csv('data/random_train_pc.csv')

# The data is split into two aligned tensors: encoded sequences and targets.
trainseqs1 = one_hot(dataset1["utr"].tolist())
trainmrl1 = torch.tensor(dataset1["rl"].to_numpy(), dtype=torch.float32)

# TensorDataset pairs the i-th sequence with the i-th target on access and
# checks that both tensors have the same length, instead of materialising a
# Python list of tuples (zip would also silently truncate on a mismatch).
dataset1_reshaped = torch.utils.data.TensorDataset(trainseqs1, trainmrl1)
batch_size = 128
trainloader = torch.utils.data.DataLoader(dataset1_reshaped, batch_size=batch_size, shuffle=True)


In [54]:
# Sanity check: show the shapes of the encoded sequences and of the targets.
print(f"{trainseqs1.shape} {trainmrl1.shape}")


torch.Size([260000, 4, 50]) torch.Size([260000])


In [58]:
#Model
class Model(nn.Module):
    """Three stacked 1-D convolutions followed by a small regression head.

    The network maps a batch of shape (batch, in_channels, seq_len) to one
    scalar per example, returned with shape (batch, 1).

    Args:
        seq_len: length of the input sequences. Because every convolution
            uses 'same' padding, the flattened feature size is
            ``n_filters * seq_len``.
        in_channels: number of input channels (4 for one-hot A/C/G/U).
        n_filters: number of output channels of each convolution.
        kernel_size: width of each convolution kernel.
        hidden_units: width of the fully connected hidden layer.
        dropout: dropout probability applied after the hidden layer.

    The defaults reproduce the original fixed architecture
    (4 -> 120 -> 120 -> 120 channels, kernel 8, Linear(6000, 40), dropout 0.2).
    """

    def __init__(self, seq_len=50, in_channels=4, n_filters=120, kernel_size=8,
                 hidden_units=40, dropout=0.2):
        super().__init__()
        self.conv1 = nn.Conv1d(in_channels, n_filters, kernel_size=kernel_size, padding='same')
        self.conv2 = nn.Conv1d(n_filters, n_filters, kernel_size=kernel_size, padding='same')
        self.conv3 = nn.Conv1d(n_filters, n_filters, kernel_size=kernel_size, padding='same')
        self.flat1 = nn.Flatten()
        # 'same' padding preserves the sequence length, so flattening the
        # (n_filters, seq_len) feature map gives n_filters * seq_len values.
        self.fc1 = nn.Linear(n_filters * seq_len, hidden_units)
        self.drop1 = nn.Dropout(dropout)
        self.out = nn.Linear(hidden_units, 1)

    def forward(self, x):
        """Return predictions of shape (batch, 1) for input (batch, in_channels, seq_len)."""
        for conv in (self.conv1, self.conv2, self.conv3):
            x = F.relu(conv(x))
        x = self.flat1(x)
        x = F.relu(self.fc1(x))
        x = self.drop1(x)
        # No activation on the output: this is an unbounded regression value.
        return self.out(x)



In [59]:
# Build the network and move its parameters to the selected device.
# Module.to() returns the module itself, so the calls can be chained.
net = Model().to(device)
net  # last expression: display the layer summary as the cell output

Model(
  (conv1): Conv1d(4, 120, kernel_size=(8,), stride=(1,), padding=same)
  (conv2): Conv1d(120, 120, kernel_size=(8,), stride=(1,), padding=same)
  (conv3): Conv1d(120, 120, kernel_size=(8,), stride=(1,), padding=same)
  (flat1): Flatten(start_dim=1, end_dim=-1)
  (fc1): Linear(in_features=6000, out_features=40, bias=True)
  (drop1): Dropout(p=0.2, inplace=False)
  (out): Linear(in_features=40, out_features=1, bias=True)
)

In [60]:
# Mean squared error between prediction and target: (prediction - y)^2.
criterion = nn.MSELoss()
# Adam with explicit learning rate and betas; epsilon stays at its default of 1e-8.
optimizer = optim.Adam(
    net.parameters(),
    lr=1e-3,
    betas=(0.9, 0.999),
)


In [85]:
def train_one_epoch(log_every=500):
    """Run one optimisation pass over ``trainloader`` and return the mean loss.

    Relies on the notebook-level ``net``, ``trainloader``, ``criterion``,
    ``optimizer`` and ``device``.

    Args:
        log_every: print the average loss of the last ``log_every`` batches
            each time that many batches have been processed. A value of 0 or
            ``None`` disables the progress output.

    Returns:
        The mean of the per-batch losses over the whole epoch as a float, or
        ``nan`` when the loader yields no batches.
    """
    net.train(True)

    window_loss = 0.0  # loss accumulated since the last progress line
    epoch_loss = 0.0   # loss accumulated over the whole epoch
    n_batches = 0

    for batch_index, (inputs, correct_mrl) in enumerate(trainloader):
        inputs, correct_mrl = inputs.to(device), correct_mrl.to(device)

        optimizer.zero_grad()  # flush gradients left over from the previous step

        # Flatten the predictions to 1-D so they line up element-wise with the
        # 1-D targets before the loss is computed.
        outputs = net(inputs).reshape(-1)

        loss = criterion(outputs, correct_mrl)
        loss.backward()
        optimizer.step()

        batch_loss = loss.item()
        window_loss += batch_loss
        epoch_loss += batch_loss
        n_batches += 1

        # No accuracy is reported: this is a regression task and the previous
        # accuracy counter was never updated, so it always printed 0.0%.
        if log_every and (batch_index + 1) % log_every == 0:
            print('Batch {0}, Loss: {1:.3f}'.format(batch_index + 1,
                                                    window_loss / log_every))
            window_loss = 0.0

    return epoch_loss / n_batches if n_batches else float('nan')

    

In [84]:
# Single source of truth for the epoch count, so the loop bound and the
# progress message cannot drift apart.
NUM_EPOCHS = 3
for epoch_index in range(NUM_EPOCHS):
    print(f'Epoch {epoch_index+1} of {NUM_EPOCHS}')
    train_one_epoch()

Epoch 1 of 3


KeyboardInterrupt: 

# Test section

In [8]:
# Scratch check: a list of equally shaped 2-D arrays converts to one 3-D array.
arr = np.arange(9).reshape(3, 3)  # [[0, 1, 2], [3, 4, 5], [6, 7, 8]]
arr_1 = arr.T
l = [arr, arr_1]
print(np.array(l).shape)

(2, 3, 3)
