In [222]:
import torch
import torchvision
import torchvision.datasets as datasets
import torchvision.transforms as transforms
import torch.optim as optim
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
#import matplotlib.pyplot as plt
import numpy as np

In [223]:
device=torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device

device(type='cpu')

In [224]:
class MiniCNN(torch.nn.Module):
  def __init__(self):
    super(MiniCNN, self).__init__()
    self.conv1 = torch.nn.Conv1d(4, 6, kernel_size=5, padding=2) 
    self.pool = torch.nn.MaxPool1d(kernel_size=2, stride=2)  
    self.conv2 = torch.nn.Conv1d(6, 16, kernel_size=3, padding=1) 

    self.fc1 = torch.nn.Linear(16 * 50, 120)  
    self.fc2 = torch.nn.Linear(120, 2) 

  def forward(self, x):
    x = self.pool(torch.nn.functional.relu(self.conv1(x)))  
    x = self.pool(torch.nn.functional.relu(self.conv2(x))) 
    x = x.view(-1, 16 * 50)  
    x = torch.nn.functional.relu(self.fc1(x))  
    x = self.fc2(x)  
    return x

In [225]:
class AlexNet(nn.Module):
    def __init__(self, num_classes=2):
        super(AlexNet, self).__init__()
        self.features = nn.Sequential(
            nn.Conv1d(4, 64, kernel_size=11, stride=4, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool1d(kernel_size=3, stride=2),
            nn.Conv1d(64, 192, kernel_size=5, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool1d(kernel_size=3, stride=2),
            nn.Conv1d(192, 384, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv1d(384, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv1d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool1d(kernel_size=3, stride=2),
        )
        self.avgpool = nn.AdaptiveAvgPool1d(6)
        self.classifier = nn.Sequential(
            nn.Dropout(),
            nn.Linear(256 * 6, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Linear(4096, num_classes),
        )

    def forward(self, x):
        x = self.features(x)
        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        x = self.classifier(x)
        return x

In [226]:
def oneHotEncoding(inputFile):
    
    def importFile(path):
        onesAndZeros = []
        sequences = []
        file = open(path, 'r')
        text = file. read()
        file.close()
        lines = text.split('\n')
        for i in range(0, len(lines)):
            x = lines[i].split()
            onesAndZeros.append(x[0])
            sequences.append(x[1])
        return onesAndZeros, sequences

    tLabel, tData = importFile(inputFile)
    
    def encode(seq):
        x = []
        mapping = {'A': [1, 0, 0, 0], 'C': [0, 1, 0, 0], 'G': [0, 0, 1, 0], 'T': [0, 0, 0, 1]}
        for i in range(0, len(seq)):
            line = seq[i]
            y = []
            for char in line.upper():
                if char in mapping:
                    y.append(mapping[char])
            x.append(y)
        return np.array(x)

    eData = encode(tData)

    return tLabel, eData

In [227]:
# Generating random data

random_train_data = np.random.rand(32,1,28, 28)
print(random_train_data.dtype)
random_test_data = np.random.rand(16,1,28, 28)
print(random_test_data.dtype)

float64
float64


In [228]:
# Converting the data to tensor type and floating point type
trainLabels, trainSequences = oneHotEncoding('data/train')
testLabels, testSequences = oneHotEncoding('data/test')

tensor_train_data = torch.from_numpy(trainSequences).float()
tensor_test_data = torch.from_numpy(testSequences).float()

In [229]:
print(tensor_train_data.shape)
print(tensor_test_data.dtype)
#print(len(tensor_data))

torch.Size([750, 200, 4])
torch.float32


In [230]:
# Creating random binary labels. and converting it to tensor

label_test = np.random.choice([0, 1], size=len(tensor_test_data))
label_train = np.random.choice([0, 1], size=len(tensor_train_data))

print(label_train.dtype)
label_test = torch.from_numpy(label_test)
label_train = torch.from_numpy(label_train)
print(label_test.dtype)

int64
torch.int64


In [231]:

# The most important class, a custom data loader, understand how it is working.

class data_class(Dataset):
    def __init__(self,data,label):
        #self.data=data
        #self.data=torch.tensor(data)
        #self.labels=torch.tensor(label)

        self.data = torch.tensor(data, dtype=torch.float32).permute(0, 2, 1)
        self.label = torch.tensor(label, dtype=torch.long)

    def __len__(self):
        return len(self.data)
    
    def __getitem__(self,id):
        data_set=self.data[id]
        labels=self.label[id]

        return data_set,labels

In [232]:
# calling the data_class for the raw random data

train_data=data_class(tensor_train_data,label_train)
test_data=data_class(tensor_test_data,label_test)

  self.data = torch.tensor(data, dtype=torch.float32).permute(0, 2, 1)
  self.label = torch.tensor(label, dtype=torch.long)


In [233]:
# Creating the data loader which is going to load the data to the AI model

train_dataloader=DataLoader(train_data,batch_size=1,shuffle=True)
test_dataloader=DataLoader(test_data,batch_size=1,shuffle=True)

In [234]:
# Instantiating the model and assigning an optimizer to the model and creating a loss function

model=AlexNet().to(device)
optimizer=optim.Adam(params=model.parameters(),lr=0.0001)
loss_fn = nn.CrossEntropyLoss()

In [235]:
def train(model,device,train_dataloader,optimizer,epochs):
    print("inside train")
    model.train()
    for batch_ids, (img, classes) in enumerate(train_dataloader):
        classes=classes.type(torch.LongTensor)
        img,classes=img.to(device),classes.to(device)
        torch.autograd.set_detect_anomaly(True)     
        optimizer.zero_grad()
        output=model(img)
        loss = loss_fn(output,classes)                
        
        loss.backward()
        optimizer.step()
    if(batch_ids +1) % 2 == 0:
        print("Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}".format(
            epochs, batch_ids* len(img), len(train_dataloader.dataset),
            100.*batch_ids / len(train_dataloader),loss.item()))

In [236]:
def test(model, device, test_dataloader):
    model.eval()
    test_loss=0
    correct=0
    with torch.no_grad():
        for img,classes in test_dataloader:
            img,classes=img.to(device), classes.to(device)
            y_hat=model(img)
            test_loss+=F.nll_loss(y_hat,classes,reduction='sum').item()
            _,y_pred=torch.max(y_hat,1)
            correct+=(y_pred==classes).sum().item()
        test_loss/=len(test_dataloader)
        print("\n Test set: Avarage loss: {:.0f},Accuracy:{}/{} ({:.0f}%)\n".format(
            test_loss,correct,len(test_dataloader),100.*correct/len(test_dataloader)))
        print('='*30)

In [237]:
# WE ARE USING RANDOM DATA SO THE TRAINING AND TESTING DOES NOT MATTER, THE AIM IS TO SHOWCASE THE USE OF A CUSTOM DATASET
# SINCE IN PRACTICAL SENSE YOU HAVE TO CLEAN THE DATA AND LOAD THE DATA INTO THE MODEL.


if __name__=='__main__':
    seed=42
    EPOCHS=2
    
    for epoch in range(1,EPOCHS+1):
        train(model,device,train_dataloader,optimizer,epoch)
        test(model,device,test_dataloader)

inside train

 Test set: Avarage loss: 0,Accuracy:105/200 (52%)

inside train

 Test set: Avarage loss: 0,Accuracy:105/200 (52%)

