In [4]:
import torch
import numpy as np 
import os
import pandas as pd
import pickle
from datasets import MeltomeUnirepDataset
import models
from torch.utils.data import DataLoader

In [6]:
# Where the UniRep vectors live, and the path of the dictionary file that holds the labels
URdir = "../datasets/Meltome_Vecs"
dict_path = "../datasets/protID2MT.p"

# Dataset object built for the "train" portion of the data
MUData = MeltomeUnirepDataset(
    URdir=URdir,
    dictPath=dict_path,
    data="train",
)

# Loader that serves shuffled batches of 10 items through a single worker
loader_options = dict(batch_size=10, shuffle=True, num_workers=1)
loader = DataLoader(MUData, **loader_options)

In [7]:
import torch.nn as nn
import torch.optim as optimizer

# Build the SimpleNN model with 64 hidden units
model = models.SimpleNN(h_units=64)

# Mean-squared-error loss, minimised by Adam with a learning rate of 3e-4
loss_fn = nn.MSELoss()
adam = optimizer.Adam(model.parameters(), lr=3e-4)

In [8]:
# Pick the compute device: the first CUDA GPU when one is available, the CPU otherwise
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print(device)

cuda:0


In [None]:
# training the classifier
model.to(device)

for epoch in range(2):
    running_loss = 0.0
    for i, data in enumerate(loader):
        # `data` is the mini-batch yielded by the DataLoader. It must be used
        # directly: re-reading `MUData[i]` here would replace the shuffled batch
        # with one un-batched sample and bypass the loader entirely.
        inputs, label = data["vec"].to(device), data["meltingTemp"].to(device)

        adam.zero_grad()

        output = model(inputs)
        # MSELoss broadcasts operands of different shapes (e.g. (B, 1) vs (B,)),
        # which silently computes the wrong loss. When both hold the same number
        # of elements, give the label the output's shape before comparing.
        if label.shape != output.shape and label.numel() == output.numel():
            label = label.reshape(output.shape)
        loss = loss_fn(output, label)
        loss.backward()
        adam.step()

        running_loss += loss.item()
        if i % 2000 == 1999:    # print every 2000 mini-batches
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 2000))
            running_loss = 0.0


In [17]:
# First step of splitting the dataset into 3 folders: list every file under URdir.
# os.walk also descends into sub-folders; only the bare file names are kept,
# not the directories they were found in.
ds = [name for _, _, files in os.walk(URdir) for name in files]

In [19]:
import random

# The order of names returned by os.walk depends on the file system, and an
# unseeded shuffle differs on every run. Sorting first and shuffling with a
# fixed seed makes the resulting train/val/test split reproducible.
SPLIT_SEED = 0
ds.sort()
random.Random(SPLIT_SEED).shuffle(ds)

In [20]:
# report how many files were collected
n_files = len(ds)
print(n_files)

31817


In [21]:
# peek at the first ten file names
print(ds[:10])

['Q9ESZ8.npy', 'Q8L970.npy', 'P0A7R1.npy', 'G5E8U6.npy', 'Q9SGN6.npy', 'D6VPA0.npy', 'Q72J48.npy', 'Q9NES9.npy', 'P94534.npy', 'P0AB28.npy']


In [22]:
import shutil

# Cut the file list `ds` into 80% train / 10% validation / 10% test
n_total = len(ds)
train_end = int(n_total * 0.8)
val_end = int(n_total * 0.9)

trainfiles = ds[:train_end]
valfiles = ds[train_end:val_end]
testfiles = ds[val_end:]

In [25]:
# Move the files selected for the test split into their own folder.
test_dir = "UniRep_Vecs/test"
# shutil.move only places the source *inside* the destination when the
# destination is an existing directory; otherwise it renames the source to that
# path. Create the folder first so every file ends up inside it.
os.makedirs(test_dir, exist_ok=True)
for f in testfiles:
    shutil.move(os.path.join(URdir, f), test_dir)