In [6]:
import os
import torch
from torch import nn, optim
from torch.autograd import Variable
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from sklearn.metrics import r2_score
import pandas as pd

from mlp import MLP
from lantentDataset import LatentDataset

# Step size handed to the Adam optimizer further down in this script.
learning_rate = 0.001

# Tab-separated input table with the drug / gene-expression latent vectors.
# The path components are joined individually (instead of embedding a
# Windows-only '\\' separator in the first component) so the same relative
# location resolves on every platform; on Windows the result is unchanged.
dataFile = os.path.join(
    '.',
    'data',
    'Pancancer_LatentVec_Drug+GeneExp(cgc+eliminated+sampledGene+unsampledDrug).txt',
)



# ---------------------------------------------------------------------------
# Repeated train / validate / test experiment.
#
# Each run reshuffles the table, splits it by fixed row ranges, trains a fresh
# MLP, checkpoints the weights with the best validation R2, evaluates on the
# test split and appends one row to the tab-separated results file.
# ---------------------------------------------------------------------------
NUM_RUNS = 6                # independent repetitions of the whole experiment
NUM_EPOCHS = 2              # training epochs per run
EVAL_EVERY = 2              # run validation every EVAL_EVERY epochs
MIN_CHECKPOINT_EPOCH = 20   # checkpoints are only written for epochs > this
# NOTE(review): NUM_EPOCHS <= MIN_CHECKPOINT_EPOCH means no checkpoint is ever
# written with the current settings; both values are kept from the original
# script and the test phase below copes with a missing checkpoint.

# Inclusive row labels that end each split (labels are 0..len-1 after the
# reset_index below): 12245 training rows, 680 validation rows, 680 test rows.
TRAIN_END = 12244
VAL_END = 12924
TEST_END = 13604

# Trailing '' keeps the directory separator at the end, as the script always had.
path = os.path.join('.', 'trainedModels', '')
checkpointFile = os.path.join(path, 'modelParameters.pt')
resultsFile = os.path.join('.', 'R2_Score_relu6(cgc+unsampledGene+unsampledDrug).txt')

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Read the table once; passing the path lets pandas open *and close* the file
# (the previous open(...) handle was never closed and was re-created per run).
rawData = pd.read_table(dataFile, sep='\t')
if len(rawData) <= TEST_END:
    # .loc slicing silently truncates and drop_last=True then yields no
    # evaluation batch at all, so make the mismatch visible.
    print('Warning: data has {} rows but the fixed splits expect at least {}.'.format(
        len(rawData), TEST_END + 1))

for num in range(1, NUM_RUNS + 1):
    print("-----------------------------\n-------------"+str(num)+"---------------\n-----------------------------")

    # Fresh random permutation of the rows for this run.
    data = rawData.sample(frac=1).reset_index(drop=True)
    print(data.head())

    trainData = data.loc[: TRAIN_END, :]
    trainDataset = LatentDataset(trainData, train0val1test2=0)
    validationData = data.loc[TRAIN_END + 1: VAL_END, :]
    validationDataset = LatentDataset(validationData, train0val1test2=1)
    testData = data.loc[VAL_END + 1: TEST_END, :]
    testDataset = LatentDataset(testData, train0val1test2=2)

    trainLoader = DataLoader(trainDataset, batch_size=64, shuffle=True, drop_last=True)
    # Batch size 680 equals the size of the validation / test split, so each
    # of these loaders produces a single full batch.
    validationLoader = DataLoader(validationDataset, batch_size=680, drop_last=True)
    testLoader = DataLoader(testDataset, batch_size=680, drop_last=True)

    # A Kaiming-normal initialisation of Linear/Conv2d weights was disabled
    # here by the original author; the model's default initialisation is used.
    model = MLP().to(device)

    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=0.000005)

    bestR2 = -1
    bestLoss = 200
    bestEpoch = 0
    checkpointSaved = False   # True once *this* run has written a checkpoint

    for epoch in range(1, NUM_EPOCHS + 1):

        model.train()
        for batch in trainLoader:
            geLatentVec, dLatentVec, target = (t.to(device) for t in batch)

            out = model(geLatentVec, dLatentVec)
            loss = criterion(out, target)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        if epoch % EVAL_EVERY == 0:

            model.eval()
            # No gradients are needed for evaluation; skipping the autograd
            # graph saves memory on the 680-row batch.
            with torch.no_grad():
                for batch in validationLoader:
                    geLatentVec, dLatentVec, target = (t.to(device) for t in batch)

                    out = model(geLatentVec, dLatentVec)
                    loss = criterion(out, target)
                    evalLoss = loss.item()

                    out = out.cpu().numpy().tolist()
                    target = target.cpu().numpy().tolist()
                    r2 = r2_score(target, out)

                    print('epoch: {}, Validation Loss: {:.6f}, R2_Score: {:.6f}'.format(epoch, evalLoss, r2))
                    if r2 > bestR2 and epoch > MIN_CHECKPOINT_EPOCH:
                        bestLoss = evalLoss
                        bestR2 = r2
                        bestEpoch = epoch
                        os.makedirs(path, exist_ok=True)
                        torch.save(model.state_dict(), checkpointFile)
                        checkpointSaved = True
                        print("Got a better model!")

    # Only restore a checkpoint written by this run. Loading unconditionally
    # would either fail (no file) or silently test a stale model left behind
    # by an earlier run.
    if checkpointSaved:
        model.load_state_dict(torch.load(checkpointFile, map_location=device))
    else:
        print('No checkpoint was saved during this run; testing the final model weights instead.')
    print('\nNow testing the best model on test dataset\n')

    model.eval()   # make sure train-only layers are in inference mode
    with torch.no_grad():
        for batch in testLoader:
            geLatentVec, dLatentVec, target = (t.to(device) for t in batch)

            out = model(geLatentVec, dLatentVec)
            loss = criterion(out, target)
            evalLoss = loss.item()

            out = out.cpu().numpy().tolist()
            target = target.cpu().numpy().tolist()
            r2 = r2_score(target, out)

            print('epoch: {}, Validation Loss: {:.6f}, R2_Score: {:.6f}'.format(bestEpoch, bestLoss, bestR2))
            print('Test Loss: {:.6f}, R2_Score: {:.6f}'.format(evalLoss, r2))

            # Append this run's scores to the running results table.
            # DataFrame.append was removed in pandas 2.0; pd.concat works on
            # both old and new versions.
            df = pd.read_table(resultsFile, sep='\t')
            newRow = pd.DataFrame([{'id': int(len(df)), 'R2_test': r2, 'R2_val': bestR2, 'epoch': bestEpoch}])
            df = pd.concat([df, newRow], ignore_index=True)
            df.to_csv(resultsFile, sep='\t', index=False)










-----------------------------
-------------1---------------
-----------------------------




  CELL_LINE_NAME  DRUG_ID   LN_IC50         d0         d1         d2  \
0          8305C      255  5.079000   3.234940  -7.889980  -5.743248   
1          A2780      204 -1.466305   1.880497  -9.600801  -2.706006   
2            CMK      204 -1.577082   1.880497  -9.600801  -2.706006   
3         BICR22      156  0.655159  16.266525 -20.031952  11.050550   
4           EHEB       45  0.428457  15.488843 -31.318274 -13.421081   

          d3         d4        d5         d6  ...     ge246     ge247  \
0 -10.958365   8.796126 -6.902458  -6.049602  ...  3.095719  0.336501   
1  15.868850   0.559013 -0.874066   7.290702  ... -0.483628  4.978731   
2  15.868850   0.559013 -0.874066   7.290702  ...  0.510041  1.883173   
3  -7.987223   3.008411 -3.711324 -10.742310  ...  3.472140  0.516859   
4  10.794061  11.477291 -0.764784 -16.953358  ...  6.602766  1.055864   

      ge248     ge249     ge250     ge251     ge252     ge253      ge254  \
0  2.702074  1.755426  0.218852 -0.493401 -0.832422 

KeyError: 3000