In [11]:
import numpy as np
import csv
import os
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from matplotlib import pyplot as plt
from torch.autograd import Variable
from mpl_toolkits.mplot3d import Axes3D
import sklearn.metrics as metrics
import pickle
import time
from sklearn.kernel_approximation import RBFSampler
%matplotlib notebook


In [12]:
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only run this against a trusted screen_info.txt.
with open('screen_info.txt','rb') as fl:
    t = pickle.load(fl)

# The first three entries of the pickled sequence are kept as separate globals.
fnames, totf, binf = t[0], t[1], t[2]

# Select which entry of the parallel lists this run works on.
runfile = 2
fname, bf = fnames[runfile], binf[runfile]

In [13]:
# Dataset directory, relative to the current working directory.
path = os.getcwd() + '/bioassay-datasets/'

# Each CSV row is split three ways: the first `bf` columns, the columns from
# `bf` up to (not including) the last one, and the last column.
# Every row is collected as raw strings, including whatever the first row is.
p_fingerprints, X_features, labels = [], [], []
with open(path+fname+'red_train.csv') as csvfile:
    readcsv = csv.reader(csvfile)
    for row in readcsv:
        p_fingerprints.append(row[:bf])
        X_features.append(row[bf:-1])
        labels.append(row[-1])

In [14]:
# Drop the first CSV row from each collection and convert to numeric arrays:
# the leading `bf` columns become ints, the remaining feature columns floats.
p_fingerprints = np.asarray(p_fingerprints)[1:].astype(int)
X_features = np.asarray(X_features)[1:].astype(float)

(no_examples , ip_dim) = p_fingerprints.shape
labels = labels[1:]

# Summary of the integer matrix; the "0s" line is total cell count minus the sum,
# which equals the number of zeros only if the matrix holds just 0s and 1s.
ones_total = np.sum(p_fingerprints)
print(no_examples,ip_dim)
print("total no of 1s",ones_total)
print("total no of 0s",no_examples*ip_dim-ones_total)

(7986, 121)
('total no of 1s', 42893)
('total no of 0s', 923413)


In [15]:
# Recode every 0 entry as -1, in place (all other values are left untouched).
np.putmask(p_fingerprints, p_fingerprints==0, -1)

In [16]:
# Integer column vector of class labels: 1 where the raw label string is
# exactly 'Active', 0 for anything else. Shape is (len(labels), 1).
labels2 = np.array([1 if l=='Active' else 0 for l in labels],dtype=int)
labels2 = labels2.reshape(len(labels),1)

In [22]:
# Dimensionality of the random kernel feature vectors: passed to RBFSampler as
# n_components and used as the input width of the first layer of c_mlp.
feat_dim = 250

In [23]:
# Map the float feature columns to `feat_dim` random kernel features.
# random_state is fixed so the sampled projection is the same on every run.
rbf_feature = RBFSampler(n_components=feat_dim,gamma=1,random_state=1)
rbf_feature.fit(X_features)
c_fingerprints = rbf_feature.transform(X_features)

In [18]:
# Draw one random permutation of the example indices and apply it to all three
# arrays so that rows stay aligned across them.
ind = np.arange(no_examples)
np.random.shuffle(ind)
p_fingerprints, c_fingerprints, labels2 = p_fingerprints[ind], c_fingerprints[ind], labels2[ind]

In [19]:
# Number of examples labelled 1, kept as a length-1 array because labels2 is a column.
no_active_ele = np.sum(labels2,axis=0)
print(no_active_ele)

# Mini-batch size used when sampling training batches.
batch_size = 400

[22]


In [26]:
def get_train_batch(batch_size,validation_iter=0,binary=True):
    """Sample a random training mini-batch (with replacement) as CUDA tensors.

    Parameters
    ----------
    batch_size : int
        Number of examples to draw.
    validation_iter : int
        0 samples from the whole dataset. A value k >= 1 excludes the k-th
        validation fold, using the same fold boundaries as get_val_data(k),
        so training batches never contain rows of the fold being validated.
    binary : bool
        True draws rows of `p_fingerprints` (width `ip_dim`); False draws rows
        of `c_fingerprints` (width `feat_dim`).

    Returns
    -------
    (train_batch, target)
        `train_batch` is a FloatTensor Variable of shape (batch_size, width)
        on the GPU; `target` is a LongTensor Variable of shape (batch_size,)
        holding the rows of `labels2` that belong to the sampled examples.

    Reads the module-level names no_examples, p_fingerprints, c_fingerprints,
    labels2, ip_dim and feat_dim.
    """
    all_indices = np.arange(no_examples)
    if validation_iter == 0: #no validation
        train_indices = all_indices
    else:
        # Fold boundaries are computed exactly as in get_val_data so the held-out
        # slice and the excluded slice are the same rows.
        interval_size = int(no_examples)*0.2
        val_start = int((validation_iter-1)*interval_size)
        val_end = int((validation_iter)*interval_size)
        train_indices = all_indices[(all_indices < val_start) | (all_indices >= val_end)]

    # Draw positions inside the training subset, then translate them to dataset
    # row numbers so features and labels are indexed by the same rows.
    picks = np.random.randint(low=0,high=len(train_indices),size=batch_size)
    samples = train_indices[picks]

    if binary == True:
        train_batch = p_fingerprints[samples].reshape(batch_size,ip_dim)
        train_batch = train_batch.astype(int)
    else:
        train_batch = c_fingerprints[samples].reshape(batch_size,feat_dim)
        train_batch = train_batch.astype(float)

    train_batch = torch.cuda.FloatTensor(train_batch)
    train_batch = Variable(train_batch,requires_grad=False).cuda()
    target = Variable(torch.cuda.LongTensor(labels2[samples]),requires_grad=False)
    target = target.view(batch_size,)
    return train_batch,target

In [27]:
def get_val_data(validation_iter,binary = True):
    """Return the held-out fold for `validation_iter` (1-based).

    The fold is the contiguous row range
    [int((k-1)*0.2*no_examples), int(k*0.2*no_examples)) for k = validation_iter.
    Rows come from `p_fingerprints` when `binary` is True, otherwise from
    `c_fingerprints`.

    Returns a (features, labels) pair: features as a CUDA FloatTensor Variable,
    labels as the matching slice of the `labels2` NumPy array.
    """
    fold_width = int(no_examples)*0.2
    fold = slice(int((validation_iter-1)*fold_width),int((validation_iter) * fold_width))
    if(binary==True):
        source = p_fingerprints
    else:
        source = c_fingerprints
    val_features = source[fold]
    labels_val = labels2[fold]
    return Variable(torch.cuda.FloatTensor(val_features)),labels_val

In [28]:
class c_mlp(nn.Module):
    """Fully connected classifier: feat_dim -> 500 -> 500 -> 50 -> 2.

    The three hidden layers use leaky-ReLU activations; the final layer returns
    raw two-class scores with no activation applied.
    """

    def __init__(self):
        super(c_mlp,self).__init__()
        # Layer attribute names and creation order are kept as-is: they define
        # the state_dict keys and the order of parameter initialisation.
        self.l1 = nn.Linear(feat_dim,500)
        self.l2 = nn.Linear(500,500)
        self.l4 = nn.Linear(500,50)
        self.l5 = nn.Linear(50,2)

    def forward(self,x):
        """Run `x` (last dimension feat_dim) through the network and return the scores."""
        for hidden in (self.l1,self.l2,self.l4):
            x = F.leaky_relu(hidden(x))
        return self.l5(x)

In [None]:
# Cross-validated sweep over the positive-class loss weight.
# For each of the 5 folds and each weight, a fresh c_mlp is trained on random
# kernel-feature batches and scored on the held-out fold. A model is kept when it
# beats the best (tp - fn) seen so far and also has fewer false positives than
# the best saved model.
min_fn = 15          # defined for the selection logic but not read in this cell
max_fp = 200         # current false-positive ceiling a model must beat to be saved
maxtmpdiff = 10      # current (tp - fn) a model must beat to be recorded
cm_list = []         # [fold, weight, confusion-matrix] for every run that beat maxtmpdiff
get_model = 1        # suffix of the next checkpoint file (at most 4 distinct files)
for val_iter in range(1,6):
    print("val iter: ",val_iter)

    #weights_array = [5]
    weights_array = np.linspace(10,20,10)
    for i,w in enumerate(weights_array):
        mymlp = c_mlp().cuda()
        optimizer = torch.optim.Adagrad(mymlp.parameters(),lr=1e-4)
        # Class 1 is up-weighted by w relative to class 0 in the loss.
        criterion = nn.CrossEntropyLoss(weight=torch.cuda.FloatTensor([1,w]))

        for ep in range(2000):
            train_batch,target = get_train_batch(batch_size,binary = False,validation_iter = val_iter)
            model_op = mymlp(train_batch)
            loss = criterion(model_op,target)
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()

        ## After training check on cross validation data
        val_data,labels_val = get_val_data(val_iter,binary = False)
        train_op = mymlp(val_data)
        train_op = train_op.cpu().data.numpy()
        pred_labels = np.argmax(train_op,axis=1)
        # labels=[0,1] forces a 2x2 matrix even when a fold (and its predictions)
        # contain a single class, so the 4-way unpacking below cannot fail.
        cf = metrics.confusion_matrix(labels_val,pred_labels,labels=[0,1]).ravel()
        print('tn, fp, fn, tp: ',cf)
        [tn,fp,fn,tp] = cf
        wcf = [val_iter] + [w] + [cf]
        tmpdiff = tp-fn
        if(tmpdiff > maxtmpdiff):
            cm_list.append(wcf)
            # The original nested a `tmpdiff < maxtmpdiff` test here, which can never
            # hold inside `tmpdiff > maxtmpdiff`, so no model was ever saved.
            if(fp < max_fp):
                max_fp = fp
                maxtmpdiff = tmpdiff
                model_path = os.getcwd() + '/kernel_mac' + fname + '_' + str(get_model)
                torch.save(mymlp.state_dict(),model_path)
                print("saving model on val: ",val_iter," and weight: ",w)
                if(get_model<4):
                    # Advance the checkpoint suffix (the original expression
                    # discarded its result, so every save reused suffix 1).
                    get_model = get_model + 1
                else:
                    # Fourth checkpoint written: stop sweeping weights for this fold.
                    break

('val iter: ', 1)
('tn, fp, fn, tp: ', array([1592,    0,    5,    0]))
('tn, fp, fn, tp: ', array([1592,    0,    5,    0]))
('tn, fp, fn, tp: ', array([1592,    0,    5,    0]))
('tn, fp, fn, tp: ', array([1592,    0,    5,    0]))
('tn, fp, fn, tp: ', array([1592,    0,    5,    0]))
('tn, fp, fn, tp: ', array([1592,    0,    5,    0]))
('tn, fp, fn, tp: ', array([1592,    0,    5,    0]))
('tn, fp, fn, tp: ', array([1592,    0,    5,    0]))
('tn, fp, fn, tp: ', array([1592,    0,    5,    0]))
('tn, fp, fn, tp: ', array([1592,    0,    5,    0]))
('val iter: ', 2)
('tn, fp, fn, tp: ', array([1595,    0,    2,    0]))
('tn, fp, fn, tp: ', array([1595,    0,    2,    0]))
('tn, fp, fn, tp: ', array([1595,    0,    2,    0]))
('tn, fp, fn, tp: ', array([1595,    0,    2,    0]))
('tn, fp, fn, tp: ', array([1595,    0,    2,    0]))
('tn, fp, fn, tp: ', array([1595,    0,    2,    0]))
('tn, fp, fn, tp: ', array([1595,    0,    2,    0]))
