In [4]:
# pytorch tensors
import torch
import torch.nn as nn
import numpy as np
import matplotlib.pyplot as plt
from torch.autograd import Variable
import os
from sklearn.model_selection import train_test_split
import pandas as pd
import pickle

# Seed the NumPy generator and the torch CPU / CUDA generators with one shared value
# so that the random draws made through them are repeatable on a fresh kernel.
seed=4
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)


%matplotlib inline

In [8]:
# Two stacked, bidirectional LSTM layers: 3 input features, 3 hidden units.
lstm = nn.LSTM(3, 3, num_layers=2, bidirectional=True)
# Every layer owns four tensors (weight_ih, weight_hh, bias_ih, bias_hh).
# Report whether the forward-direction tensors of both layers live on the GPU,
# layer 0 before layer 1 for each kind of tensor.
for _kind in ("weight_ih", "weight_hh", "bias_ih", "bias_hh"):
    for _layer in (0, 1):
        print(getattr(lstm, f"{_kind}_l{_layer}").data.is_cuda)
def init_lstm(lstm):
    '''
    Re-initialise every parameter of an LSTM in place.

    input: lstm: torch LSTM (any module exposing named_parameters() is handled the same way)
    output: None. Parameters whose name contains 'bias' are set to 0, parameters
            whose name contains 'weight' are Xavier-normal initialised.
    '''
    for name, param in lstm.named_parameters():
        print(f"processing:{name,param}")
        if 'bias' in name:
            # Trailing-underscore initialisers are the supported in-place API;
            # nn.init.constant / nn.init.xavier_normal are deprecated aliases.
            nn.init.constant_(param, 0.0)
        elif 'weight' in name:
            nn.init.xavier_normal_(param)
# Re-initialise the LSTM created at the top of this cell (in place).
init_lstm(lstm)
# Inspect one weight matrix after initialisation.
print("----------------------")
print(lstm.weight_hh_l1.data)
# Sanity check: init_lstm zeroes only the biases, so this weight matrix must not be all zeros.

False
False
False
False
False
False
False
False
processing:('weight_ih_l0', Parameter containing:
tensor([[ 0.3948,  0.3234, -0.5316],
        [-0.5314, -0.5114,  0.5051],
        [-0.0374,  0.4788,  0.3158],
        [ 0.4603,  0.5318,  0.5515],
        [-0.4121,  0.3356, -0.1010],
        [ 0.2258, -0.0862,  0.2007],
        [ 0.1558,  0.0313,  0.5004],
        [ 0.1755, -0.1903,  0.2413],
        [ 0.2875,  0.3691, -0.3227],
        [-0.2263,  0.5773,  0.3577],
        [-0.3565, -0.5711,  0.4456],
        [ 0.4936,  0.4473,  0.4038]], requires_grad=True))
processing:('weight_hh_l0', Parameter containing:
tensor([[ 0.0454, -0.1491, -0.0678],
        [-0.5677,  0.2599, -0.3704],
        [-0.2456,  0.1175,  0.4153],
        [ 0.2887, -0.2673,  0.4662],
        [-0.3467,  0.3040,  0.3173],
        [ 0.0727,  0.5667,  0.4245],
        [ 0.0882,  0.0305, -0.4309],
        [-0.3195,  0.4416, -0.0095],
        [-0.2118, -0.5731, -0.3095],
        [-0.2579, -0.3883,  0.0132],
        [ 0.5683

  app.launch_new_instance()


In [15]:
# What happens when there are more hidden units (10) than input features (3)?
# The two weight matrices no longer share a shape, so print every parameter's size.
lstm = nn.LSTM(3, 10, num_layers=1, bidirectional=True)
for name, param in lstm.named_parameters():
    print(f"processing:{name,type(param),param.size()}")

processing:('weight_ih_l0', <class 'torch.nn.parameter.Parameter'>, torch.Size([40, 3]))
processing:('weight_hh_l0', <class 'torch.nn.parameter.Parameter'>, torch.Size([40, 10]))
processing:('bias_ih_l0', <class 'torch.nn.parameter.Parameter'>, torch.Size([40]))
processing:('bias_hh_l0', <class 'torch.nn.parameter.Parameter'>, torch.Size([40]))
processing:('weight_ih_l0_reverse', <class 'torch.nn.parameter.Parameter'>, torch.Size([40, 3]))
processing:('weight_hh_l0_reverse', <class 'torch.nn.parameter.Parameter'>, torch.Size([40, 10]))
processing:('bias_ih_l0_reverse', <class 'torch.nn.parameter.Parameter'>, torch.Size([40]))
processing:('bias_hh_l0_reverse', <class 'torch.nn.parameter.Parameter'>, torch.Size([40]))


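Each `weight_*` / `bias_*` tensor printed above has `4 * hidden_size` rows (40 = 4 × 10) because it stacks the parameters of the four gates $i$, $f$, $g$, $o$ defined below ($\sigma$ is the sigmoid, $\odot$ the element-wise product):

$\begin{split}\begin{array}{ll}
i_t = \sigma(W_{ii} x_t + b_{ii} + W_{hi} h_{(t-1)} + b_{hi}) \\
f_t = \sigma(W_{if} x_t + b_{if} + W_{hf} h_{(t-1)} + b_{hf}) \\
g_t = \tanh(W_{ig} x_t + b_{ig} + W_{hg} h_{(t-1)} + b_{hg}) \\
o_t = \sigma(W_{io} x_t + b_{io} + W_{ho} h_{(t-1)} + b_{ho}) \\
c_t = f_t \odot c_{(t-1)} + i_t \odot g_t \\
h_t = o_t \odot \tanh(c_t)
\end{array}\end{split}$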

In [16]:
# Same experiment with three stacked layers: list every parameter with its type and size.
lstm = nn.LSTM(3, 10, num_layers=3, bidirectional=True)
for name, param in lstm.named_parameters():
    print(f"processing:{name,type(param),param.size()}")

processing:('weight_ih_l0', <class 'torch.nn.parameter.Parameter'>, torch.Size([40, 3]))
processing:('weight_hh_l0', <class 'torch.nn.parameter.Parameter'>, torch.Size([40, 10]))
processing:('bias_ih_l0', <class 'torch.nn.parameter.Parameter'>, torch.Size([40]))
processing:('bias_hh_l0', <class 'torch.nn.parameter.Parameter'>, torch.Size([40]))
processing:('weight_ih_l0_reverse', <class 'torch.nn.parameter.Parameter'>, torch.Size([40, 3]))
processing:('weight_hh_l0_reverse', <class 'torch.nn.parameter.Parameter'>, torch.Size([40, 10]))
processing:('bias_ih_l0_reverse', <class 'torch.nn.parameter.Parameter'>, torch.Size([40]))
processing:('bias_hh_l0_reverse', <class 'torch.nn.parameter.Parameter'>, torch.Size([40]))
processing:('weight_ih_l1', <class 'torch.nn.parameter.Parameter'>, torch.Size([40, 20]))
processing:('weight_hh_l1', <class 'torch.nn.parameter.Parameter'>, torch.Size([40, 10]))
processing:('bias_ih_l1', <class 'torch.nn.parameter.Parameter'>, torch.Size([40]))
processing

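<h6>Notes to myself: shuffling df.values in place does not shuffle the DataFrame; shuffle a copy of the array (or index it with a random permutation) instead.</h6>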
   #foo = np.copy(df.values)
    #np.random.shuffle(df.values)
    #print(df.head()) #not shuffled
    #np.random.shuffle(foo)
    #print(foo[:4])
    #indices = np.random.permutation(foo.shape[0])
    #print(404290*.8, 404290*.2)
    #trainidx,testidx = indices[:((int)(404290*.8))],indices[((int)(404290*.8)):]
    #train,test=foo[trainidx,:],foo[testidx,:]
 

In [22]:
def save(X_train,X_valid,X_test,y_train,y_valid,y_test):
    '''
    Pickle each of the six splits to its own file.

    input: the train / valid / test feature and label objects
    output: None. Every split is handed to save_single_file under its own
            variable name ("X_train", "X_valid", ...).
    '''
    named_splits = (
        ("X_train", X_train),
        ("X_valid", X_valid),
        ("X_test", X_test),
        ("y_train", y_train),
        ("y_valid", y_valid),
        ("y_test", y_test),
    )
    for split_name, split_data in named_splits:
        save_single_file(split_name, split_data)
    
    
def save_single_file(filename,data):
    '''
    Pickle `data` to "<filename>.pkl".

    input: filename: target path without the ".pkl" extension
           data: any picklable object
    output: None. An existing file with the same name is overwritten.
    '''
    # The context manager closes the handle even if pickle.dump raises,
    # which the manual open()/close() pair did not guarantee.
    with open(filename+'.pkl', 'wb') as fh:
        pickle.dump(data, fh)
    
def load_single_file(filename):
    '''
    Read back an object pickled to "<filename>.pkl".

    input: filename: path without the ".pkl" extension
    output: the unpickled object
    '''
    # SECURITY: pickle.load can execute arbitrary code; only load files that
    # this notebook (or another trusted source) wrote.
    # The context manager closes the handle even if unpickling fails.
    with open(filename+'.pkl','rb') as fh:
        return pickle.load(fh)

def load_data():
    '''
    Read the six pickled splits back through load_single_file.

    output: tuple (X_train, X_valid, X_test, y_train, y_valid, y_test)
    '''
    split_names = ("X_train", "X_valid", "X_test", "y_train", "y_valid", "y_test")
    return tuple(load_single_file(split_name) for split_name in split_names)
    
def make_dataset(path, input_file='quora_duplicate_questions.tsv', test_size=0.40, random_state=42):
    '''
    Load the Quora duplicate-questions TSV and split it into train / valid / test arrays.

    input: path: directory that contains `input_file`
           input_file: name of the tab-separated file inside `path`
           test_size: fraction of rows held out from the training set; the held-out
                      rows are then split 50/50 into test and valid
                      (the default 0.40 therefore gives a 60/20/20 split)
           random_state: seed passed to both train_test_split calls
    output: X_train, X_valid, X_test, y_train, y_valid, y_test as numpy arrays.
            X_* hold the (question1, question2) columns, y_* the is_duplicate column.
    '''
    df = pd.read_csv(os.path.join(path,input_file),sep='\t')
    # The id columns carry no signal for the duplicate / not-duplicate label.
    df = df.drop(["id","qid1","qid2"],axis=1)
    print(df.head())
    print(f"num rows dataframe:{len(df)}")
    print(df.values.shape)
    X_train, X_test, y_train, y_test = train_test_split(
        df[['question1','question2']].values,
        df[['is_duplicate']].values,
        test_size=test_size,
        random_state=random_state,
    )
    # Halve the held-out rows into the final test and validation sets.
    X_test,X_valid,y_test,y_valid = train_test_split(X_test, y_test, test_size=0.50, random_state=random_state)
    # The second label used to read "X_train.shape" although it reports y_train.
    print(f"X_train.shape:{X_train.shape} y_train.shape:{y_train.shape}")
    print(f"X_test.shape:{X_test.shape} y_test.shape:{y_test.shape}")
    print(f"X_valid.shape:{X_valid.shape} y_valid.shape:{y_valid.shape}")

    # Peek at a few rows of each split.
    print(X_train[:6],y_train[:6])
    print('---------------------')
    print(X_test[:6],y_test[:6])
    print('---------------------')
    print(X_valid[:6],y_valid[:6])

    return X_train,X_valid,X_test,y_train,y_valid,y_test
    
# NOTE(review): hard-coded absolute path; this cell only runs on a machine that has this directory.
X_train,X_valid,X_test,y_train,y_valid,y_test = make_dataset('/home/dc/cs230_project')
# Persist the six splits as .pkl files, then read them straight back (round trip through disk).
save(X_train,X_valid,X_test,y_train,y_valid,y_test)
X_train,X_valid,X_test,y_train,y_valid,y_test = load_data()

                                           question1  \
0  What is the step by step guide to invest in sh...   
1  What is the story of Kohinoor (Koh-i-Noor) Dia...   
2  How can I increase the speed of my internet co...   
3  Why am I mentally very lonely? How can I solve...   
4  Which one dissolve in water quikly sugar, salt...   

                                           question2  is_duplicate  
0  What is the step by step guide to invest in sh...             0  
1  What would happen if the Indian government sto...             0  
2  How can Internet speed be increased by hacking...             0  
3  Find the remainder when [math]23^{24}[/math] i...             0  
4            Which fish would survive in salt water?             0  
num rows dataframe:404290
(404290, 3)
X_train.shape:(242574, 2) X_train.shape:(242574, 1)
X_test.shape:(80858, 2) y_test.shape:(80858, 1)
X_valid.shape:(80858, 2) y_valid.shape:(80858, 1)
[['How should I prepare to get a job as a programmer in a sta

In [24]:
# Reload the pickled splits and print their shapes on one line.
X_train,X_valid,X_test,y_train,y_valid,y_test = load_data()
print(*(split.shape for split in (X_train,X_valid,X_test,y_train,y_valid,y_test)))

(242574, 2) (80858, 2) (80858, 2) (242574, 1) (80858, 1) (80858, 1)


In [None]:

#torch.tensor copies the numpy array into a new CPU tensor
a = torch.tensor(np.array([[1, 2, 3], [4, 5, 6]]))
print(type(a))
print(a.view(-1))
b=torch.tensor(np.array([[1,2],[3,4],[5,6]]))
print(a.size(),b.size())
print(b.view(-1)) #view(-1) flattens to 1-D
#.numpy() to get back numpy
print(type(a.numpy()),a.numpy())
# Only query / allocate on the GPU when one is present: torch.cuda.current_device()
# and CUDA tensor construction both raise on a CPU-only machine.
cuda_ok = torch.cuda.is_available()
print("current torch device",torch.cuda.current_device() if cuda_ok else None,cuda_ok)
# 1000x1000 float32 zeros; replaces the legacy torch.cuda.FloatTensor(...).fill_(0).
c = torch.zeros(1000, 1000, dtype=torch.float32, device="cuda" if cuda_ok else "cpu")
#images: numpy is HxWxChannels, torch is ChannelsxHxW; use transform.ToTensor
# Two 2-D point clouds of 100 points each; the second is shifted by +0.2 on both axes.
X1a = torch.rand(100, 1)
X1b = torch.rand(100, 1)
X2a = torch.rand(100, 1) + .2
X2b = torch.rand(100, 1) + .2

# Integer labels for the two clouds (not used by the plot in this cell).
Y1 = torch.zeros(1, 100, dtype=torch.int32)
Y2 = torch.ones(1, 100, dtype=torch.int32)

# Despite its name, Y holds the coordinates of the second cloud, not labels.
X = torch.cat((X1a, X1b), dim=1)
Y = torch.cat((X2a, X2b), dim=1)
plt.scatter(X[:, 0], X[:, 1], color='red')
plt.scatter(Y[:, 0], Y[:, 1], color='green')

plt.show()

In [None]:
# Toy binary-classification set laid out for training: 2000 rows (examples) x 50 columns (features).
# Rows 0-999 are class 0 (random features); rows 1000-1999 are class 1 (random features shifted by +0.2).
X1 = torch.rand(1000, 50)
X2 = torch.rand(1000, 50) + .2
Y1 = torch.zeros(1000, 1)
Y2 = torch.ones(1000, 1)
# Stack the two classes along the row axis so features and labels stay aligned.
X = torch.cat((X1, X2), dim=0)
Y = torch.cat((Y1, Y2), dim=0)
print(X.size(),Y1.size(),Y2.size(),Y.size())
# Scatter two feature columns of each class against each other.
# NOTE(review): class 0 plots columns 2 and 3 while class 1 plots columns 0 and 1 — confirm this is intended.
plt.scatter(X1[:,2],X1[:,3],color="red")
plt.scatter(X2[:,0],X2[:,1],color="blue")

plt.show()

In [None]:
# Shapes of the feature matrix X and the label column Y currently bound in the kernel.
print(X.size(),Y.size())

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim

class NN(nn.Module):
    '''
    Small fully connected binary classifier:
    Linear(input_dim, input_dim) -> ReLU -> Linear(input_dim, hidden_dim) -> ReLU
    -> Linear(hidden_dim, 1) -> Sigmoid.
    '''
    def __init__(self, input_dim=50, hidden_dim=100):
        '''
        input: input_dim: number of input features (default 50, the previous hard-coded value)
               hidden_dim: width of the second hidden layer (default 100, as before)
        '''
        # Initialise nn.Module first so that attribute registration is fully set up
        # before anything is assigned on self.
        super(NN,self).__init__()
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.linear1 = nn.Linear(self.input_dim, self.input_dim)
        self.relu1 = nn.ReLU()
        self.linear2 = nn.Linear(self.input_dim,self.hidden_dim)
        self.relu2 = nn.ReLU()
        self.linear3 = nn.Linear(self.hidden_dim,1)
        self.sigmoid = nn.Sigmoid()
    def forward(self,X):
        '''
        input: X: tensor whose last dimension is input_dim
        output: sigmoid activations with a last dimension of 1
        '''
        # Intermediate activations are kept on self, as in the original code
        # (presumably for inspection after a forward pass); each call overwrites them.
        self.l1_linear = self.linear1(X)
        self.l1_relu = self.relu1(self.l1_linear)
        self.l2_linear = self.linear2(self.l1_relu)
        self.l2_relu = self.relu2(self.l2_linear)
        self.l3_linear=self.linear3(self.l2_relu)
        self.l3_sigmoid = self.sigmoid(self.l3_linear)
        return self.l3_sigmoid

# Binary classifier, binary cross-entropy on its sigmoid output, and an Adam optimiser.
model = NN()
criterion = nn.BCELoss()
opt = optim.Adam(model.parameters(), lr=1e-3)


In [None]:
def train(model,opt,criterion,inputs=None,targets=None,batch_size=50):
    '''
    Run one pass over the data in consecutive mini-batches, updating the model.

    input: model: torch module to train
           opt: optimizer built over model.parameters()
           criterion: loss called as criterion(prediction, target)
           inputs, targets: tensors sliced along dim 0; when omitted the notebook
                            globals X and Y are used (the original behaviour)
           batch_size: rows per mini-batch (default 50, the previous hard-coded value)
    output: list with one loss value (0-d numpy array) per mini-batch
    '''
    if inputs is None:
        inputs = X
    if targets is None:
        targets = Y
    model.train()
    losses=[]
    for i in range(0,inputs.size(0),batch_size):
        # Plain tensor slices: the Variable wrapper is deprecated and no longer needed.
        X_batch = inputs[i:i+batch_size]
        Y_batch = targets[i:i+batch_size]
        opt.zero_grad()
        y_hat = model(X_batch)
        loss = criterion(y_hat,Y_batch)
        loss.backward()
        opt.step()
        # detach().cpu() also works when the loss lives on a GPU; .data.numpy() did not.
        batch_loss = loss.detach().cpu().numpy()
        print(i,batch_loss)
        losses.append(batch_loss)

    return losses

In [None]:
# Train for a fixed number of epochs and plot the per-batch loss across all of them.
epochs=5
losses=[]
for x in range(epochs):
    losses.extend(train(model,opt,criterion))
plt.plot(losses)
plt.show()

In [None]:
class SimpleLogistic(nn.Module):
    '''Single linear layer producing one raw score per class.'''

    def __init__(self,input_size,num_classes):
        '''
        input: input_size: number of input features
               num_classes: number of output scores
        '''
        super().__init__()
        self.linear = nn.Linear(in_features=input_size, out_features=num_classes)

    def forward(self,x):
        '''Return the output of the linear layer for `x` (no activation applied).'''
        scores = self.linear(x)
        return scores

# Hyper-parameters for the linear classifier.
num_classes=10
learning_rate = .001
num_epochs = 5
input_size = 28*28

# NOTE: this rebinds `model` and `criterion`, replacing the NN / BCELoss objects
# created in the earlier cell.
model = SimpleLogistic(input_size,num_classes)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
# NOTE(review): train_loader is not defined in any cell visible here; it presumably
# yields (images, labels) batches — confirm it is created before this cell runs.
for x in range(num_epochs):
    for i,(images,labels) in enumerate(train_loader):
        # Flatten each image to a vector of input_size values. The deprecated
        # Variable wrappers were dropped; plain tensors track gradients themselves.
        images = images.view(-1,input_size)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs,labels)
        loss.backward()
        optimizer.step()
        # Log every 100th mini-batch.
        if(i%100==0):
            print(f"epoch:{x} i:{i} loss:{loss}")
    

In [None]:
class LSTM(nn.Module):
    def __init__(self):
        super(LSTM, self).__init__()
        