In [2]:
#!pip install torch torchvision #--upgrade
!pip install  pytorch-lightning   

Collecting pytorch-lightning
  Downloading https://files.pythonhosted.org/packages/7e/3e/599dfe7b8c35ef9c72df4825d876c023fafe5e2618483ee3f3f2f4cdc3a9/pytorch-lightning-0.0.2.tar.gz
Collecting test-tube (from pytorch-lightning)
  Downloading https://files.pythonhosted.org/packages/3a/50/47ea5613be804c8e6e0b01b1719e1f8186b8bc626441002b141c8a962abb/test_tube-0.631.tar.gz
Collecting pandas>=0.20.3 (from test-tube->pytorch-lightning)
[?25l  Downloading https://files.pythonhosted.org/packages/19/74/e50234bc82c553fecdbd566d8650801e3fe2d6d8c8d940638e3d8a7c5522/pandas-0.24.2-cp36-cp36m-manylinux1_x86_64.whl (10.1MB)
[K    100% |████████████████████████████████| 10.1MB 651kB/s 
Building wheels for collected packages: pytorch-lightning, test-tube
  Building wheel for pytorch-lightning (setup.py) ... [?25ldone
[?25h  Stored in directory: /home/notebook/.cache/pip/wheels/63/8f/d7/24693e99c7103d87e84c6b7905ff053541ffed249979cb22e5
  Building wheel for test-tube (setup.py) ... [?25ldone
[?25h  

In [6]:
import os
import sys

from test_tube import HyperOptArgumentParser, Experiment
from pytorch_lightning.models.trainer import Trainer
from pytorch_lightning.utils.arg_parse import add_default_args
from pytorch_lightning.utils.pt_callbacks import EarlyStopping, ModelCheckpoint
from pytorch_lightning.example_model import ExampleModel


def main(hparams):
    """
    Main training routine specific for this project
    :param hparams:
    :return:
    """
    # init experiment
    exp = Experiment(
        name=hparams.tt_name,
        debug=hparams.debug,
        save_dir=hparams.tt_save_path,
        version=hparams.hpc_exp_number,
        autosave=False,
        description=hparams.tt_description
    )

    exp.argparse(hparams)
    exp.save()

    model_save_path = '{}/{}/{}'.format(hparams.model_save_path, exp.name, exp.version)

    # build model
    model = ExampleModel(hparams)

    # callbacks
    early_stop = EarlyStopping(monitor='val_acc', patience=3, mode='min', verbose=True)
    checkpoint = ModelCheckpoint(filepath=model_save_path, save_function=None, save_best_only=True, verbose=True, monitor='val_acc', mode='min')

    # configure trainer
    trainer = Trainer(experiment=exp, checkpoint_callback=checkpoint, early_stop_callback=early_stop)

    # train model
    trainer.fit(model)


if __name__ == '__main__':

    # use default args given by lightning
    root_dir = os.path.split(os.path.dirname(sys.modules['__main__'].__file__))[0]
    parent_parser = HyperOptArgumentParser(strategy='random_search', add_help=False)
    add_default_args(parent_parser, root_dir)

    # allow model to overwrite or extend args
    parser = ExampleModel.add_model_specific_args(parent_parser)
    hyperparams = parser.parse_args()

    # train model
    main(hyperparams)

ModuleNotFoundError: No module named 'pytorch_lightning.example_model'

In [3]:
import torch
print('GPU available to Torch: {}'.format(torch.cuda.is_available()))

import os
os.getcwd()

import pandas as pd
df = pd.read_csv('F:/OneDrive - NTNU/UTR/data2.csv')
df.head()
df.describe()

import numpy as np
df['RFPlog']=np.log2(data['Fluorescence'])
df['RFPlog'].hist()

df['ReadsLog']=np.log2(df['#Reads Col'])
df['ReadsLog'].hist()
import matplotlib.pyplot as plt
plt.scatter(df['RFPlog'],df['ReadsLog'])

sorted_inds = df.sort_values('RFPlog').index.values
train_inds = sorted_inds[:int(0.1*len(sorted_inds))] # 95% of the data as the training set
test_inds = sorted_inds[int(0.1*len(sorted_inds)):] # UTRs with most reads at time point 0 as the test set
val_idx = int(0.9*len(train_inds))
val_inds = train_inds[val_idx:]
train_inds = train_inds[:val_idx]
print(len(train_inds))

import sys
import torch
from torch.utils.data import Dataset, DataLoader
from torch.autograd import Variable
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import pandas as pd
import numpy as np
import random
from ipywidgets import IntProgress
from tqdm import tqdm_notebook as tqdm

class DNADataset(Dataset):
    def __init__(self, df, seq_len):
        self.data = df
        self.bases = ['A','C','G','T']
        self.base_dict = dict(zip(self.bases,range(4))) # {'A' : 0, 'C' : 1, 'G' : 2, 'T' : 3}
        self.total_width = seq_len + 20
    def __len__(self):
        return (self.data.shape[0])
    def __getitem__(self, idx):
        seq = self.data.iloc[idx].UTR
        X = np.zeros((1, 4, self.total_width))
        y = self.data.iloc[idx].growth_rate
        for b in range(len(seq)):
            # this will assign a 1 to the appropriate base and position for this UTR sequence
            X[0, self.base_dict[seq[b]], int(b + round((self.total_width - len(seq))/2.))] = 1.
        return(seq, X, y)
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=128, kernel_size=(4, 13))
        self.dropout = nn.Dropout(p=0.15)
        self.conv2 = nn.Conv2d(128, 128, (1,13))
        self.fc1 = nn.Linear(128 * 1 * 34, 64)
        self.fc2 = nn.Linear(64, 1)
    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = self.dropout(x)
        x = F.relu(self.conv2(x))
        x = self.dropout(x)
        x = F.relu(self.conv2(x))
        x = self.dropout(x)
        x = x.view(-1, 128 * 1 * 34)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x

net = Net()
#net = net.cuda()

criterion = nn.MSELoss()
optimizer = optim.Adam(net.parameters())

train_data = DNADataset(df.iloc[train_inds], seq_len=50)
val_data = DNADataset(df.iloc[val_inds], seq_len=50)
test_data = DNADataset(df.iloc[test_inds], seq_len=50)
train_data_loader = DataLoader(train_data, batch_size=32,shuffle=True, num_workers=4)

val_data_loader = DataLoader(val_data, batch_size=32) # Validate everything in one batch?!

test_data_loader = DataLoader(test_data, batch_size=len(test_data)) # Validate everything in one batch?!

for epoch in range(10):
    for i_batch, sampled_batch in enumerate(tqdm(train_data_loader)):
        sequence, transformed_sequence, growth_rate = sampled_batch
        #inputs, labels = Variable(transformed_sequence.float().cuda()), Variable(growth_rate.float().cuda())
        inputs, labels = Variable(transformed_sequence.float()), Variable(growth_rate.float())
        optimizer.zero_grad()
        net.train()
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
    error = 0
    totalE = 0
    net.eval()
    for batch in tqdm(val_data_loader):
      v_seq, X_v, y_v = batch
      v_pred = net(Variable(X_v.float()))#.cuda()))
      totalE = totalE + y_v.size(0)
      raw_error = v_pred[:,0].data - y_v.float()#.cuda()
      error += (raw_error**2).sum()
      avg_mse = error / float(totalE)

torch.save(net, 'saved_model.t7')

error = 0
total = 0
for batch in tqdm(val_data_loader):
                v_seq, X_v, y_v = batch
                v_pred = net(Variable(X_v.float()))#.cuda()))
                total += y_v.size(0)
                raw_error = v_pred[:,0].data - y_v.float()#.cuda()
                error += (raw_error**2).sum()

avg_mse = error / float(total)
print("Validation error: {}".format(avg_mse))

error = 0
total = 0
for batch in tqdm(test_data_loader):
                v_seq, X_v, y_v = batch
                v_pred = net(Variable(X_v.float()))#.cuda()))
                total += y_v.size(0)
                raw_error = v_pred[:,0].data - y_v.float()#.cuda()
                error += (raw_error**2).sum()

avg_mse = error / float(total)
print("Test error: {}".format(avg_mse))


GPU available to Torch: True


FileNotFoundError: [Errno 2] File b'F:/OneDrive - NTNU/UTR/data2.csv' does not exist: b'F:/OneDrive - NTNU/UTR/data2.csv'

In [3]:
!pip install fastai --user

Collecting fastai
  Using cached https://files.pythonhosted.org/packages/98/6c/6b6d34dc783f1535190bfafbc5c0063656f06a3dd5c37b2b2b7bd0e8011e/fastai-1.0.46-py3-none-any.whl
Collecting torch>=1.0.0 (from fastai)
  Using cached https://files.pythonhosted.org/packages/31/ca/dd2c64f8ab5e7985c4af6e62da933849293906edcdb70dac679c93477733/torch-1.0.1.post2-cp36-cp36m-manylinux1_x86_64.whl
Collecting nvidia-ml-py3 (from fastai)
Collecting fastprogress>=0.1.19 (from fastai)
  Using cached https://files.pythonhosted.org/packages/86/30/01f597392e4e7b4982f387028da941e1fd60a8d53511d17225858d87fb22/fastprogress-0.1.20-py3-none-any.whl
Collecting spacy>=2.0.18 (from fastai)
  Using cached https://files.pythonhosted.org/packages/ae/6e/a89da6b5c83f8811e46e3a9270c1aed90e9b9ee6c60faf52b7239e5d3d69/spacy-2.0.18-cp36-cp36m-manylinux1_x86_64.whl
Collecting bottleneck (from fastai)
Collecting preshed<2.1.0,>=2.0.1 (from spacy>=2.0.18->fastai)
  Using cached https://files.pythonhosted.org/packages/20/93/f222fb95

In [4]:
import torch
torch.cuda.is_available() 

True

In [7]:
#coding https://mattmazur.com/2015/03/17/a-step-by-step-backpropagation-example/ with pytorch, checking with iterative version at  https://github.com/animesh/ann/blob/master/ann/Program.cs with following output
#Iteration = 1   Error = 0.298371108760003       Outputs = 0.751365069552316     0.772928465321463
#Iteration = 2   Error = 0.291027773693599       Outputs = 0.742088111190782     0.775284968294459  ...

inp=[0.05,0.10]
inpw=[[0.15,0.20],[0.25,0.3]]
hidw=[[0.4,0.45],[0.5,0.55]]
outputr=[0.01,0.99]
bias=[0.35,0.6]
lr=0.5
print(hidw,inpw)

[[0.4, 0.45], [0.5, 0.55]] [[0.15, 0.2], [0.25, 0.3]]


In [8]:
#"ALL YOU NEED IS A GOOD INIT" https://arxiv.org/pdf/1511.06422.pdf
import numpy as np
inpw=np.random.randn(len(outputr),int(len(inp)*np.sqrt(2/len(inp))))
hidw=np.random.randn(len(outputr),int(len(inp)*np.sqrt(2/len(inp))))
print(inpw,hidw)

[[ 1.84772999  0.31633614]
 [-0.85602802 -1.39846585]] [[ 0.20957657  0.20203366]
 [ 0.27245755 -0.70527726]]


In [9]:
Device = "cuda:0" if torch.cuda.is_available() else "cpu"
print(Device)
x=torch.tensor(inp, dtype=torch.double, device=Device)
y=torch.tensor(outputr, dtype=torch.double, device=Device)
b=torch.tensor(bias, dtype=torch.double, device=Device)
w1=torch.tensor(inpw, dtype=torch.double, device=Device)
w2=torch.tensor(hidw, dtype=torch.double, device=Device)
print(x.size(),y,b,w1.size(),w2)

cuda:0
torch.Size([2]) tensor([0.0100, 0.9900], device='cuda:0', dtype=torch.float64) tensor([0.3500, 0.6000], device='cuda:0', dtype=torch.float64) torch.Size([2, 2]) tensor([[ 0.2096,  0.2020],
        [ 0.2725, -0.7053]], device='cuda:0', dtype=torch.float64)


In [10]:
iter=0
while iter<1:
    iter+=1
    h = torch.sigmoid(x.matmul(w1.transpose(0,1))+b[0])
    y_pred = torch.sigmoid(h.matmul(w2.transpose(0,1))+b[1])
    print("iteration:",iter,"MSE: ",0.5*(((y_pred - y).pow(2)).sum()))
    print(w1)
    print(w2)

iteration: 1 MSE:  tensor(0.3147, device='cuda:0', dtype=torch.float64)
tensor([[ 1.8477,  0.3163],
        [-0.8560, -1.3985]], device='cuda:0', dtype=torch.float64)
tensor([[ 0.2096,  0.2020],
        [ 0.2725, -0.7053]], device='cuda:0', dtype=torch.float64)


In [6]:
#https://medium.com/dair-ai/a-simple-neural-network-from-scratch-with-pytorch-and-google-colab-c7f3830618e0
class Neural_Network(torch.nn.Module):
    def __init__(self, ):
        super(Neural_Network, self).__init__()
        self.W1 = w1.transpose(0,1) # 3 x 2 tensor
        self.W2 = w2.transpose(0,1)
    def forward(self, x):
        self.z = torch.matmul(x, self.W1) # 3 x 3 ".dot" does not broadcast in PyTorch
        self.z2 = self.sigmoid(self.z) # activation function
        self.z3 = torch.matmul(self.z2, self.W2)
        o = self.sigmoid(self.z3) # final activation function
        return o
    def sigmoid(self, s):
        return 1 / (1 + torch.exp(-s))
    def sigmoidPrime(self, s):
        return s * (1 - s)
    def backward(self, x, y, o):
        self.o_error = y - o # error in output
        self.o_delta = self.o_error * self.sigmoidPrime(o) # derivative of sig to error
        self.z2_error = torch.matmul(self.o_delta, torch.t(self.W2))
        self.z2_delta = self.z2_error * self.sigmoidPrime(self.z2)
        self.W1 += torch.matmul(x, self.z2_delta.transpose(0,1)) #torch.sigmoid(x.matmul(w1.transpose(0,1))+b[0])
        self.W2 += torch.matmul(self.z2, self.o_delta)
    def train(self, x, y):
        o = self.forward(x)
        self.backward(x, y, o)
    def saveWeights(self, model):
        torch.save(model, "NN")
    def predict(self):
        print ("Predicted weights: ")
        print ("Input (scaled): \n" + str(xPredicted))
        print ("Output: \n" + str(self.forward(xPredicted)))

In [7]:
NN = Neural_Network()
for i in range(1000):  # trains the NN 1,000 times
    print ("#" + str(i) + " Loss: " + str(torch.mean((y - NN(x))**2).detach().item()))  # mean sum squared loss
    NN.train(x, y)
NN.saveWeights(NN)
NN.predict()

#0 Loss: 0.24251985734837728


RuntimeError: Dimension out of range (expected to be in range of [-1, 0], but got 1)

In [1]:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import animation
%matplotlib inline
import seaborn as sns
import itertools
sns.set(style='white', rc={'figure.figsize':(14, 12)})
from sklearn.datasets import load_digits
from umap import UMAP

  return f(*args, **kwds)


In [9]:
#!pip3 install hypertools
#!pip3 uninstall UMAP
!pip3 install ffmpeg

Collecting ffmpeg
  Downloading https://files.pythonhosted.org/packages/f0/cc/3b7408b8ecf7c1d20ad480c3eaed7619857bf1054b690226e906fdf14258/ffmpeg-1.4.tar.gz
Building wheels for collected packages: ffmpeg
  Building wheel for ffmpeg (setup.py) ... [?25ldone
[?25h  Stored in directory: /home/notebook/.cache/pip/wheels/b6/68/c3/a05a35f647ba871e5572b9bbfc0b95fd1c6637a2219f959e7a
Successfully built ffmpeg
Installing collected packages: ffmpeg
Successfully installed ffmpeg-1.4


In [2]:
digits = load_digits()
data = digits.data
data

array([[ 0.,  0.,  5., ...,  0.,  0.,  0.],
       [ 0.,  0.,  0., ..., 10.,  0.,  0.],
       [ 0.,  0.,  0., ..., 16.,  9.,  0.],
       ...,
       [ 0.,  0.,  1., ...,  6.,  0.,  0.],
       [ 0.,  0.,  2., ..., 12.,  0.,  0.],
       [ 0.,  0., 10., ..., 12.,  1.,  0.]])

In [3]:
def tween(e1, e2, n_frames=20):
    for i in range(5):
        yield e1
    for i in range(n_frames):
        alpha = i / float(n_frames - 1)
        yield (1 - alpha) * e1 + alpha * e2
    for i in range(5):
        yield(e2)
    return

In [4]:
def generate_frame_data(data, arg_name='n_neighbors', arg_list=[3,4,5,7,9,11,15,20,25,30,40,60,100]):
    result = []
    es = []
    for arg in arg_list:
        kwargs = {arg_name:arg}
        if len(es) > 0:
            es.append(UMAP(init=es[-1], negative_sample_rate=3, **kwargs).fit_transform(data))
        else:
            es.append(UMAP(negative_sample_rate=3, **kwargs).fit_transform(data))
        
    for e1, e2 in zip(es[:-1], es[1:]):
        result.extend(list(tween(e1, e2)))
        
    return result

In [5]:
def create_animation(frame_data, arg_name='n_neighbors', arg_list=[3,4,5,7,9,11,15,20,25,30,40,60,100]):
    fig = plt.figure()
    all_data = np.vstack(frame_data)
    frame_bounds = (all_data[:, 0].min() * 1.1, 
                    all_data[:, 0].max() * 1.1,
                    all_data[:, 1].min() * 1.1, 
                    all_data[:, 1].max() * 1.1)
    ax = plt.axes(xlim=(frame_bounds[0], frame_bounds[1]), 
                  ylim=(frame_bounds[2], frame_bounds[3]))
    points = plt.scatter(frame_data[0][:, 0], frame_data[0][:, 1], 
                        s=5, c=digits.target, cmap='Spectral', animated=True)
    title = plt.title('', fontsize=24)
    ax.set_xticks([])
    ax.set_yticks([])
    cbar = plt.colorbar(values=np.arange(10), boundaries=np.arange(11)-0.5, ticks=np.arange(10), drawedges=True)
    cbar.ax.yaxis.set_ticklabels(np.arange(10), fontsize=18)

    def init():
        points.set_offsets(frame_data[0])
        title.set_text('UMAP with {}={}'.format(arg_name, arg_list[0]))
        return points,

    def animate(i):
        points.set_offsets(frame_data[i])
        if (i + 15) % 30 == 0:
            title.set_text('UMAP with {}={}'.format(arg_name, arg_list[(i + 15) // 30]))
        return points,

    anim = animation.FuncAnimation(fig, animate, init_func=init, frames=len(frame_data), interval=20, blit=True)
    anim.save('umap_anim-{}.gif'.format(arg_name), writer='imagemagick', fps=30)

In [6]:
def animate_param(data, arg_name='n_neighbors', arg_list=[3,4,5,7,9,11,15,20,25,30,40,60,100]):
    frame_data = generate_frame_data(data, arg_name, arg_list)
    create_animation(frame_data, arg_name, arg_list)

In [13]:
#import ffmpeg 
!pip3 install imagemagick
#animate_param(data, 'n_neighbors', [3,4,5,7,10,15])
#animate_param(data, 'min_dist', [0.0, 0.01, 0.1, 0.2, 0.4, 0.6, 0.9])
#animate_param(data, 'gamma', [0.1, 0.2, 0.5, 1.0, 2.0, 5.0, 10.0])
#animate_param(data, 'local_connectivity', [0.1, 0.2, 0.5, 1.0, 2.0, 5.0, 10.0])
#animate_param(data, 'bandwidth', [1.0, 2.0, 4.0, 8.0, 16.0, 32.0, 64.0])
#animate_param(data, 'set_op_mix_ratio', np.linspace(0.0, 1.0, 10))

Collecting imagemagick
[31m  Could not find a version that satisfies the requirement imagemagick (from versions: )[0m
[31mNo matching distribution found for imagemagick[0m


In [None]:
import pandas as pd
data = pd.read_csv("posw.csv")
data.head()

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
data['RFPlog']=np.log2(data['RFP'])
data.hist()

In [None]:
from functools import reduce
DNAortho = ('A','1000') , ('T','0100') ,  ('G','0010'), ('C','0001')
data['DNASeqOrtho']=reduce(lambda a, kv: a.str.replace(*kv), DNAortho, data['DNASeq'])
data['DNASeqOrtho']

In [None]:
X=Variable(torch.Tensor(data['DNASeqOrtho'].apply(lambda x: float(x.find('10'))).reshape(-1,1))).cuda()
y=Variable(torch.Tensor(data['RFPlog'].reshape(-1,1))).cuda()
X, y
#int(data['DNASeqOrtho'].sum(),2)
#data['DNASeqOrtho'].apply(lambda x: x.find('1'))

In [None]:
class LinearRegressionModel(nn.Module):
    def __init__(self, input_dim, output_dim):
        super(LinearRegressionModel, self).__init__() 
        self.linear = nn.Linear(input_dim, output_dim,bias=True).cuda()
    def forward(self, x):
        out = self.linear(x)
        return out

In [None]:
input_dim = 1
output_dim = 1
l_rate = 0.01
model = LinearRegressionModel(input_dim,output_dim)
criterion = nn.MSELoss()
optimiser = torch.optim.SGD(model.parameters(), lr = l_rate)

In [None]:
epochs = 100
for epoch in range(epochs):
    epoch +=1
    optimiser.zero_grad()
    outputs = model.forward(X)
    loss = criterion(outputs, y)
    loss.backward()# back props
    optimiser.step()# update the parameters
    print('epoch {}, loss {}'.format(epoch,loss.data[0]))

In [None]:
predicted =model.forward(X).cpu().data.numpy()
#plt.plot(X, y, 'i', label = 'from data', alpha = .5)
plt.plot(y.cpu().data.numpy(), predicted, label = 'prediction', alpha = 0.5)
plt.legend()
plt.show()
print(model.state_dict())

In [None]:
#http://pytorch.org/tutorials/beginner/blitz/data_parallel_tutorial.html#sphx-glr-beginner-blitz-data-parallel-tutorial-py
import torch.nn as nn
from torch.autograd import Variable
from torch.utils.data import Dataset, DataLoader
input_size = 5
output_size = 2
batch_size = 30
data_size = 100

In [None]:
class RandomDataset(Dataset):

    def __init__(self, size, length):
        self.len = length
        self.data = torch.randn(length, size)

    def __getitem__(self, index):
        return self.data[index]

    def __len__(self):
        return self.len

rand_loader = DataLoader(dataset=RandomDataset(input_size, 100),
                         batch_size=batch_size, shuffle=True)

In [None]:
class Model(nn.Module):
    # Our model

    def __init__(self, input_size, output_size):
        super(Model, self).__init__()
        self.fc = nn.Linear(input_size, output_size)

    def forward(self, input):
        output = self.fc(input)
        print("  In Model: input size", input.size(),
              "output size", output.size())
        return output

In [None]:
model = Model(input_size, output_size)
if torch.cuda.device_count() > 1:
  print("Let's use", torch.cuda.device_count(), "GPUs!")
  # dim = 0 [30, xxx] -> [10, ...], [10, ...], [10, ...] on 3 GPUs
  model = nn.DataParallel(model)

if torch.cuda.is_available():
   model.cuda()

In [None]:
for data in rand_loader:
    if torch.cuda.is_available():
        input_var = Variable(data.cuda())
    else:
        input_var = Variable(data)

    output = model(input_var)
    print("Outside: input size", input_var.size(),
          "output_size", output.size())

In [None]:
#https://www.youtube.com/watch?time_continue=96&v=vMZ7tK-RYYc
import numpy as np
import time

from numba import vectorize, cuda

#@vectorize(['float32(float32,float32)'],target='cuda')
def subVector(ε,σ):
    return ε + σ

Elements=10000
A=np.ones(Elements,dtype=np.float32)
B=np.ones(Elements,dtype=np.float32)
C=subVector(A,B)
ts=time.time()
te=time.time()
print(C,te-ts)

In [None]:
import pyro
from pyro.distributions import Normal
from pyro.infer import SVI
from pyro.optim import Adam