# 1. Packeges

In [8]:
# Liberies
import os
import glob
import torch
import numpy as np
import pandas as pd
from tqdm.notebook import tqdm

# 2. Data preprocessing

In [None]:
path = '../data/new AT cases/'
os.chdir(path)
files = glob.glob('*.csv')
print(files)

In [None]:
# function to isolate no. of mutations
def mut_count(file, chr_list):

    # created the dataframe
    df = pd.read_csv(file, low_memory=False)
    df = df.iloc[:, 77]  # only the chr column
    name = file[:-4]     # name of the input file

     
    chr_count = []
    chr_values = df.value_counts()
    chr_names = df.unique().tolist()

    # counts no. of mutation in each chr
    for gene in chr_list:
        if gene in chr_names:
            chr_count.append(chr_values[gene])
        else:
            chr_count.append(0)

    # creates the mutation count dataframe
    dic = {'Chromosome': chr_list, name: chr_count}
    df_chr = pd.DataFrame.from_dict(dic)

    return(df_chr)

In [None]:
df = pd.read_csv(files[0])
chr_names_list = df.iloc[:, 77].unique().tolist()

# start col
result = mut_count(files[0], chr_names_list)

#merging results
for file in tqdm(files[1:]):
    tmp = mut_count(file, chr_names_list)
    result = pd.concat([result, tmp.iloc[:,1]], axis=1)
    
#df.to_csv('mut_count_all.csv', index=False)
result.to_csv('mut_count_all.csv', index=False)
result 

# 3. Data processing

In [9]:
# Mutation counts of patients as an input to neural net
muts = pd.read_csv('../data/new AT cases/mut_count_all.csv')  
muts.head()

Unnamed: 0,Chromosome,EX466,EX7777,EX870,EX882,EX897,EX899,EX915,EX916,EX927R
0,chr1,13764,2237,14655,12923,12703,13313,12678,12923,12768
1,chr2,10209,1019,10910,9400,9764,9694,9068,8997,9415
2,chr3,7427,835,8463,7138,7218,6830,6903,7161,7093
3,chr4,5875,641,6608,5415,5436,5705,5370,5461,5324
4,chr5,5877,789,6709,5447,6033,5934,6011,5684,5730


In [10]:
patients = muts.columns.values.tolist()[1:]
patients

['EX466',
 'EX7777',
 'EX870',
 'EX882',
 'EX897',
 'EX899',
 'EX915',
 'EX916',
 'EX927R']

In [11]:
# Individual normalizing
def norm_ind(tensor):
    ts = tensor/tensor.max()
    return(ts)

In [12]:
# ====== inputs  ====== #

x = [] # muts_norm_glob


# making the two sets of inputs and their labels
for patient in patients:
    ts = torch.FloatTensor(muts[patient])
    x.append(norm_ind(ts).tolist())

x = torch.FloatTensor(x)
x[0]

tensor([1.0000, 0.7417, 0.5396, 0.4268, 0.4270, 0.4560, 0.5663, 0.3638, 0.4303,
        0.4453, 0.6061, 0.5548, 0.2039, 0.3498, 0.3372, 0.4346, 0.5857, 0.1766,
        0.6691, 0.2470, 0.1494, 0.2723, 0.0039, 0.1757, 0.0046])

# 4. Prediction

In [13]:
from torch import nn, optim
import torch.nn.functional as F

# The neural net
class Net(nn.Module):
  def __init__(self, n_features):
    super(Net, self).__init__()
    self.fc1 = nn.Linear(n_features, 12)
    self.fc2 = nn.Linear(12, 5)
    self.fc3 = nn.Linear(5, 1)
    
  def forward(self, x):
    x = F.relu(self.fc1(x))
    x = F.relu(self.fc2(x))
    return torch.sigmoid(self.fc3(x))


# Specify a path
conf = '../code/model3.pt'

# Load the saved weights
net = Net(x.shape[1])
net.load_state_dict(torch.load(conf))
net.eval()

Net(
  (fc1): Linear(in_features=25, out_features=12, bias=True)
  (fc2): Linear(in_features=12, out_features=5, bias=True)
  (fc3): Linear(in_features=5, out_features=1, bias=True)
)

In [14]:
def predict_type(muts_tensor):
  t = muts_tensor
  output = net(t)
  return output #.ge(0.5).item()

In [15]:
for i, patient in enumerate(patients):
    pred = predict_type(x[i])
    print(patient, pred[0])


EX466 tensor(0.0230, grad_fn=<SelectBackward>)
EX7777 tensor(1.0000, grad_fn=<SelectBackward>)
EX870 tensor(0.1845, grad_fn=<SelectBackward>)
EX882 tensor(0.0837, grad_fn=<SelectBackward>)
EX897 tensor(0.4034, grad_fn=<SelectBackward>)
EX899 tensor(0.9990, grad_fn=<SelectBackward>)
EX915 tensor(0.2573, grad_fn=<SelectBackward>)
EX916 tensor(0.0044, grad_fn=<SelectBackward>)
EX927R tensor(0.9997, grad_fn=<SelectBackward>)


In [36]:
w = []
b = []

for name, param in model.named_parameters():
    if name == 'fc1.weight':
        w.append(param.data)
    elif name == 'fc1.bias':
        b.append(param.data)
    else:
        pass


In [68]:
chrs = muts['Chromosome'].tolist()
wb = {}

# making a dic with weight and biad of each chr
for i, choromosome in enumerate(chrs):
    tmp_w = []
    tmp_b = []
    for j in range(len(w[0])):
        tmp_w.append(w[0][j][i])
        #tmp_b.append(b[0][j])
    wb[choromosome] = tmp_w


In [71]:
for choromosome in chrs:
    a = torch.FloatTensor(wb[choromosome]).mean()
    print (choromosome,':', a)

chr1 : tensor(-0.0392)
chr2 : tensor(0.0117)
chr3 : tensor(0.0022)
chr4 : tensor(-0.0085)
chr5 : tensor(-0.0258)
chr6 : tensor(0.0036)
chr7 : tensor(-0.0062)
chr8 : tensor(0.0106)
chr9 : tensor(-0.0130)
chr10 : tensor(-0.0477)
chr11 : tensor(0.0127)
chr12 : tensor(-0.0252)
chr13 : tensor(0.0394)
chr14 : tensor(0.0373)
chr15 : tensor(0.0131)
chr16 : tensor(0.0356)
chr17 : tensor(0.0166)
chr18 : tensor(-0.0107)
chr19 : tensor(0.0153)
chr20 : tensor(-0.0628)
chr21 : tensor(0.0256)
chr22 : tensor(-4.9683e-06)
chrM : tensor(0.0515)
chrX : tensor(-0.0061)
chrY : tensor(0.0491)


In [30]:
model = Net(x.shape[1])
#for param in model.parameters():
for name, param in model.named_parameters():
    print(name)  #,'\n',param.data)
    for i in range(len(param.data)):
        print(i+1,':',param.data[i].mean())

fc1.weight
1 : tensor(-0.0009)
2 : tensor(-0.0094)
3 : tensor(0.0501)
4 : tensor(-0.0115)
5 : tensor(-0.0040)
6 : tensor(0.0318)
7 : tensor(-0.0185)
8 : tensor(-0.0331)
9 : tensor(0.0173)
10 : tensor(0.0100)
11 : tensor(0.0149)
12 : tensor(-0.0088)
fc1.bias
1 : tensor(0.0601)
2 : tensor(0.0304)
3 : tensor(0.0449)
4 : tensor(-0.1819)
5 : tensor(-0.0080)
6 : tensor(0.0163)
7 : tensor(0.0624)
8 : tensor(0.1811)
9 : tensor(0.0838)
10 : tensor(-0.1470)
11 : tensor(0.1099)
12 : tensor(0.0239)
fc2.weight
1 : tensor(-0.0193)
2 : tensor(-0.0019)
3 : tensor(0.0803)
4 : tensor(-0.0396)
5 : tensor(-0.0961)
fc2.bias
1 : tensor(-0.1089)
2 : tensor(0.0446)
3 : tensor(0.0872)
4 : tensor(-0.2360)
5 : tensor(-0.0926)
fc3.weight
1 : tensor(-0.0031)
fc3.bias
1 : tensor(0.4145)
