<a href="https://colab.research.google.com/github/conniaren/GenotypeImputationProject/blob/master/2-Models/autoencoder_model_1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
# Mount Google Drive at /content/gdrive so the dataset file loaded in a later cell is reachable.
from google.colab import drive
drive.mount('/content/gdrive')
import scipy
from scipy import sparse 
import numpy as np
import torch
from torch.utils.data import DataLoader, TensorDataset
from torch import nn, optim 

Mounted at /content/gdrive


In [17]:
# Load the sparse genotype matrix from the mounted Drive. The path is absolute
# (Drive is mounted at /content/gdrive) so the load does not depend on the
# kernel's current working directory.
dataset = sparse.load_npz('/content/gdrive/My Drive/Colab Notebooks/output_matrix.npz')
# toarray() returns a plain ndarray; todense() would return a numpy.matrix,
# a legacy type that NumPy/SciPy discourage for new code.
dense_dataset = dataset.toarray()

# Wrap the dense matrix in a TensorDataset (one row per item) and batch it.
# float64 matches the model, which converts its parameters with .double().
genotype_dataset = TensorDataset(torch.tensor(dense_dataset, dtype=torch.float64))
dataloader = DataLoader(genotype_dataset, batch_size = 10, shuffle = True, num_workers=2, pin_memory=True)

class autoencoder_model_1 (nn.Module):
  """Fully connected autoencoder with three equally sized hidden layers.

  The network is input -> hidden -> hidden -> hidden -> output, where the
  output has the same width as the input. Every linear layer, including the
  last one, is followed by a ReLU. All parameters are converted to float64.

  Args:
    input_shape: number of features per sample; also the width of the
      reconstruction produced by forward().
    hidden_size: width of the three hidden representations. Defaults to 64,
      the value that was previously hard-coded.
    **kwargs: accepted and ignored so existing keyword-style constructor
      calls keep working.
  """

  def __init__(self, input_shape, hidden_size = 64, **kwargs):
    super().__init__()
    self.input_layer = nn.Linear(in_features = input_shape, out_features = hidden_size)
    self.encoder_layer = nn.Linear(in_features = hidden_size, out_features = hidden_size)
    self.decoder_layer = nn.Linear(in_features = hidden_size, out_features = hidden_size)
    self.output_layer = nn.Linear(in_features = hidden_size, out_features = input_shape)
    # Cast every parameter to float64 so the model accepts float64 input tensors.
    self.double()

  def forward (self, features):
    """Encode and reconstruct a batch of samples.

    Args:
      features: float64 tensor whose last dimension equals input_shape.

    Returns:
      Tensor with the same shape as features. Values are >= 0 because the
      output layer is followed by a ReLU.
    """
    hidden = torch.relu(self.input_layer(features))
    code = torch.relu(self.encoder_layer(hidden))
    decoded = torch.relu(self.decoder_layer(code))

    # NOTE(review): the final ReLU clamps reconstructions to be non-negative;
    # confirm that this matches the value range of the encoded genotypes.
    return torch.relu(self.output_layer(decoded))

In [18]:
# Run on the GPU when the runtime has one, otherwise stay on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Build the autoencoder for 264695 input features and move it to the chosen device.
model = autoencoder_model_1(input_shape = 264695)
model = model.to(device)

# Mean-squared error between the reconstruction and the input is the training objective.
reconstruction_loss = nn.MSELoss()
# Adam over all model parameters with a learning rate of 1e-3.
optimizer = optim.Adam(params = model.parameters(), lr = 1e-3)

In [19]:
epochs = 10
# Feature width the model was built with; read it from the model instead of
# repeating the literal so the loop stays in sync with the model definition.
n_features = model.input_layer.in_features
# Make sure the model is in training mode before optimising.
model.train()
for epoch in range(epochs):
  loss = 0.0          # running sum of per-sample squared-error means
  samples_seen = 0    # number of rows processed in this epoch
  for batch_features in dataloader:
    # TensorDataset yields 1-tuples; take the tensor and shape it as (rows, n_features).
    # non_blocking lets the copy overlap with compute because the loader pins memory.
    batch_features = batch_features[0].view(-1, n_features).to(device, non_blocking=True)
    # reset gradients to 0
    optimizer.zero_grad()
    # forward propagation
    outputs = model(batch_features)
    # reconstruction error of this batch (mean over all elements)
    train_loss = reconstruction_loss(outputs, batch_features)
    # backprop
    train_loss.backward()
    # update parameters with Adam optimizer
    optimizer.step()
    # Weight the batch mean by its row count so that a shorter final batch
    # does not count as much as a full one in the epoch average.
    rows = batch_features.size(0)
    loss += train_loss.item() * rows
    samples_seen += rows

  # average loss per sample over the epoch
  loss = loss / samples_seen
  print( "epoch: {}/{}, loss: {:.6f}".format(epoch+1, epochs, loss))

epoch: 1/10, loss: 0.681486
epoch: 2/10, loss: 0.402392
epoch: 3/10, loss: 0.354831
epoch: 4/10, loss: 0.319993
epoch: 5/10, loss: 0.308953
epoch: 6/10, loss: 0.306715
epoch: 7/10, loss: 0.306212
epoch: 8/10, loss: 0.306045
epoch: 9/10, loss: 0.305980
epoch: 10/10, loss: 0.306031


In [None]:
# Write only the model's parameter tensors (its state_dict) to a checkpoint file.
checkpoint_state = model.state_dict()
torch.save(checkpoint_state, 'checkpoint.pth')

# Trigger a browser download of the checkpoint file from the Colab runtime.
from google.colab import files
files.download('checkpoint.pth')

In [None]:
# Restore the saved parameters into the existing model.
# map_location remaps the stored tensors onto `device`, so a checkpoint written
# on a GPU runtime can still be loaded on a CPU-only runtime.
# NOTE: torch.load unpickles the file, so only load checkpoints from a trusted source.
state_dict = torch.load('checkpoint.pth', map_location=device)
model.load_state_dict(state_dict)