<a href="https://colab.research.google.com/github/conniaren/GenotypeImputationProject/blob/master/2-Models/autoencoder_model_1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install -qqq wandb pytorch-lightning requests

[K     |████████████████████████████████| 1.7 MB 11.7 MB/s 
[K     |████████████████████████████████| 1.0 MB 46.2 MB/s 
[K     |████████████████████████████████| 97 kB 6.9 MB/s 
[K     |████████████████████████████████| 180 kB 46.5 MB/s 
[K     |████████████████████████████████| 139 kB 47.6 MB/s 
[K     |████████████████████████████████| 63 kB 1.8 MB/s 
[K     |████████████████████████████████| 132 kB 51.3 MB/s 
[K     |████████████████████████████████| 596 kB 53.7 MB/s 
[K     |████████████████████████████████| 829 kB 49.0 MB/s 
[K     |████████████████████████████████| 329 kB 43.9 MB/s 
[K     |████████████████████████████████| 1.1 MB 39.8 MB/s 
[K     |████████████████████████████████| 160 kB 51.4 MB/s 
[K     |████████████████████████████████| 271 kB 46.7 MB/s 
[K     |████████████████████████████████| 192 kB 52.6 MB/s 
[?25h  Building wheel for subprocess32 (setup.py) ... [?25l[?25hdone
  Building wheel for future (setup.py) ... [?25l[?25hdone
  Building wheel f

In [None]:
import scipy
from scipy import sparse 
import numpy as np
import torch
from torch.utils.data import DataLoader, Dataset, TensorDataset
from torchvision import transforms
from torchvision.transforms import Compose
from torch import nn, optim 
import wandb
import pytorch_lightning as pl
import torch.nn.functional as F
import io
import requests
from sklearn.model_selection import KFold

In [None]:
# Download the dataset file from Google Drive and keep it in memory.
r = requests.get(
    "https://drive.google.com/uc?export=download&id=1UraCevZUlKeCHOtYd5PeMX2-hwNVb93O",
    timeout=60,  # do not hang the notebook forever on a stalled connection
)
# Stop here on an HTTP error instead of handing an error page to the loader.
r.raise_for_status()
# File-like wrapper around the downloaded bytes.
buf = io.BytesIO(r.content)
#wandb.login()

In [None]:
# Load the dataset: read the in-memory .npz as a SciPy sparse matrix,
# then expand it to a dense np.matrix (what `.todense()` returns).
sparse_dataset = sparse.load_npz(buf)
dataset = sparse_dataset.todense()

# The block below is a bare string literal, so none of it is executed.
# It refers to `dense_dataset`, which this cell does not define.
'''
#adding 5% noise to the data (setting random genotypes to 0)
for i in range(dense_dataset.shape[0]):
  noise_percent = int(0.05 * dense_dataset.shape[1])
  noise_indices = np.random.randint(dense_dataset.shape[1], size= noise_percent)
  dense_dataset[i, noise_indices] = 0
'''

class NoisyDataset(Dataset):
  """Dataset yielding ``(noised, clean)`` pairs taken from two parallel containers.

  Item ``idx`` is built from ``datasetnoised[idx]`` and ``datasetclean[idx]``.
  When a transform is supplied it is applied to both elements of the pair.
  """

  def __init__(self,datasetnoised,datasetclean,transform=None):
    """Store the two containers and the optional per-sample transform.

    Args:
      datasetnoised: indexable container of noised samples; its length is
        the length of the dataset.
      datasetclean: indexable container of clean samples, indexed with the
        same ``idx`` as ``datasetnoised``.
      transform: optional callable applied to each sample; ``None`` returns
        the samples unchanged.
    """
    self.noise=datasetnoised
    self.clean=datasetclean
    self.transform=transform

  def __len__(self):
    """Return the number of noised samples."""
    return len(self.noise)

  def __getitem__(self,idx):
    """Return the ``(noised, clean)`` pair stored at position ``idx``."""
    xNoise=self.noise[idx]
    xClean=self.clean[idx]

    # Identity check: `!= None` would invoke a custom __ne__ on the transform.
    if self.transform is not None:
      xNoise=self.transform(xNoise)
      xClean=self.transform(xClean)
    return (xNoise,xClean)

# Per-sample conversion pipeline: a single ToTensor step wrapped in Compose.

transform = Compose([transforms.ToTensor()])

#genotype_dataset=NoisyDataset(dense_dataset,clean,transform)
#print (genotype_dataset[0])

In [None]:
# Baseline prediction built from per-column frequencies only.
# Assumes `dataset` is the 2-D np.matrix produced by `.todense()`, so that
# axis-0 reductions stay 2-D with shape (1, n_columns) — TODO confirm if the
# loader is ever switched to `.toarray()`.
# Presumably the values are 0/1/2 allele counts, which makes mean/2 an allele
# frequency p and the three terms below the Hardy-Weinberg proportions — verify.
n_samples = dataset.shape[0]   # number of rows to replicate the baseline over

hw_dataset = np.mean(dataset,axis = 0)/2   # p, shape (1, n_columns)
q = 1 - hw_dataset                         # q = 1 - p, same shape
pp = np.square(hw_dataset).T               # p^2,  shape (n_columns, 1)
pq = 2*np.multiply(hw_dataset,q).T         # 2pq,  shape (n_columns, 1)
qq = np.square(q).T                        # q^2,  shape (n_columns, 1)
# One row per column of `dataset`; the column order [q^2, 2pq, p^2] makes the
# argmax index (0, 1 or 2) the predicted value.
hw_dataset = np.concatenate([qq,pq,pp], axis = 1)

hw_variants = np.argmax(hw_dataset, axis= 1)
# The same most-likely value per column, repeated for every sample row.
baseline_hw_dataset = np.repeat(hw_variants.T, n_samples, axis = 0)
print(baseline_hw_dataset)

[[0 0 0 ... 0 1 0]
 [0 0 0 ... 0 1 0]
 [0 0 0 ... 0 1 0]
 ...
 [0 0 0 ... 0 1 0]
 [0 0 0 ... 0 1 0]
 [0 0 0 ... 0 1 0]]


In [None]:
from sklearn.decomposition import PCA 
from sklearn.preprocessing import StandardScaler, OneHotEncoder
def PCA (dataset, components=200):
  """Project ``dataset`` onto its leading principal components.

  Args:
    dataset: 2-D array-like with one sample per row.
    components: number of principal components to keep.

  Returns:
    The transformed data returned by the fitted estimator's ``transform``.
  """
  # This function's own name rebinds the module-level `PCA` imported from
  # scikit-learn, so calling `PCA(components)` in here would call this function
  # again and recurse without end. Import the estimator under an alias instead.
  from sklearn.decomposition import PCA as _SklearnPCA
  reduction = _SklearnPCA(n_components=components)
  reduction.fit(dataset)
  return reduction.transform(dataset)

def standardize (dataset):
  """Return ``dataset`` after fitting a fresh StandardScaler to it and applying it."""
  return StandardScaler().fit_transform(dataset)

def one_hot (dataset):
  """One-hot encode ``dataset`` with a fresh OneHotEncoder; return a dense array.

  The encoder is created with ``handle_unknown='ignore'`` and its output is
  converted to dense form with ``.toarray()``.
  """
  encoder = OneHotEncoder(handle_unknown='ignore')
  encoded = encoder.fit_transform(dataset)
  return encoded.toarray()

In [None]:
# Run the StandardScaler wrapper over the whole dataset and report only the
# shape of the result.
standardized_dataset = standardize(dataset)
print(standardized_dataset.shape)

# Run the OneHotEncoder wrapper over the whole dataset; the result is a dense
# array (the wrapper calls `.toarray()`), printed here in full.
one_hot_dataset = one_hot(dataset)
print(one_hot_dataset)

(503, 264695)
[[1. 0. 0. ... 0. 1. 0.]
 [1. 0. 0. ... 0. 1. 0.]
 [1. 0. 0. ... 0. 1. 0.]
 ...
 [1. 0. 0. ... 0. 0. 1.]
 [1. 0. 0. ... 0. 1. 0.]
 [1. 0. 0. ... 0. 1. 0.]]


In [None]:
class autoencoder_model_1 (pl.LightningModule):
  """Autoencoder with one hidden layer, trained to reconstruct its own input.

  Encoder: ``Linear(input_dim, n_hidden)`` followed by ReLU.
  Decoder: ``Linear(n_hidden, input_dim)`` followed by ReLU.
  All parameters are cast to float64 (``self.double()``), so inputs must be
  float64 tensors as well.
  """

  def __init__(self, input_dim, n_hidden = 128, lr = 1e-3):
    """Build the encoder/decoder pair.

    Args:
      input_dim: number of features per sample (width of the input rows).
      n_hidden: width of the hidden (bottleneck) layer.
      lr: learning rate handed to Adam in ``configure_optimizers``.
    """
    super().__init__()
    self.encoder = nn.Sequential(nn.Linear(input_dim, n_hidden), nn.ReLU())
    self.decoder = nn.Sequential(nn.Linear(n_hidden, input_dim), nn.ReLU())
    self.double()
    self.save_hyperparameters()
    self.learning_rate = lr

  def forward (self, features):
    """Encode then decode ``features`` and return the reconstruction."""
    reconstruction = self.encoder(features)
    reconstruction = self.decoder(reconstruction)
    return reconstruction

  def training_step(self, batch, batch_idx):
    """Compute the MSE reconstruction loss for one batch and log it.

    Args:
      batch: sequence whose first element is the input tensor (the shape a
        single-tensor ``TensorDataset`` produces).
      batch_idx: index of the batch within the epoch (unused).

    Returns:
      The scalar MSE loss between the reconstruction and the input.
    """
    # Flatten every sample to one row; the first dimension stays the batch size.
    x = batch[0].view(batch[0].size(0), -1)
    z = self.encoder(x)
    x_hat = self.decoder(z)
    loss = F.mse_loss(x_hat, x)
    # Goes to whichever logger the Trainer was given.
    self.log("train_loss", loss, on_epoch = True)
    # Direct W&B logging only when a run is active: wandb.log raises without
    # one, which would break training under any non-W&B logger.
    if wandb.run is not None:
      wandb.log({ "loss": loss})
    return loss

  def configure_optimizers(self):
    """Return an Adam optimizer over all parameters at ``self.learning_rate``."""
    return torch.optim.Adam(self.parameters(), lr=self.learning_rate)

  def train_dataloader(self):
    """Return a batch-size-10 DataLoader over the notebook-level ``dataset``.

    Reads the global ``dataset`` at call time. It is only used when
    ``trainer.fit`` is called without an explicit dataloader.
    NOTE(review): the width of ``dataset`` must equal ``input_dim`` — confirm
    at the call site.
    """
    genotype_dataset = TensorDataset(torch.tensor(dataset, dtype=torch.float64))
    dataloader = DataLoader(genotype_dataset,batch_size=10)
    return dataloader

In [None]:
#Configurations
k_fold = 5
epochs = 30
batch_size = 10
n_hidden = 128
learning_rate = 1e-3
seed = 333
results = {}

#Set seed number
torch.manual_seed(seed)

#K-fold cross validator
# random_state pins the shuffled split; torch.manual_seed has no effect on it.
# NumPy's global RNG is deliberately left unseeded because it generates the
# W&B run id below, which has to differ between runs.
kFold = KFold (n_splits=k_fold, shuffle = True, random_state=seed)


print("-----------------------------------")

# Train on a window of columns. The window gets its own name so that `dataset`
# is not overwritten: slicing `dataset` in place made a second run of this cell
# produce an empty matrix.
genotype_window = dataset[:,200000:200500]
n_features = genotype_window.shape[1]

# The tensor dataset is the same for every fold, so build it once.
genotype_dataset = TensorDataset(torch.tensor(genotype_window, dtype=torch.float64))

#K-Fold loop
for fold, (train_i, valid_i) in enumerate(kFold.split(genotype_window)):
  print(f"FOLD{fold}")
  print("-----------------------------------")

  train_subsample = torch.utils.data.SubsetRandomSampler(train_i)
  valid_subsample = torch.utils.data.SubsetRandomSampler(valid_i)

  train_loader = DataLoader(genotype_dataset, batch_size = batch_size, sampler=train_subsample)
  valid_loader = DataLoader(genotype_dataset, batch_size = batch_size, sampler=valid_subsample)
  wandb_logger = pl.loggers.WandbLogger(project="Imputation Autoencoder Project")
  # A fresh model per fold, sized from the data rather than a literal 500.
  model = autoencoder_model_1(n_features, n_hidden=n_hidden, lr=learning_rate)
  trainer = pl.Trainer(
      logger=wandb_logger,    # W&B integration
      log_every_n_steps=1,    # set the logging frequency
      gpus=-1 if torch.cuda.is_available() else 0,  # all GPUs, or CPU when none
      max_epochs=epochs,      # number of epochs
      deterministic=True,     # keep it deterministic
      # NOTE(review): in Lightning 1.x this only takes effect via trainer.tune(),
      # which is never called here — confirm whether LR search is wanted.
      auto_lr_find = True     # Find the learning rate
  )
  # Run metadata recorded in W&B.
  # NOTE(review): "layer_1": 64 / "layer_2": 128 do not describe the model built
  # above (one hidden layer of n_hidden units) — confirm the intended values.
  args = {
      "learning_rate": learning_rate,
      "architecture": "Autoencoder",
      "dataset": "1000 Genome Project",
      "layer_1": 64,
      "activation_1": "relu",
      "layer_2": 128,
      "activation_2": "relu",
      "optimizer": "Adam",
      "loss": "MSE",
      "metric": "CV Accuracy",
      "epoch": epochs,
      "batch_size": batch_size,
      "n_hidden_layers":1}
  group_name = "bin_group_1000"
  name=group_name+'_seed_'+str(np.random.randint(100000000))
  run=wandb.init(project="Imputation Autoencoder Project",save_code=False,
                group=group_name,entity="connia",name=name,
                mode="online",id=name,config=args,allow_val_change=True)

  trainer.fit(model, train_loader)
  wandb.finish()
  # The message is kept as-is, but the save below is commented out, so nothing
  # is written to disk.
  print('Training process has finished. Saving trained model.')
  print('Starting testing')

  #Save model
  #path = f"./model-fold-{fold}.pth"
  #torch.save(model.state_dict(), path)

  #Evaluation per fold
  # Accuracy = fraction of entries whose rounded reconstruction equals the input.
  correct, total = 0,0
  model.cpu()   # validation batches are CPU tensors
  model.eval()
  with torch.no_grad():
    for data in valid_loader:
      inputs = data[0]
      prediction = torch.round(model(inputs))
      total += inputs.numel()
      correct += (prediction == inputs).sum().item()

  # Report once per fold, after every validation batch has been counted.
  fold_accuracy = 100.0 * (correct / total)
  print('Accuracy for fold %d: %d %%' % (fold, fold_accuracy))
  print('--------------------------------')
  results[fold] = fold_accuracy

# Print fold results once, after all folds have finished.
print(f'K-FOLD CROSS VALIDATION RESULTS FOR {k_fold} FOLDS')
print('--------------------------------')
accuracy_total = 0.0   # not named `sum`: that would shadow the builtin
for key, value in results.items():
  print(f'Fold {key}: {value} %')
  accuracy_total += value
print(f'Average: {accuracy_total/len(results)} %')

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs


-----------------------------------
FOLD0
-----------------------------------


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
  "There is a wandb run already in progress and newly created instances of `WandbLogger` will reuse"

  | Name    | Type       | Params
---------------------------------------
0 | encoder | Sequential | 64.1 K
1 | decoder | Sequential | 64.5 K
---------------------------------------
128 K     Trainable params
0         Non-trainable params
128 K     Total params
0.515     Total estimated model params size (MB)


Training: 0it [00:00, ?it/s]

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
epoch,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇████
loss,█▅▃▂▃▂▂▂▃▂▂▂▂▁▂▂▂▂▂▂▂▁▂▂▂▂▁▁▁▂▁▂▃▁▁▁▂▃▂▁
train_loss_epoch,█▃▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_loss_step,█▅▃▂▃▂▂▂▃▂▂▂▂▁▂▂▂▂▂▂▂▁▂▂▂▂▁▁▁▂▁▂▃▁▁▁▂▃▂▁
trainer/global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
epoch,29.0
loss,0.0666
train_loss_epoch,0.05431
train_loss_step,0.0666
trainer/global_step,1229.0


GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
[34m[1mwandb[0m: Currently logged in as: [33mconnia[0m (use `wandb login --relogin` to force relogin)


Training process has finished. Saving trained model.
Starting testing
Accuracy for fold 0: 95 %
--------------------------------
Accuracy for fold 0: 95 %
--------------------------------
Accuracy for fold 0: 95 %
--------------------------------
Accuracy for fold 0: 95 %
--------------------------------
Accuracy for fold 0: 95 %
--------------------------------
Accuracy for fold 0: 95 %
--------------------------------
Accuracy for fold 0: 95 %
--------------------------------
Accuracy for fold 0: 95 %
--------------------------------
Accuracy for fold 0: 95 %
--------------------------------
Accuracy for fold 0: 95 %
--------------------------------
Accuracy for fold 0: 95 %
--------------------------------
K-FOLD CROSS VALIDATION RESULTS FOR 5 FOLDS
--------------------------------
Fold 0: 95.66336633663366 %
Average: 95.66336633663366 %
FOLD1
-----------------------------------


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params
---------------------------------------
0 | encoder | Sequential | 64.1 K
1 | decoder | Sequential | 64.5 K
---------------------------------------
128 K     Trainable params
0         Non-trainable params
128 K     Total params
0.515     Total estimated model params size (MB)


Training: 0it [00:00, ?it/s]

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
epoch,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇████
loss,█▆▃▂▂▃▂▂▃▁▂▁▃▂▁▁▂▂▃▃▂▂▁▁▂▁▂▂▂▂▁▂▃▂▁▂▂▂▂▂
train_loss_epoch,█▃▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_loss_step,█▆▃▂▂▃▂▂▃▁▂▁▃▂▁▁▂▂▃▃▂▂▁▁▂▁▂▂▂▂▁▂▃▂▁▂▂▂▂▂
trainer/global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
epoch,29.0
loss,0.03213
train_loss_epoch,0.062
train_loss_step,0.03213
trainer/global_step,1229.0


GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs


Training process has finished. Saving trained model.
Starting testing
Accuracy for fold 1: 94 %
--------------------------------
Accuracy for fold 1: 94 %
--------------------------------
Accuracy for fold 1: 94 %
--------------------------------
Accuracy for fold 1: 95 %
--------------------------------
Accuracy for fold 1: 95 %
--------------------------------
Accuracy for fold 1: 95 %
--------------------------------
Accuracy for fold 1: 95 %
--------------------------------
Accuracy for fold 1: 95 %
--------------------------------
Accuracy for fold 1: 95 %
--------------------------------
Accuracy for fold 1: 95 %
--------------------------------
Accuracy for fold 1: 95 %
--------------------------------
K-FOLD CROSS VALIDATION RESULTS FOR 5 FOLDS
--------------------------------
Fold 0: 95.66336633663366 %
Fold 1: 95.07524752475247 %
Average: 95.36930693069306 %
FOLD2
-----------------------------------


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params
---------------------------------------
0 | encoder | Sequential | 64.1 K
1 | decoder | Sequential | 64.5 K
---------------------------------------
128 K     Trainable params
0         Non-trainable params
128 K     Total params
0.515     Total estimated model params size (MB)


Training: 0it [00:00, ?it/s]

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
epoch,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇████
loss,█▄▃▃▃▃▃▂▂▂▂▂▃▂▁▄▂▂▂▂▃▅▂▁▁▃▄▂▂▂▃▂▂▃▂▂▁▁▃▁
train_loss_epoch,█▃▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_loss_step,█▄▃▃▃▃▃▂▂▂▂▂▃▂▁▄▂▂▂▂▃▅▂▁▁▃▄▂▂▂▃▂▂▃▂▂▁▁▃▁
trainer/global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
epoch,29.0
loss,0.04138
train_loss_epoch,0.06657
train_loss_step,0.04138
trainer/global_step,1229.0


GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs


Training process has finished. Saving trained model.
Starting testing
Accuracy for fold 2: 94 %
--------------------------------
Accuracy for fold 2: 94 %
--------------------------------
Accuracy for fold 2: 94 %
--------------------------------
Accuracy for fold 2: 94 %
--------------------------------
Accuracy for fold 2: 94 %
--------------------------------
Accuracy for fold 2: 94 %
--------------------------------
Accuracy for fold 2: 94 %
--------------------------------
Accuracy for fold 2: 94 %
--------------------------------
Accuracy for fold 2: 94 %
--------------------------------
Accuracy for fold 2: 94 %
--------------------------------
Accuracy for fold 2: 94 %
--------------------------------
K-FOLD CROSS VALIDATION RESULTS FOR 5 FOLDS
--------------------------------
Fold 0: 95.66336633663366 %
Fold 1: 95.07524752475247 %
Fold 2: 94.86336633663366 %
Average: 95.20066006600659 %
FOLD3
-----------------------------------


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params
---------------------------------------
0 | encoder | Sequential | 64.1 K
1 | decoder | Sequential | 64.5 K
---------------------------------------
128 K     Trainable params
0         Non-trainable params
128 K     Total params
0.515     Total estimated model params size (MB)


Training: 0it [00:00, ?it/s]

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
epoch,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇████
loss,█▅▃▄▂▃▂▂▃▃▂▃▃▂▁▂▃▂▁▁▃▃▂▂▂▃▃▂▁▁▂▂▂▂▂▄▂▂▂▃
train_loss_epoch,█▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_loss_step,█▅▃▄▂▃▂▂▃▃▂▃▃▂▁▂▃▂▁▁▃▃▂▂▂▃▃▂▁▁▂▂▂▂▂▄▂▂▂▃
trainer/global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
epoch,29.0
loss,0.04733
train_loss_epoch,0.0595
train_loss_step,0.04733
trainer/global_step,1229.0


GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs


Training process has finished. Saving trained model.
Starting testing
Accuracy for fold 3: 94 %
--------------------------------
Accuracy for fold 3: 94 %
--------------------------------
Accuracy for fold 3: 94 %
--------------------------------
Accuracy for fold 3: 94 %
--------------------------------
Accuracy for fold 3: 94 %
--------------------------------
Accuracy for fold 3: 94 %
--------------------------------
Accuracy for fold 3: 94 %
--------------------------------
Accuracy for fold 3: 94 %
--------------------------------
Accuracy for fold 3: 94 %
--------------------------------
Accuracy for fold 3: 94 %
--------------------------------
K-FOLD CROSS VALIDATION RESULTS FOR 5 FOLDS
--------------------------------
Fold 0: 95.66336633663366 %
Fold 1: 95.07524752475247 %
Fold 2: 94.86336633663366 %
Fold 3: 94.64399999999999 %
Average: 95.06149504950494 %
FOLD4
-----------------------------------


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params
---------------------------------------
0 | encoder | Sequential | 64.1 K
1 | decoder | Sequential | 64.5 K
---------------------------------------
128 K     Trainable params
0         Non-trainable params
128 K     Total params
0.515     Total estimated model params size (MB)


Training: 0it [00:00, ?it/s]

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
epoch,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇████
loss,█▆▃▂▃▃▃▃▂▂▄▃▃▂▂▂▂▂▂▁▂▂▂▂▃▂▂▁▁▂▃▃▃▂▂▄▂▁▂▂
train_loss_epoch,█▃▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_loss_step,█▆▃▂▃▃▃▃▂▂▄▃▃▂▂▂▂▂▂▁▂▂▂▂▃▂▂▁▁▂▃▃▃▂▂▄▂▁▂▂
trainer/global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
epoch,29.0
loss,0.0222
train_loss_epoch,0.04512
train_loss_step,0.0222
trainer/global_step,1229.0


Training process has finished. Saving trained model.
Starting testing
Accuracy for fold 4: 95 %
--------------------------------
Accuracy for fold 4: 96 %
--------------------------------
Accuracy for fold 4: 96 %
--------------------------------
Accuracy for fold 4: 96 %
--------------------------------
Accuracy for fold 4: 96 %
--------------------------------
Accuracy for fold 4: 96 %
--------------------------------
Accuracy for fold 4: 96 %
--------------------------------
Accuracy for fold 4: 96 %
--------------------------------
Accuracy for fold 4: 96 %
--------------------------------
Accuracy for fold 4: 95 %
--------------------------------
K-FOLD CROSS VALIDATION RESULTS FOR 5 FOLDS
--------------------------------
Fold 0: 95.66336633663366 %
Fold 1: 95.07524752475247 %
Fold 2: 94.86336633663366 %
Fold 3: 94.64399999999999 %
Fold 4: 95.95 %
Average: 95.23919603960395 %


In [None]:
# Hand-written training loop: forward pass, loss, backward pass and optimizer
# step per batch, with one averaged loss value printed and sent to W&B per epoch.
# NOTE(review): `dataloader`, `device`, `optimizer`, `model` and
# `reconstruction_loss` are not defined in any cell visible in this notebook;
# the cell presumably depends on leftover kernel state — confirm before
# relying on Restart & Run All.
# This rebinds the notebook-level `epochs` to 25.
epochs = 25
for epoch in range(epochs):
  loss = 0
  for batch_features in dataloader:
    # unpack: take the first element of the batch, reshape it to rows of
    # 264695 values (the column count printed for the standardized dataset),
    # cast to float64 and move it to `device`
    batch_features = batch_features[0].view(-1,264695).type(torch.DoubleTensor).to(device)
    #clean_features = clean_features.view(264695,-1).type(torch.DoubleTensor).to(device)
    # reset gradients to 0
    optimizer.zero_grad()
    # forward propagation
    outputs = model(batch_features)
    # calculate the loss between the reconstruction and its own input
    train_loss = reconstruction_loss(outputs,batch_features)
    # backprop
    train_loss.backward()
    # update parameters with the optimizer
    optimizer.step()
    # accumulate this batch's loss value
    loss += train_loss.item()

  # average of the per-batch losses over the epoch
  loss = loss/len(dataloader)
  print( "epoch: {}/{}, loss: {:.6f}".format(epoch+1, epochs, loss))
  wandb.log({ "loss": loss})

epoch: 1/25, loss: 0.449878
epoch: 2/25, loss: 0.311405
epoch: 3/25, loss: 0.304939
epoch: 4/25, loss: 0.268539
epoch: 5/25, loss: 0.267630
epoch: 6/25, loss: 0.266968
epoch: 7/25, loss: 0.266631
epoch: 8/25, loss: 0.266566
epoch: 9/25, loss: 0.267111
epoch: 10/25, loss: 0.266164
epoch: 11/25, loss: 0.265816
epoch: 12/25, loss: 0.266767
epoch: 13/25, loss: 0.265539
epoch: 14/25, loss: 0.264806
epoch: 15/25, loss: 0.264682
epoch: 16/25, loss: 0.264629
epoch: 17/25, loss: 0.264544
epoch: 18/25, loss: 0.264626
epoch: 19/25, loss: 0.264762
epoch: 20/25, loss: 0.264847
epoch: 21/25, loss: 0.265092
epoch: 22/25, loss: 0.265423
epoch: 23/25, loss: 0.265291
epoch: 24/25, loss: 0.265123
epoch: 25/25, loss: 0.264896


In [None]:
# Delete the local "Imputation Autoencoder Project" directory if it exists
# (presumably the logger's output folder named after the W&B project — confirm).
# Done in Python rather than as a bare `rm` line: that form depends on IPython
# automagic, which only applies to single-line cells, and on a POSIX shell.
import shutil

shutil.rmtree("Imputation Autoencoder Project", ignore_errors=True)

In [None]:
# Print the layer structure of whatever `model` is currently bound in the kernel.
print (model)

autoencoder_model_1(
  (input_layer): Linear(in_features=264695, out_features=256, bias=True)
  (output_layer): Linear(in_features=256, out_features=264695, bias=True)
)


In [None]:
# Write the parameters of `model` to a local file, then hand that file to the
# browser through Colab's download helper.
checkpoint_path = 'checkpoint.pth'
torch.save(model.state_dict(), checkpoint_path)

from google.colab import files
files.download(checkpoint_path)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
# Restore the saved parameters into `model` and show its structure.
# map_location='cpu' lets a checkpoint saved from GPU tensors load on a
# CPU-only runtime; load_state_dict then copies the values into the model's
# existing parameters on whatever device they live on.
state_dict = torch.load('checkpoint.pth', map_location='cpu')
model.load_state_dict(state_dict)
print(model)

autoencoder_model_1(
  (input_layer): Linear(in_features=264695, out_features=64, bias=True)
  (encoder_layer): Linear(in_features=64, out_features=64, bias=True)
  (decoder_layer): Linear(in_features=64, out_features=64, bias=True)
  (output_layer): Linear(in_features=64, out_features=264695, bias=True)
)
