In [19]:
import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt
import utils 
import torch
import models
from torch import nn
from torch.nn import functional as F
from torchsummary import summary
from einops import rearrange, reduce, repeat
import pytorch_lightning as pl
from pytorch_lightning.loggers import WandbLogger
from pytorch_lightning.callbacks import EarlyStopping, ModelCheckpoint


In [20]:
# Weights & Biases logger shared by the Trainer and by the example-plot upload
# in PL_model.validation_epoch_end.
# NOTE(review): the run id is fixed, so every execution targets the same W&B
# run id -- confirm that this is intended.
WANDB_PROJECT = "test_20220711"
WANDB_RUN_ID = 'test'
wandb_logger = WandbLogger(project=WANDB_PROJECT, id=WANDB_RUN_ID)

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33macercyc[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [21]:
# Load the train/validation datasets and wrap each one in a DataLoader.
# Both loaders use the same batch size; only the training loader shuffles.
BATCH_SIZE = 128

dataset_train, dataset_val = utils.LoadData.mouseMovementRollingData()

dataloader_train = torch.utils.data.DataLoader(
    dataset_train,
    batch_size=BATCH_SIZE,
    shuffle=True,
)
dataloader_val = torch.utils.data.DataLoader(
    dataset_val,
    batch_size=BATCH_SIZE,
    shuffle=False,
)

In [31]:
# Number of items in the validation dataset (displayed as the cell output).
len(dataset_val)

1374

In [22]:
class PL_model(pl.LightningModule):
    """Lightning wrapper that trains ``models.TrajNet_tran2tran`` as an autoencoder.

    The network output is compared against its own input with an MSE loss.
    After each validation epoch one slice of the training data and one slice
    of the validation data are passed through the network, plotted side by
    side on ``self.fig`` and uploaded as an image through ``wandb_logger``.

    Relies on the notebook globals ``dataset_train``, ``dataset_val`` and
    ``wandb_logger`` being defined before validation runs.

    Args:
        seq_len: Length argument forwarded to ``utils.DataProcessing.seqTrim``
            for every training batch.
        lr: Learning rate of the Adam optimizer.
    """

    def __init__(self, seq_len=30, lr=0.001):
        super().__init__()
        self.seq_len = seq_len
        self.lr = lr
        self.model = models.TrajNet_tran2tran()
        # A single figure is reused (cleared and redrawn) every validation epoch.
        self.fig = plt.figure()

    def forward(self, x):
        """Run the wrapped network on ``x`` and return its output."""
        return self.model(x)

    def training_step(self, batch, batch_idx):
        """Trim the batch, reconstruct it and return the MSE reconstruction loss."""
        batch = utils.DataProcessing.seqTrim(batch, self.seq_len)
        y = self(batch)
        loss = F.mse_loss(y, batch)
        self.log('train_loss', loss)
        return loss

    def validation_step(self, batch, batch_idx):
        """Reconstruct the batch and return the MSE reconstruction loss."""
        # NOTE(review): unlike training_step, the batch is not passed through
        # seqTrim here, so 'val_loss' is computed on differently prepared
        # inputs. The recorded run failed inside this step with a channel
        # mismatch in the model -- confirm whether validation batches should
        # be trimmed as well.
        y = self(batch)
        loss = F.mse_loss(y, batch)
        self.log('val_loss', loss)
        return loss

    def validation_epoch_end(self, validation_step_outputs):
        """Plot one train and one validation reconstruction and upload the figure.

        ``validation_step_outputs`` is part of the hook signature but is not
        used: the examples are re-computed from the datasets directly.
        """
        self.fig.clear()

        # Left panel: training example. Right panel: validation example.
        self._plot_example(dataset_train[0:1], 1)
        self._plot_example(dataset_val[0:1], 2)

        img = utils.Plot.fig2img(self.fig)
        wandb_logger.log_image('traj', [img])

    def _plot_example(self, x, position):
        """Reconstruct ``x`` and draw input vs. output in subplot ``position`` (1 or 2)."""
        # Move the sample to the module's own device/dtype. The list of step
        # outputs cannot serve as the reference for ``type_as``.
        # Assumes dataset slices are numpy arrays -- TODO confirm.
        inputs = torch.from_numpy(x).to(device=self.device, dtype=self.dtype)
        y = self(inputs).detach().cpu().numpy()
        ax = self.fig.add_subplot(1, 2, position)
        # NOTE(review): cumsum() without an axis accumulates over the flattened
        # array -- confirm this is what traj_and_Reconstruc expects.
        utils.Plot.traj_and_Reconstruc(x.cumsum(), y.cumsum(), ax, legend=False)

    def configure_optimizers(self):
        """Return an Adam optimizer over all parameters of this module."""
        return torch.optim.Adam(self.parameters(), lr=self.lr)
    



In [23]:
# Training callbacks, both driven by the logged 'val_loss' (lower is better):
# early stopping with a patience of 10, and checkpointing with verbose output.
callbacks = [
    EarlyStopping('val_loss', patience=10, mode='min'),
    ModelCheckpoint(monitor='val_loss', mode='min', verbose=True),
]

In [25]:
# Build the model in double precision and fit it with the loaders, logger and
# callbacks defined in the cells above.
model = PL_model().double()

trainer_options = dict(
    max_epochs=100000,
    logger=wandb_logger,
    accelerator='gpu',
    strategy='dp',
    callbacks=callbacks,
)
trainer = pl.Trainer(**trainer_options)

trainer.fit(model, dataloader_train, dataloader_val)

Trainer already configured with model summary callbacks: [<class 'pytorch_lightning.callbacks.model_summary.ModelSummary'>]. Skipping setting a default `ModelSummary` callback.
GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]

  | Name  | Type              | Params
--------------------------------------------
0 | model | TrajNet_tran2tran | 537 K 
--------------------------------------------
537 K     Trainable params
0         Non-trainable params
537 K     Total params
2.151     Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


RuntimeError: Caught RuntimeError in replica 0 on device 0.
Original Traceback (most recent call last):
  File "/home/acercyc/anaconda3/envs/py38/lib/python3.8/site-packages/torch/nn/parallel/parallel_apply.py", line 61, in _worker
    output = module(*input, **kwargs)
  File "/home/acercyc/anaconda3/envs/py38/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1110, in _call_impl
    return forward_call(*input, **kwargs)
  File "/home/acercyc/anaconda3/envs/py38/lib/python3.8/site-packages/pytorch_lightning/overrides/data_parallel.py", line 64, in forward
    output = super().forward(*inputs, **kwargs)
  File "/home/acercyc/anaconda3/envs/py38/lib/python3.8/site-packages/pytorch_lightning/overrides/base.py", line 93, in forward
    return self.module.validation_step(*inputs, **kwargs)
  File "/tmp/ipykernel_2220998/2175216957.py", line 18, in validation_step
    y = self.forward(batch)
  File "/tmp/ipykernel_2220998/2175216957.py", line 8, in forward
    return self.model(x)
  File "/home/acercyc/anaconda3/envs/py38/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1110, in _call_impl
    return forward_call(*input, **kwargs)
  File "/home/acercyc/projects/Keio Testing_analysis/src/models.py", line 89, in forward
    x = self.enc_conv(x)
  File "/home/acercyc/anaconda3/envs/py38/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1110, in _call_impl
    return forward_call(*input, **kwargs)
  File "/home/acercyc/anaconda3/envs/py38/lib/python3.8/site-packages/torch/nn/modules/conv.py", line 302, in forward
    return self._conv_forward(input, self.weight, self.bias)
  File "/home/acercyc/anaconda3/envs/py38/lib/python3.8/site-packages/torch/nn/modules/conv.py", line 298, in _conv_forward
    return F.conv1d(input, weight, bias, self.stride,
RuntimeError: Given groups=1, weight of size [64, 18, 1], expected input[64, 10, 72] to have 18 channels, but got 10 channels instead


<Figure size 432x288 with 0 Axes>