# Modeling Sanity Check: Why is my model not converging?

In this notebook, we'll test our neural network on a simple binary classification dataset to validate that our DataLoader is working correctly.

In [1]:
import dask.dataframe as dd
import pandas as pd 
import torch
import linecache 
import csv
import numpy as np
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

Let's define our custom data class and make sure everything is being streamed in correctly

In [2]:
df = pd.read_csv('datatest.csv')
labels = pd.read_csv('labelstest.csv')
labels['Pclass'].value_counts()

1    491
0    216
Name: Pclass, dtype: int64

In [25]:
class CustomDataset(Dataset):
    def __init__(self, filename, labelname):
        self._filename = filename
        self._labelname = labelname
        self._total_data = 0
        
        with open(filename, "r") as f:
            self._labelname = len(f.readlines()) - 1
    
    def __getitem__(self, idx):
        line = linecache.getline(self._filename, idx + 2)
        label = linecache.getline(self._labelname, idx + 2)
        
        csv_data = csv.reader([line])
        csv_label = csv.reader([label])
        
        data = [x for x in csv_data][0]
        label = [x for x in csv_label][0]
        
        return torch.from_numpy(np.array([float(x) for x in data])).float(), [int(float(x)) for x in label][0]
    
    def __len__(self):
        return self._total_data

class CustomDataset2(Dataset):
    def __init__(self, data, labels):
        self.data = pd.read_csv(data)
        self.labels = pd.read_csv(labels)
        
    def __getitem__(self, i):
        return torch.from_numpy(self.data.iloc[i, :].values).float(), self.labels.iloc[i, :].values[0]
    
    def __len__(self):
        return self.data.shape[0]

In [26]:
t = CustomDataset('datatest.csv', 'labelstest.csv')
traindata = DataLoader(t, batch_size=8, num_workers=0)

In [30]:
for X, y in t:
    print(X, y)

AttributeError: 'int' object has no attribute 'startswith'

Now that we have our dataset, we'll train a simple feedforward neural network and see if it converges. In fact, it should overfit quite a lot 

In [10]:
import torch

class NeuralNetwork(nn.Module):
    def __init__(self):
        super(NeuralNetwork, self).__init__()
        self.flatten = nn.Flatten()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(3, 512),
            nn.ReLU(),
            nn.Linear(512, 1024),
            nn.ReLU(),
            nn.Linear(1024, 512),
            nn.ReLU(),
            nn.Linear(512, 2),
        )

    def forward(self, x):
        x = self.flatten(x)
        logits = self.linear_relu_stack(x)
        return logits

In [11]:
model = NeuralNetwork()
model

NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=3, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=1024, bias=True)
    (3): ReLU()
    (4): Linear(in_features=1024, out_features=512, bias=True)
    (5): ReLU()
    (6): Linear(in_features=512, out_features=2, bias=True)
  )
)

And now we can train our network

In [12]:
from ignite.engine import Events, create_supervised_trainer, create_supervised_evaluator
from ignite.metrics import Accuracy, Loss

# model = NN()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
criterion = nn.CrossEntropyLoss()

trainer = create_supervised_trainer(model, optimizer, criterion)

val_metrics = {
    "accuracy": Accuracy(),
    "nll": Loss(criterion)
}

evaluator = create_supervised_evaluator(model, metrics=val_metrics)

log_interval = 3

@trainer.on(Events.EPOCH_COMPLETED)
def log_training_results(trainer):
    evaluator.run(traindata)
    metrics = evaluator.state.metrics
    print(f"Training Results - Epoch: {trainer.state.epoch}  Avg accuracy: {metrics['accuracy']:.2f} Avg loss: {metrics['nll']:.2f}")

In [13]:
trainer.run(traindata, max_epochs=100)

Current run is terminating due to exception: list index out of range
Engine run is terminating due to exception: list index out of range


IndexError: list index out of range

In [None]:
trainer.run(traindata2, max_epochs=100)