## Cell Density using Deep People Counting

- Based on network discribed in [paper](https://yangliang.github.io/pdf/sp055u.pdf)
- Written in pytorch
- trained on INS1_BF dataset

[This](https://discuss.pytorch.org/t/torch-norm-3-6x-slower-than-manually-calculating-sum-of-squares/14684) fixed some problems with wrong channels

## TODO:

- [ ] Fix the issues with trying to load everything into memory - Error comes from trying to load all images into RAM fix in dataloader
- [ ] fix the mem leak of 152 gb
- [ ] reduce batch size, or iterator fix: [here](https://stackoverflow.com/questions/51444059/how-to-iterate-over-two-dataloaders-simultaneously-using-pytorch/57890309#57890309), [here](https://github.com/pytorch/pytorch/issues/1917#issuecomment-433698337) and [here](https://stackoverflow.com/questions/53280967/pytorch-nextitertraining-loader-extremely-slow-simple-data-cant-num-worke)
- [ ] switch model to use cuda otherwise this is will be hell
- [ ] save model and outpus for future study

In [1]:
import os

import torch
from torch.autograd import Variable
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import transforms, utils, datasets
from torch.utils.data import DataLoader

print(torch.cuda.current_device())
torch.cuda.empty_cache()

0


In [2]:
torch.cuda.device_count()

2

### Notes on Pytorch 

Code:
`torch.nn.Conv2d(inchannels : int, out_channels: int, kernel_size: Union[T, Tuple[T, T]], stride: Union[T, Tuple[T, T]] = 1, padding: Union[T, Tuple[T, T]] = 0, dilation: Union[T, Tuple[T, T]] = 1, groups: int = 1, bias: bool = True, padding_mode: str = 'zeros')`

- inchannels: number of channels 1 for `layer1` then preivous output
- outchannels: reshaped size, input for next layer
- kernel_ size: size of the sliding convolution
- stride: how many spaces the sliding convolution is sliding
- padding: used to maintain size, default zero
- rest maybe tuned for better performance

In [3]:
# Model
class Model(nn.Module):
    def __init__(self):
        super(Model, self).__init__()
        self.inp = nn.Sequential( # inputlayer
            nn.Conv2d(3, 96, kernel_size=11,stride=1), # check stride & padding later
            nn.ReLU(), # could use other optimizer here for results, look into it
            nn.MaxPool2d(kernel_size=2, stride=2) # down-sampling check sizes
        )
        self.layer1 = nn.Sequential(
            nn.Conv2d(96,256,kernel_size=3,stride=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2,stride=2)
        )
        self.layer2 = nn.Sequential(
            nn.Conv2d(256, 384, kernel_size=3, stride=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )
        self.layer3 = nn.Sequential(
            nn.Conv2d(384,384,kernel_size=2, stride=1),
            nn.ReLU() # NO maxpool since dimension does not change
        )
        self.layer4 = nn.Sequential(
            nn.Conv2d(384,256,kernel_size=2, stride=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )
        self.layer5 = nn.Sequential(
            nn.Conv2d(256,256,kernel_size=6,stride=1)
        )
        # self.dropout # not listed by I like it
        self.fc1 = nn.Linear(6*6*256, 4096)
        self.fc2 = nn.Linear(4096, 1) # one being the number of 'people' in image
        
    def forward(self, x):
        out = self.inp(x)
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = self.layer5(out)
        out = out.reshape((-1,)) # flatten to fully connected layers 6x6x259 -> 4096x1
        out = self.fc1(out)
        out = self.fc2(out)
        return out

In [None]:
# simple model (for mem leak problems)
class SimpModel(nn.Module):
    def __init__(self):
        super(SimpModel, self).__init__()
        self.layer1 = nn.Sequential(
            nn.Conv2d(3, 96, kernel_size=2, stride=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )
        self.layer1 = nn.Sequential(
            nn.Conv2d(96, 256, kernel_size=6, stride=1),
            nn.ReLU(),
        )
        self.fc1 = nn.Linear(6*6*256, 4096)
        self.fc2 = nn.Linear(4096, 1)
    
    def forward(self, x):
        out = self.layer1(x)
        out = self.layer2(out)
        out = self.fc1(out)
        out = self.fc2(out)
        return out

In [4]:
# make the model
model = Model()

# model = SimpModel()

# model = model.cuda()
# loss and optimization
num_epochs = 5
batch_size = 1
learning_rate = 0.001

criterion = nn.MSELoss()
optimizer = torch,optim.SGD(model.parameters(), lr=learning_rate)

In [5]:
torch.cuda.empty_cache()

torch.cuda.memory_allocated(0)

0

In [6]:
# load in data and create train and test sets (80/20)
# load into data loaders
PATH = 'data/INS1_BF/'

# transformer
trans = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.1307), (0.3081))])
# todo: find better normalization values for this

# load dataset
file_list = os.listdir(PATH)
print(file_list)

['6000', '4000', '2000', '10000', '8000', '0']


In [7]:
# load in data and create classes
data = datasets.ImageFolder(root=PATH, transform=trans)
print(data.classes)

['0', '10000', '2000', '4000', '6000', '8000']


In [8]:
# test/train split
train_size = int(0.7*len(data))
test_size = len(data) - train_size
train_set, test_set = torch.utils.data.random_split(data, [train_size, test_size])
print(len(train_set), len(test_set))

268 116


In [9]:
print(len(data))

384


In [10]:
# data loaderss
train_loader = DataLoader(dataset=train_set, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(dataset=test_set, batch_size=batch_size, shuffle=False)

In [None]:
# # train the model
# total_step = len(train_loader)
# loss_list = []
# acc_list = []

# torch.cuda.empty_cache()


# for epoch in range(num_epochs):
#     for i, (images, labels) in enumerate(train_loader):
        
#         # use cuda
#         images, labels = images.cuda(), labels.cuda()
                
#         # foward pass
#         outputs = model(images)
#         loss = criterion(outputs, labels)
#         loss_list.append(loss.item())
        
#         # backpropagation
#         optimizer.zero_grad()
#         loss.backward()
#         optimizer.step()
        
#         # accuracy
#         total = labels.size(0)
#         _, predicted = torch.max(outputs.data, 1)
#         correct = (predicted ==labels).sum().item()
#         acc_list.append(correct/total)
        
#         if (i+1) % 100 == 0:
#             print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}, Accuarcy: {:.2f}%'
#                  .format(epoch+1, num_epochs, i+1, total_step, loss.item(), (correct/total)*100))
        
#         loss = None
#         correct = None
#         torch.cuda.empty_cache()
        

In [11]:
# testy mctest face if this works the mem leak is caused by enumerating the dataloader

# train the model
total_step = len(train_loader)
loss_list = []
acc_list = []

torch.cuda.empty_cache()

for i, (images, labels) in enumerate(train_loader):
        
    # use cuda
#     images, labels = images.cuda(), labels.cuda()
                
    # foward pass
    outputs = model(images)
    loss = criterion(outputs, labels)
    loss_list.append(loss.item())
        
    # backpropagation
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
        
    # accuracy
    total = labels.size(0)
    _, predicted = torch.max(outputs.data, 1)
    correct = (predicted ==labels).sum().item()
    acc_list.append(correct/total)
    
    if (i+1) % 100 == 0:
        print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}, Accuarcy: {:.2f}%'.format(epoch+1, num_epochs, i+1, total_step, loss.item(), (correct/total)*100))
        
    loss = None
    correct = None
#     torch.cuda.empty_cache()

RuntimeError: mat1 and mat2 shapes cannot be multiplied (1x3504384 and 9216x4096)

In [None]:
for i, (images, labels) in enumerate(train_loader):
    print("index {} Image: {}".format(i, images))

In [None]:
subsets_indices = [0]
subset = torch.utils.data.Subset(train_set, subsets_indices)
print(subset)


In [None]:
# check on data
print(train_set.num_features)
print(train_set.num_classes)

In [None]:
len(train_loader)

Something to consider to avoid loading the dataset into memory

```
>>> l = ['a','b','c']
>>> it = iter(l)
>>> for i in range(len(l)): print(f'{i} : {next(it)}')
```
