In [0]:
!unzip data.zip -d data2


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  inflating: data2/data/2010-01-01-2020-01-01-20-False/images/RHC137.jpeg  
  inflating: data2/data/2010-01-01-2020-01-01-20-False/images/RHC138.jpeg  
  inflating: data2/data/2010-01-01-2020-01-01-20-False/images/RHC139.jpeg  
  inflating: data2/data/2010-01-01-2020-01-01-20-False/images/RHC14.jpeg  
  inflating: data2/data/2010-01-01-2020-01-01-20-False/images/RHC140.jpeg  
  inflating: data2/data/2010-01-01-2020-01-01-20-False/images/RHC141.jpeg  
  inflating: data2/data/2010-01-01-2020-01-01-20-False/images/RHC142.jpeg  
  inflating: data2/data/2010-01-01-2020-01-01-20-False/images/RHC143.jpeg  
  inflating: data2/data/2010-01-01-2020-01-01-20-False/images/RHC144.jpeg  
  inflating: data2/data/2010-01-01-2020-01-01-20-False/images/RHC145.jpeg  
  inflating: data2/data/2010-01-01-2020-01-01-20-False/images/RHC146.jpeg  
  inflating: data2/data/2010-01-01-2020-01-01-20-False/images/RHC147.jpeg  
  inflating: data2/data/

In [0]:
import torch
from torch.utils import data
import pandas as pd 
import numpy as np 
from torchvision import transforms
from torch.utils.data.dataset import Dataset
from PIL import Image


#https://github.com/utkuozbulak/pytorch-custom-dataset-examples#custom-dataset-fundamentals

class ImageDataset(Dataset):
    def __init__(self, dir_path):
        #read in csv for labels 
        self.labels_info = pd.read_csv(dir_path+'/labels/labels.csv', header=None)

        #first column of labels is ID 
        self.img_names = np.asarray(self.labels_info.iloc[:,0])

        #second column is labels 
        self.labels= np.asarray(self.labels_info.iloc[:,1])

        self.dir_path = dir_path 

        self.transformations = transforms.Compose([transforms.ToTensor(),
                                        transforms.Normalize((0.5,), (0.5,))])
        
        #self.transformations = transforms.Compose([transforms.ToTensor()])

    def __len__(self):
        return len(self.labels_info.index)

    def __getitem__(self, index):
        
        #get image file name 
        img_name = self.img_names[index]

        #open image 
        image = Image.open(self.dir_path+'/images/'+img_name+'.jpeg')

        #apply transform to tensor 
        image = self.transformations(image)

        #flatten 
        #print(image.shape)
        #image = image.reshape(1, -1)
        #image = image.squeeze()
        #print(image.shape)


        #get label of image 
        label = self.labels[index]
        
        return (image,label)


In [0]:
#from dataset import ImageDataset
import torch
from torchvision import datasets, transforms
from torch import nn, optim
import torch.nn.functional as F
import matplotlib.pyplot as plt
import numpy as np
import torch.nn as tnn

#https://towardsdatascience.com/understanding-pytorch-with-an-example-a-step-by-step-tutorial-81fc5f8c4e8e


class Linear(nn.Module):


    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(7500, 2000)
        self.fc2 = nn.Linear(2000, 500)
        self.fc3 = nn.Linear(500,1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        x = x.view(x.shape[0], -1)  # make sure inputs are flattened

        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = F.relu(self.fc3(x))
        x = self.sigmoid(x)  # preserve batch dim

        return x  
class CNN(nn.Module):
    
    
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Sequential(
            nn.Conv2d(3,10, kernel_size = 5, stride = 1), 
            #nn.BatchNorm2d(10),
            nn.ReLU(), 
            nn.MaxPool2d(kernel_size = 2)
        )
        self.conv2 = nn.Sequential(
            nn.Conv2d(10, 20, kernel_size = 5, stride = 1),
            #nn.BatchNorm2d(50),
            nn.ReLU(), 
            nn.MaxPool2d(kernel_size = 2)
        )
        self.conv3 = nn.Sequential(
            nn.Conv2d(10, 20, kernel_size = 5, stride = 1),
            #nn.BatchNorm2d(50),
            nn.ReLU(), 
            nn.MaxPool2d(kernel_size = 2)
        )


        #self.fc1 = nn.Linear(1620,100)
        #self.fc2 = nn.Linear(100,10)
        #self.fc3 = nn.Linear(10,1)
        self.fc1 = nn.Linear(80,10)
        self.fc3 = nn.Linear(10,1)
        self.softmax = nn.LogSoftmax(dim=1)
        self.sigmoid = nn.Sigmoid()
        self.dropout = nn.Dropout(p=0.2, inplace=False)
#https://blog.insightdatascience.com/automating-breast-cancer-detection-with-deep-learning-d8b49da17950

    def forward(self, x):
        x = self.dropout(self.conv1(x))
        
        x = self.dropout(self.conv2(x))

        

        x = x.reshape(x.size(0), -1)

        x = self.dropout(self.fc1(x))

        x = F.relu(x)


        #x = self.fc2(x)

        #x = F.relu(x)

        x = self.fc3(x)
        #x = self.softmax(x)
        x = self.sigmoid(x)

        x = x.view(-1)
        return x

    
class NNModel:
    def __init__(self, network, learning_rate):
        #train_set = ImageDataset('data/data/2010-01-01-2020-01-01-20-False')
        train_set = ImageDataset('data2/data/2020-01-01-2020-03-03-20-False')


        self.trainloader = torch.utils.data.DataLoader(dataset=train_set, batch_size = 20, shuffle=True, drop_last=True)

        #test_set = ImageDataset('data/data/2019-01-01-2020-01-01-20-False')
        test_set = ImageDataset('data2/data/2020-01-01-2020-03-03-20-False')


        self.testloader = torch.utils.data.DataLoader(dataset=test_set, batch_size=20, shuffle=False, drop_last=True)

        self.learning_rate = learning_rate 

        self.model= network
        
        self.lossfn = nn.BCELoss()  

        #not sure for this 
        self.optimizer = optim.Adam(self.model.parameters(), lr=learning_rate)
    

    #https://www.analyticsvidhya.com/blog/2019/01/guide-pytorch-neural-networks-case-studies/
    def train_step(self, epoch):
        self.model.train()

        running_loss = []

        #train one batch 
        for i, data in enumerate(self.trainloader):

            images, labels = data   

            #forward propagation  
            output = self.model(images)

            #loss calculation for one batch 
            labels = labels.type_as(output)
            loss = self.lossfn(output, labels)

            #zero gradients 
            self.optimizer.zero_grad()

            #backward propagation 
            loss.backward()

            #weight optimization 
            self.optimizer.step()

            #running_loss += loss.item()
            running_loss.append(loss.item())

            #print(i)

            if i % 3 == 2:
                print("Epoch: %2d, Batch: %4d, Loss: %.3f" % (epoch + 1, i + 1, np.mean(running_loss)))
                running_loss = []

        #print(running_loss)
        #return np.mean(running_loss)    




    #def train_epoch(self):
    #    #for epoch in range(n):
    #    train_loss = self.train_step()
        #print("Trainset accuracy for epoch:" + str(epoch))
        #print(np.mean(train_loss))
    #    return train_loss


    def eval(self):
        #eval mode for setting things like dropout 
        self.model.eval()
        true_pos, true_neg, false_pos, false_neg = 0, 0, 0, 0

        for images, labels in self.testloader:

            output = self.model(images)



            tp_batch, tn_batch, fp_batch, fn_batch = measures(output, labels)
            true_pos += tp_batch
            true_neg += tn_batch
            false_pos += fp_batch
            false_neg += fn_batch
            #accuracy = np.mean(valid_loss)

        accuracy = 100 * (true_pos + true_neg) / (true_pos+true_neg+false_pos+false_neg)
        print(true_pos)
        print(true_neg)
        print(false_pos)
        print(false_neg)
        print(accuracy)

def measures(outputs, labels):
    """
    TODO:
    Return (in the following order): the number of true positive
    classifications, true negatives, false positives and false
    negatives from the given batch outputs and provided labels.

    outputs and labels are torch tensors.
    """

    nTruePos, nTrueNeg, nFalsePos, nFalseNeg = 0, 0, 0, 0
    for o, l in zip(outputs, labels):
        #m = torch.nn.Sigmoid()
        print(o)
        o = torch.round(o)

        if (o == 1 and l == 1):
            nTruePos += 1
        if (o == 1 and l == 0):
            nFalsePos += 1
        if (o == 0 and l == 0):
            nTrueNeg += 1
        if (o == 0 and l == 1):
            nFalseNeg += 1
    return (nTruePos, nTrueNeg, nFalsePos, nFalseNeg)




def main():
    epochs = 10
    results = []

    model = CNN()
    
    m = NNModel(model, 0.01)


    for e in range(epochs):
        print("Training for epoch:"+str(e+1))
        m.train_step(e)
        #print(running_loss)

    #evaluate for tests 
    m.eval()



    


if __name__ == "__main__":
    main()

    

Training for epoch:1
Epoch:  1, Batch:    3, Loss: 0.711
Epoch:  1, Batch:    6, Loss: 0.642
Training for epoch:2
Epoch:  2, Batch:    3, Loss: 0.666
Epoch:  2, Batch:    6, Loss: 0.643
Training for epoch:3
Epoch:  3, Batch:    3, Loss: 0.641
Epoch:  3, Batch:    6, Loss: 0.696
Training for epoch:4
Epoch:  4, Batch:    3, Loss: 0.659
Epoch:  4, Batch:    6, Loss: 0.626
Training for epoch:5
Epoch:  5, Batch:    3, Loss: 0.598
Epoch:  5, Batch:    6, Loss: 0.649
Training for epoch:6
Epoch:  6, Batch:    3, Loss: 0.656
Epoch:  6, Batch:    6, Loss: 0.664
Training for epoch:7
Epoch:  7, Batch:    3, Loss: 0.659
Epoch:  7, Batch:    6, Loss: 0.655
Training for epoch:8
Epoch:  8, Batch:    3, Loss: 0.638
Epoch:  8, Batch:    6, Loss: 0.691
Training for epoch:9
Epoch:  9, Batch:    3, Loss: 0.632
Epoch:  9, Batch:    6, Loss: 0.632
Training for epoch:10
Epoch: 10, Batch:    3, Loss: 0.574
Epoch: 10, Batch:    6, Loss: 0.748
tensor(0.4197, grad_fn=<SelectBackward>)
tensor(0.4152, grad_fn=<Sele