In [6]:
import argparse
from PIL import Image
import numpy as np 
import os
import random
from collections import Counter
from itertools import islice
import torch


  from .autonotebook import tqdm as notebook_tqdm


In [7]:
def get_training_and_test_set(train_path): 
    """
    Gets a path for where the training data is, and extracts the names of the images and the corresponding ground truth value. 
    These are shuffled to not get biased data when splitting the data up in a training set and a validation set. 
    splits the training data into 80% training and 20% validation. 

    Inputs: 
        train_path: relative path of where the training data is 
    Output: 
        validation_data: dictionary with data used for validation, giving the gorund truth value as the key and the 
        corresponding image names as a list of every image which has the ground truth value. 
        training_data: dictionary with data used for training, built up in the same way as validation_data
        training_test_data: dictionary with image as key and 0 as value.
    """
    relative_path = os.path.abspath(os.path.dirname('__file__'))
    train_path = os.path.join(relative_path, train_path)
    validation_data, training_test_data, training_data = {}, {}, {}
    with open(train_path + '\\truth.dsv', "r") as csv_file:
        data = csv_file.readlines()
    random.shuffle(data)
    i = 0 
    for pic in data: 
        i += 1
        if (i % 8 == 0): 
            validation_data[pic.split(':')[0]] = pic.split(':')[1]
      
        else: 
            if(not pic.split(':')[1].strip('\n') in training_data): 
                training_data[pic.split(':')[1].strip('\n')] = []
            training_data[pic.split(':')[1].strip('\n')].append(pic.split(':')[0])
            training_test_data[pic.split(':')[0]] = pic.split(':')[1].strip('\n')
    return training_data, validation_data, training_test_data

In [8]:
from PIL import Image
import torch 
from torchvision import datasets
from torchvision.transforms import ToTensor
import torch.nn as nn 
import torch.nn.functional as F 
import torchvision
import torchvision.transforms as transforms 
import matplotlib.pyplot as plt 
import numpy as np 
import random
from torch.utils.data import TensorDataset, DataLoader

class TensorData: 
    def __init__(self, label, image_name, image_tensor): 
        self.label = label 
        self.image_name = image_name 
        self.image_tensor = image_tensor
    
        


        

In [9]:

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
def get_training_list_as_tensor(path, test_set):
    picture = 0 
    gt_and_image_dict = []
    for image in test_set: 
        picture = Image.open(path + "\\" + image)
        break
    image_tensor = torch.ones((len(test_set), 1,  picture.width, picture.height))   
    ground_truths = [] 
    i = 0 
    for pic_names, gt in test_set.items(): 
        picture = Image.open(path + "\\" + pic_names)
        transform = transforms.Compose([transforms.PILToTensor()])
        ground_truths.append(gt)
        image_tensor[i] = transform(picture)
        gt_and_image_dict.append([image_tensor[i],torch.tensor((int(gt)))])
        i += 1
    return image_tensor, ground_truths, gt_and_image_dict
        
   

def split(a, n):
    k, m = divmod(len(a), n)
    return (a[i*k+min(i, m):(i+1)*k+min(i+1, m)] for i in range(n))

input_size = 784 #28x28
hidden_size = 100
num_classes = 10 


num_epochs = 4
batch_size = 100 
learning_rate = 0.001

#TODO se om de to første her trenger å returneres 
train_data_dict, validation_data_dict, test_data_dict = get_training_and_test_set('train_1000_28')
train_images, train_gts, gt_and_trainingimages = get_training_list_as_tensor('train_1000_28', test_data_dict)
test_images, test_gts, gt_and_testimages = get_training_list_as_tensor('train_1000_28', validation_data_dict)


train_loader = DataLoader(gt_and_trainingimages, batch_size, shuffle=False)
test_loader = DataLoader(gt_and_testimages, batch_size, shuffle=False)
classes = list(train_data_dict.keys())




class ConvNet(torch.nn.Module): 
    def __init__(self): 
        super().__init__()
        #tror vi kun har 1 color channel, så input size er 1? 
        self.conv1 = nn.Conv2d(1, 8, kernel_size=4, padding=1)
        self.pool = nn.MaxPool2d(2,2)
        self.conv2 = nn.Conv2d(8, 20, kernel_size=4, padding=1)
        self.fc1 = nn.Linear(20*6*6, 120) 
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, len(classes))

    def forward(self, x): 
        x = self.pool(F.relu(self.conv1(x)))
        #x size: 100, 8, 13, 13
        x = self.pool(F.relu(self.conv2(x)))
        #x size: 100, 20, 6, 6
        x = x.view(-1, 20*6*6)
        #x size: 1, 7200
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x 
        
    
model = ConvNet()
#crossentropyloss includes softmax. 
criterion = nn.CrossEntropyLoss()
#brukte SGD i videoen 
optimizer = torch.optim.Adam(model.parameters(), lr = learning_rate)


n_total_steps = len(train_loader)
for epoch in range(num_epochs): 
    for i, (images, labels) in enumerate(train_loader): 
        #images = images.to(device)
        #labels = labels.to(device) 
        
        #forward pass 
        outputs = model(images) 
        loss = criterion(outputs, labels)
        
        #backward and optimize 
        optimizer.zero_grad()
        loss.backward() 
        optimizer.step() 
        
print('Finished Training')
PATH = './cnn.pth'
torch.save(model.state_dict(), PATH)

with torch.no_grad():
    n_correct = 0
    n_samples = 0
    n_class_correct = [0 for i in range(10)]
    n_class_samples = [0 for i in range(10)]
    
    for images, labels in test_loader: 

        #images = images.to(device)
        #labels = labels.to(device) 
        outputs = model(images)
        # max returns (value ,index)
        _, predicted = torch.max(outputs, 1)
        n_samples += labels.size(0)
        n_correct += (predicted == labels).sum().item()
        
        for i in range(len(labels)):
            label = labels[i]
            pred = predicted[i]
            if (label == pred):
                n_class_correct[label] += 1
            n_class_samples[label] += 1

        acc = 100.0 * n_correct / n_samples
        print(f'Accuracy of the network: {acc} %')

        for i in range(10):
            acc = 100.0 * n_class_correct[i] / n_class_samples[i]
            print(f'Accuracy of {classes[i]}: {acc} %')
     





Finished Training
Accuracy of the network: 81.0 %
Accuracy of 0: 100.0 %
Accuracy of 6: 100.0 %
Accuracy of 7: 63.63636363636363 %
Accuracy of 2: 50.0 %
Accuracy of 9: 83.33333333333333 %
Accuracy of 8: 100.0 %
Accuracy of 3: 81.81818181818181 %
Accuracy of 4: 83.33333333333333 %
Accuracy of 1: 80.0 %
Accuracy of 5: 81.81818181818181 %
Accuracy of the network: 83.2 %
Accuracy of 0: 100.0 %
Accuracy of 6: 100.0 %
Accuracy of 7: 68.75 %
Accuracy of 2: 54.54545454545455 %
Accuracy of 9: 80.0 %
Accuracy of 8: 100.0 %
Accuracy of 3: 85.71428571428571 %
Accuracy of 4: 87.5 %
Accuracy of 1: 81.25 %
Accuracy of 5: 83.33333333333333 %


In [18]:
def get_distance(im1, im2):
    """
    Used to get the eucledian distance between two 
    images that are already converted to 2d-numpy arrays.  
    Taken from the Cybernetics and AI-webpage (https://cw.fel.cvut.cz/wiki/courses/be5b33kui/labs/machine_learning/dist)
    Inputs: 
        im1 and im2: two images represented as numpy-arrays 
    Output: 
        number that corresponds to the euclidean distance between the images
    """ 
    diff = im1.astype(int).flatten() - im2.astype(int).flatten()
    d2 = np.linalg.norm(diff)
    return d2 

In [2]:
def get_most_likely_value(neighbours_list): 
    """
    Finds the most likely value taken from a list, and returns it. 
    Code taken from here: https://www.geeksforgeeks.org/python-find-most-frequent-element-in-a-list/
    """
    occurence_count = Counter(neighbours_list)
    return occurence_count.most_common(1)[0][0]

In [5]:
def features_in_pic(pixel_average, n): 
    """
    Returns a number between 1 and n, based on what number between 0 and 255 the numbers of the pixel average is. 
    Input: 
        pixel_average: The average of some pixels that is to be classified 
    Output: 
        pixel_class: a number between 1 and n
    """
    for pixel_class in range(1, n+1): 
        if(pixel_average < 256*pixel_class/n): 
            return pixel_class
    return n 

In [3]:
def minimize_2d_pic(picture, picture_width, n):
    """
    Returns a representation of the picture that does not contain the same amount of information as the original picture, 
    but at the same time it is then easier to classify fast. 
    
    Inputs: 
        picture: the picture 
        picture_width: the width of the picture 
        n: the number of features we want to divide the picture into 
    
    Output: 
        minimized_im: the minimized image 
    """
    #pitcuren er arrayen vi får ut! 
    #im2d = np.array(picture)
    #how many times we want to split the image, which gives how much we minimize it
    im_array_split = picture_width//(picture_width//(picture_width//8))
    minimized_im = np.zeros((((picture_width+1)//im_array_split)**2,))
    minimized_im_num = 0
    #want to go through the picture in 2d, to find the neighbours both in the row and the column, 
    #thereby getting a more valid average than in 1d.
    for pixel_i in range(im_array_split, picture_width+1, im_array_split): 
        for pixel_j in range(im_array_split, picture_width+1, im_array_split): 
            #averages over small parts of the picture 
            pixel_average = np.average(picture[pixel_j-im_array_split:pixel_j,pixel_i-im_array_split:pixel_i])
            pixel_feature = features_in_pic(pixel_average, n)
            minimized_im[minimized_im_num] = pixel_feature  
            minimized_im_num += 1 
    return minimized_im

In [4]:
def features_in_pic(pixel_average, n): 
    """
    Returns a number between 1 and n, based on what number between 0 and 255 the numbers of the pixel average is. 
    Input: 
        pixel_average: The average of some pixels that is to be classified 
    Output: 
        pixel_class: a number between 1 and n
    """
    for pixel_class in range(1, n+1): 
        if(pixel_average < 256*pixel_class/n): 
            return pixel_class
    return n 

In [25]:

import copy
def get_simplified_pic_output_values(training_tensor, n): 
    """
    Gets a dictionary, and gives out a dictionary containing simplified items corresponding 
    to the simplified versions of the pictures that were there previously. 
    
    Inputs: 
        training_dict: dictionary with training sets
        path: the path the images are from 
        n: pixel resolution ish
    Output: 
        simplified_training_dict: training dictionary with simpler resolution
        simplified_test_dict: test dictionary with simpler resolution
        
        simplified_test_dict: Navn på bilde som key, liste med piksel som item- 
        simplified_training_dict: ground truth value som key, liste med lister med piksel som item
    """
    new_tensor = copy.deepcopy(training_tensor)
    i = 0 
    for picture_and_label in training_tensor: 
        picture = picture_and_label[0]
        picture_width = 28
        minimized_picture = minimize_2d_pic(picture, picture_width, n)
        new_tensor[i][0] = minimized_picture
        i += 1
        minimized_pic_length = len(minimized_picture)
    return new_tensor, minimized_pic_length

In [26]:
def get_test_dsv_file(test_dictionary, output_path): 
    """
    Used to write data from a dictionary into a dsv-file. 
    Code use is partially taken from here: https://www.geeksforgeeks.org/write-a-dictionary-to-a-file-in-python/
    """
    test_abs_path = os.path.abspath(os.path.dirname(__file__))
    test_path = os.path.join(test_abs_path, output_path)  
    with open(test_path, "w") as tp: 
        for pic_name, predicted_value in test_dictionary.items(): 
            tp.write("%s:%s\n" % (pic_name, predicted_value))
            
def get_accurracy(): 
    return 1 


In [27]:
def knn_test(k, n): 
    """
    Basically runs the knn-algorithm on the data, and returns the predicted result in a dsv-file in the 
    output path. 
        
    Inputs: 
        k: number of neighbours we want to check with
        training_path: the path containing training examples 
        test_path: the path containing examples for testing/predicting 
        output_path: the path showing the predicted results on the test data
        n: number of features we want in the picture 
    Output: 
        test_dict_simplified: dictionary with the predicted results. Does not really have to be here as the 
        results are in a dsv-file anyway, but nice for checking
    """
    #gt_and_testimages
    #gt_and_trainingimages
    
    training_tensor = gt_and_trainingimages[0:10]
    test_tensor = gt_and_testimages[0:10]
    training_tensor_simplified = get_simplified_pic_output_values(training_tensor, n)
    test_tensor_simplified = get_simplified_pic_output_values(test_tensor, n)
    checked = False
    i = 0 
    
    for test_image_tensor in test_tensor_simplified: 
        j= 0 
        k_most_likely = [np.inf for _ in range(k*2)]
        for training_image_tensor in training_tensor_simplified: 
            checked = False
            eucledian_distance = get_distance(test_image_tensor[0][i], training_image_tensor[0][j])
            j += 1
            for kml_index in range(0, len(k_most_likely), 2): 
                if(eucledian_distance < k_most_likely[kml_index] and not checked): 
                    checked = True
                    k_most_likely[kml_index] = eucledian_distance
                    k_most_likely[kml_index+1] = ground_truth_value
        test_image_tensor[0][i] = get_most_likely_value(k_most_likely[1::2]) 
        i += 1
    return test_image_tensor
    
   

In [29]:
knn_test(4, 12)

  avg = a.mean(axis)
  ret = ret.dtype.type(ret / rcount)
