# 3. Create a digit embedding space
The third task is to use the trained network as an embedding space for images of written symbols. In this case, you'll use it to differentiate images of the Greek letters alpha, beta, and gamma. Make new code files from the prior tasks. Read in your trained network as the first step and make a submodel that includes everything but the output layer.

Note to self: does "embedding space" just mean the output of everything up to (but not including) the classification layer?


In [None]:
# to move files
import os
import shutil
from os import walk
from os.path import exists

# import previous notebook
import nbimporter
import Task1AE as Note1AE
import Task1FG as Note1FG

# to save to csv
import numpy as np
import pandas as pd

import torch
import torch.optim as optim

import torchvision
from torchvision import datasets, transforms
from matplotlib import pyplot as plt



### A. Create a Greek symbol data set
Write a program to read in the images, scale them down to 28x28, convert them to greyscale, and invert the intensities.


In [None]:
from enum import Enum
class Alphabet(Enum):
    """Greek letters alpha, beta and gamma, each tied to an integer class index."""

    ALPHA = 0
    BETA = 1
    GAMMA = 2

In [None]:
# Sort the files of "images/<dataset_name>" into one sub-folder per category,
# where the category is the part of the filename before the first "_".
def move_filenames_to_category_folder(dataset_name):
    """Move every "<letter>_<rest>" file into "images/<dataset_name>/<letter>/".

    Category folders are created on demand. Entries that are hidden (leading
    "."), are not regular files, or do not follow the "<letter>_" naming
    pattern are left where they are, so stray files such as ".DS_Store" never
    produce a bogus category folder. Calling the function again on an already
    sorted folder is a no-op.

    Args:
        dataset_name: name of the dataset folder inside the local "images/"
            directory (e.g. "greek").

    Raises:
        FileNotFoundError: if "images/<dataset_name>" does not exist.
    """
    # images/greek
    target_path = os.path.join("images", dataset_name)

    # os.listdir returns a snapshot, so creating folders and moving files
    # while looping does not disturb the iteration.
    for filename in os.listdir(target_path):
        if filename.startswith("."):
            continue

        # "alpha_001.png" -> letter="alpha", separator="_"
        letter, separator, _ = filename.partition("_")
        if not separator or not letter:
            continue

        old_path = os.path.join(target_path, filename)
        if not os.path.isfile(old_path):
            # category folders from a previous run (or any other directory)
            continue

        category_dir = os.path.join(target_path, letter)
        # exist_ok: the folder is shared by every file of the same letter
        os.makedirs(category_dir, exist_ok=True)
        shutil.move(old_path, os.path.join(category_dir, filename))
            

In [None]:
# Load the images of a dataset stored under the local "images/" directory
# through PyTorch's ImageFolder and wrap them in a shuffling DataLoader.
def upload_to_ImageFolder(dataset_name, batch_size):
    """Return a DataLoader over the images in "images/<dataset_name>".

    Each image is resized, converted to greyscale, inverted, turned into a
    tensor and normalized. ImageFolder expects one sub-folder per class inside
    the dataset folder.

    Args:
        dataset_name: name of the dataset folder inside "images/".
        batch_size: number of images per batch served by the DataLoader.

    Returns:
        A torch.utils.data.DataLoader yielding shuffled (data, target) batches.
    """
    # - set up the transformation for our dataset
    transform = transforms.Compose([
        # NOTE(review): an int size scales the shorter edge to 28 px and keeps
        # the aspect ratio, so the output is 28x28 only for square inputs.
        transforms.Resize(28),
        transforms.Grayscale(),
        # p=1 -> every image is inverted
        transforms.RandomInvert(p=1),
        transforms.ToTensor(),
        # normalize with mean and std
        # NOTE(review): the usually quoted MNIST std is 0.3081; 0.3801 may be
        # a digit transposition - confirm against the values used in training.
        transforms.Normalize((0.1307,), (0.3801,)),
    ])

    # - create the dataset from our images folder
    dataset = datasets.ImageFolder("images/" + dataset_name, transform=transform)

    # - honour the requested batch size (it used to be hard-coded to 30)
    return torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=True)

In [None]:
# Export every image and target served by a dataloader to two csv files:
# "<dataset_name>.csv" (one flattened image per row) and
# "<dataset_name>_target.csv" (one class index per row).
# if isOverwrite is false, nothing is written when the data file already exists
def save_data_as_csv(dataloader, dataset_name, isOverwrite):
    """Write all images and labels of ``dataloader`` to csv, showing each image.

    All batches are exported, not just the first one. For every image the
    running index and its Alphabet label are printed and the image is
    displayed with matplotlib before its row is written.

    Args:
        dataloader: iterable of (data, target) batches. Assumes image tensors
            laid out as (batch, channel, height, width); only channel 0 of
            each image is exported - TODO confirm for non-greyscale loaders.
        dataset_name: prefix (may include a directory) of the two csv files.
        isOverwrite: when False, return without writing anything if
            "<dataset_name>.csv" already exists.

    Raises:
        ValueError: if a target value is not a member of Alphabet.
    """
    data_filename = dataset_name + '.csv'
    target_filename = dataset_name + '_target.csv'

    # not overwrite means if the data file already exists dont write;
    # only the data file is checked, as before
    if not isOverwrite and os.path.exists(data_filename):
        return

    image_index = 0
    with open(data_filename, 'w') as csvfile, \
            open(target_filename, 'w') as csvtargetfile:
        csvtargetfile.write("category" + "\n")

        for data, target in dataloader:
            for image, label in zip(data, target):
                print("\nindex:>>>>", image_index)

                # - display the image info
                img_mat = image[0]
                ground_truth = int(label)
                print("ground truth:", Alphabet(ground_truth))
                plt.imshow(img_mat, cmap="gray", interpolation="none")
                plt.show()

                # - flatten the image row by row into a single csv row;
                #   float64 keeps the text identical to the previous
                #   np.append based export
                pixels = img_mat.numpy().astype(np.float64).ravel()

                # - the header is derived from the first image so that it
                #   always matches the number of pixel columns
                if image_index == 0:
                    header_str = ','.join(map(str, range(pixels.size)))
                    csvfile.write(header_str + "\n")

                # - append this image row and target to the csv files
                np.savetxt(csvfile, [pixels], delimiter=',', fmt='%s')
                csvtargetfile.write(str(ground_truth) + "\n")

                image_index += 1

     

In [None]:
# 1. sort the raw image files into one folder per category
dataset_name = "greek"
move_filenames_to_category_folder(dataset_name)    

# 2. load the sorted images through PyTorch's ImageFolder
batch_size = 30
dataloader = upload_to_ImageFolder(dataset_name, batch_size)

# 3. inspect a single image tensor from the first batch of our OWN dataset
data, target = next(iter(dataloader))
print("single image>>>>>: ", data[0])
print("shape: ", data[0].shape)

# 4. display sample images (helper imported from the Task1AE notebook)
Note1AE.display_sample_images(dataloader)

# 5. save data and targets as csv, overwriting any existing files
save_data_as_csv(dataloader, "greek", isOverwrite=True)

## B. Create Truncated Model