# Method for storing the dataset
Store the dataset as a pandas DataFrame.

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import cv2
import os
import pandas as pd

DATADIR = "/home/hanna/Documents/KEX/SoCoF/archive/SOCOFing/Real" # directory to collect files from
# Side length, in pixels, of the square each image is resized to
IMG_SIZE = 90

In [2]:
# Integer codes for the label tokens found in the image file names.
# The codes are only unique within a category (gender / hand / finger).
feat_dict = {
    # gender
    "M": 0,
    "F": 1,
    # hand
    "Left": 0,
    "Right": 1,
    # finger
    "thumb": 0,
    "index": 1,
    "middle": 2,
    "ring": 3,
    "little": 4,
}

In [3]:
def get_attributes(img):
    """Decode the labels embedded in an image file name.

    The name is split on underscores. Field 0 is converted to the integer
    identity; fields 2, 3 and 4 are looked up in ``feat_dict`` to obtain
    the gender, hand and finger codes.

    Returns:
        The tuple ``(identity, gender, hand, finger)``.
    """
    fields = img.split('_')
    identity = int(fields[0])
    # Field 1 is not used; the label tokens sit at positions 2, 3 and 4.
    gender, hand, finger = (feat_dict[fields[pos]] for pos in (2, 3, 4))
    return identity, gender, hand, finger

In [4]:
def create_training_data():
    """Load every file in DATADIR as a flattened grayscale sample.

    Each file is read in grayscale, resized to IMG_SIZE x IMG_SIZE,
    flattened to a 1-D array and paired with the labels parsed from its
    file name by ``get_attributes``.

    Returns:
        A list with one ``[pixels, identity, gender, hand, finger]`` entry
        per file, in ``os.listdir`` order.

    Raises:
        OSError: If a file in DATADIR cannot be read as an image.
    """
    training_data = []
    for img in os.listdir(DATADIR):
        img_path = os.path.join(DATADIR, img)
        img_array = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
        if img_array is None:
            # cv2.imread reports failure by returning None rather than
            # raising; fail here with the file name instead of letting
            # cv2.resize raise an error that does not say which file it was.
            raise OSError("Could not read image file: {!r}".format(img_path))
        pixels = cv2.resize(img_array, (IMG_SIZE, IMG_SIZE)).flatten()
        idty, gend, hand, fing = get_attributes(img)
        training_data.append([pixels, idty, gend, hand, fing])
    return training_data

In [5]:
# Build the in-memory list of samples from the files in DATADIR
training_data = create_training_data()

In [6]:
print(len(training_data)) # Sanity check: should be 6000 samples

6000


In [6]:
# Store data in pandas DataFrame: one row per sample, with the flattened
# pixel array in "Image" and the integer-coded labels in the other columns
df = pd.DataFrame(training_data, columns=["Image","Identity","Gender","Hand","Finger"])

In [7]:
# Preview the first rows of the DataFrame
df.head()

Unnamed: 0,Image,Identity,Gender,Hand,Finger
0,"[160, 156, 156, 156, 156, 156, 156, 156, 156, ...",100,0,0,1
1,"[160, 156, 156, 156, 156, 156, 156, 156, 156, ...",100,0,0,4
2,"[160, 156, 156, 156, 156, 156, 156, 156, 156, ...",100,0,0,2
3,"[160, 156, 156, 156, 156, 156, 156, 156, 156, ...",100,0,0,3
4,"[160, 156, 156, 156, 156, 156, 156, 156, 156, ...",100,0,0,0


## CSV

$+$ Human readable <br>
$+$ Works with other programs/programming languages <br>
$-$ <b>Doesn't retain types</b> (array-valued cells are read back as strings, see below) <br>
$-$ Slower to read and write <br>
$-$ Takes more disk space <br>

(https://stackoverflow.com/questions/48770542/what-is-the-difference-between-save-a-pandas-dataframe-to-pickle-and-to-csv).

In [9]:
# Save data to csv file "images2.csv" (index=False leaves out the row index)
df.to_csv('images2.csv',index=False)

In [9]:
# Read the csv file back and store the data in a new DataFrame, uncsvd_df
uncsvd_df = pd.read_csv('images2.csv')

In [10]:
# Check which type the image data has after the csv round trip
type(uncsvd_df["Image"][0])

str

## Pickle

$+$ <b>Retains types</b> <br>
$+$ Less disk space <br>
$+$ Faster <br>
$-$ Not human readable <br>
$-$ Python only (cannot be read by other programs/programming languages) <br>
(https://stackoverflow.com/questions/48770542/what-is-the-difference-between-save-a-pandas-dataframe-to-pickle-and-to-csv)

In [8]:
# Save data as pickle file "images.pkl"
df.to_pickle("images.pkl")

In [12]:
# Read pickle data back into a new DataFrame, unpickled_df
# NOTE: only unpickle files from a trusted source; loading a pickle can run arbitrary code
unpickled_df = pd.read_pickle("images.pkl")

In [13]:
# Check which type the image data has after the pickle round trip
type(unpickled_df["Image"][0])

numpy.ndarray