# ===> You do **NOT** need to go through this notebook if you are using your own dataset!

## Read the CSV file, then write each image into the **raw** folder, inside a sub-folder named after its class label
## This is necessary because DALI requires the data to be saved under a root folder (here **raw**) with one sub-folder per class

## read in the CSV file

In [1]:
import os 
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
# Location of the FER2013 CSV file, relative to the current working directory.
filname='./fer2013.csv'

def getData(filname):
    """Load labelled 48x48 images from a FER2013-style CSV file.

    The first line of the file is treated as a header and skipped. On every
    following line, column 0 is parsed as the integer class label and
    column 1 as a space-separated list of 48*48 = 2304 integer pixel values.
    Any further columns are ignored.

    Parameters
    ----------
    filname : str
        Path of the CSV file to read.

    Returns
    -------
    X : numpy.ndarray
        Pixel data divided by 255.0, shaped (N, 1, 48, 48), where N is the
        number of data rows in the file (35887 for the full fer2013.csv).
    Y : numpy.ndarray
        The N class labels, shaped (N,).

    Raises
    ------
    ValueError
        If a label or pixel is not an integer, or if the pixel data cannot
        be arranged as N images of 48x48.
    """
    Y = []
    X = []
    # The context manager closes the file even if parsing fails part-way.
    with open(filname) as csv_file:
        next(csv_file, None)  # skip the header row (no-op on an empty file)
        for line in csv_file:
            if not line.strip():
                # Tolerate blank lines, e.g. a trailing newline at end of file.
                continue
            row = line.split(',')
            Y.append(int(row[0]))
            X.append([int(p) for p in row[1].split()])
    X, Y = np.array(X) / 255.0, np.array(Y)  # scaling is already done here
    # Derive the sample count from the data instead of hard-coding 35887, so
    # that subsets of the dataset (or an empty file) load as well.
    X = X.reshape(len(Y), 1, 48, 48)
    return X, Y
# Parse the CSV once; the trailing tuple is displayed as the cell's output.
X, Y = getData(filname)
(X.shape, Y.shape)

((35887, 1, 48, 48), (35887,))

In [2]:
# Class balance: the distinct labels with their absolute counts, followed by
# each class's share of the whole dataset rounded to three decimals.
cnt_labels = np.unique(Y, return_counts=True)
print(cnt_labels)
class_counts = cnt_labels[1]
print([round(share, 3) for share in class_counts / len(Y)])

(array([0, 1, 2, 3, 4, 5, 6]), array([4953,  547, 5121, 8989, 6077, 4002, 6198]))
[0.138, 0.015, 0.143, 0.25, 0.169, 0.112, 0.173]


## make directory if it does not exist yet

In [4]:
# Integer class label -> name of the sub-folder that holds that class.
label_map = {0:'Anger', 1:'Disgust',2:'Fear', 3:'Happy',4: 'Neutral', 5:'Sad', 6:'Surprise'}

# Create the root folder and one sub-folder per class; exist_ok makes the
# cell safe to re-run.
os.makedirs('./raw', exist_ok=True)
for class_name in label_map.values():
    os.makedirs('./raw/' + class_name, exist_ok=True)

# Show the resulting sub-folders as the cell's output.
os.listdir('./raw')

./raw/Anger
./raw/Disgust
./raw/Fear
./raw/Happy
./raw/Neutral
./raw/Sad
./raw/Surprise


['Sad', 'Fear', 'Happy', 'Disgust', 'Anger', 'Neutral', 'Surprise']

## Write each individual image into its own class sub-folder

In [13]:
# Write every image to ./raw/<ClassName>/<ClassName>_<index>.png.

# Drop the singleton channel axis: (N, 1, 48, 48) -> (N, 48, 48).
# reshape is used instead of np.squeeze because squeeze would also collapse
# the sample axis when N == 1; it is also safe to re-run on an already
# flattened array.
X=X.reshape(-1, 48, 48)
import random  # not used in this cell; kept in case other cells rely on it
label_map = {0:'Anger', 1:'Disgust',2:'Fear', 3:'Happy',4: 'Neutral', 5:'Sad', 6:'Surprise'}

# Make sure every class folder exists so this cell also works when it is run
# without the directory-creation cell having been executed first.
for class_name in label_map.values():
    os.makedirs("./raw/{}".format(class_name), exist_ok=True)

for i, (img, label) in enumerate(zip(X, Y)):
    sub_dir=label_map[label]
    t="./raw/{}/{}_{}.png".format(sub_dir, sub_dir,str(i))
    # NOTE(review): for 2-D input plt.imsave applies matplotlib's default
    # colormap and per-image normalisation, so the PNGs are colour-mapped
    # rather than plain grayscale. Pass cmap='gray' (and vmin/vmax) if
    # grayscale files are wanted -- confirm what the downstream loader expects.
    plt.imsave(t,img)
print("processing complete !")

processing complete !


<Figure size 288x288 with 0 Axes>

## This is what the folder structure looks like, and it is the layout DALI requires

In [12]:
from __future__ import print_function
import os.path
import fnmatch

for root, dir ,files in os.walk("raw"):
    depth = root.count('/')
    ret = ""
    if depth >0 :
        ret += " " * (depth -1) + "|-"
    print( ret + root )
    examples_to_show=0
    for items in fnmatch.filter (files, "*"):
        if examples_to_show <5 :
            print(" " * len(ret)+ "|-" + items)
            examples_to_show +=1

raw
|-raw/Sad
  |-Sad_31633.png
  |-Sad_11466.png
  |-Sad_25035.png
  |-Sad_8518.png
  |-Sad_29110.png
|-raw/Fear
  |-Fear_14623.png
  |-Fear_32933.png
  |-Fear_30727.png
  |-Fear_29676.png
  |-Fear_389.png
|-raw/Happy
  |-Happy_4972.png
  |-Happy_18649.png
  |-Happy_1970.png
  |-Happy_22373.png
  |-Happy_25902.png
|-raw/Disgust
  |-Disgust_2795.png
  |-Disgust_17852.png
  |-Disgust_1661.png
  |-Disgust_17771.png
  |-Disgust_30252.png
|-raw/Anger
  |-Anger_25392.png
  |-Anger_7284.png
  |-Anger_13041.png
  |-Anger_4756.png
  |-Anger_28128.png
|-raw/Neutral
  |-Neutral_23432.png
  |-Neutral_24351.png
  |-Neutral_19016.png
  |-Neutral_26975.png
  |-Neutral_5435.png
|-raw/Surprise
  |-Surprise_3681.png
  |-Surprise_1948.png
  |-Surprise_23233.png
  |-Surprise_30530.png
  |-Surprise_30238.png
