## This file makes the data usable by the deep learning algorithm and also helps with visualizing it. It reads the CSV data file `sample.csv` and converts the image pixels from `string` to `numpy` arrays. Initially, all the pixels of an image are stored as a single space-separated string. After running this file, they are stored as integer columns of a pandas DataFrame (one row per image, plus a `label` column), which is saved as a pickle file.

In [1]:
%matplotlib notebook
%load_ext autoreload
from glob import glob
from skimage import io
from matplotlib import style
import matplotlib.pyplot as plt
import pandas as pd
import joblib as jb
import time
import numpy as np

##  Import data


In [2]:
# Load the sample CSV, using its first column as the row index.
# The 'Usage' column is not important here: it is book-keeping left over
# from a previous competition.
data = pd.read_csv("Data/sample.csv", index_col=0)
data.head()

Unnamed: 0,emotion,pixels,Usage
0,0,70 80 82 72 58 58 60 63 54 58 60 48 89 115 121...,Training
1,0,151 150 147 155 148 133 111 140 170 174 182 15...,Training
2,2,231 212 156 164 174 138 161 173 182 200 106 38...,Training
3,4,24 32 36 30 32 23 19 20 30 41 21 22 32 34 21 1...,Training
4,6,4 0 0 0 0 0 0 0 0 0 0 0 3 15 23 28 48 50 58 84...,Training


In [3]:
# Sanity check: show the (rows, columns) dimensions of the loaded frame.
data.shape

(100, 3)

## Reformat the 'pixels' column, which holds the image pixels

In [4]:
def reformat_images(image_data):
    '''
    Convert a whitespace-separated string of pixel values to a numpy array.

    Parameters
    ----------
    image_data : str
        Pixel intensities written as integers separated by whitespace,
        e.g. ``"70 80 82"``.

    Returns
    -------
    numpy.ndarray
        1-D array of dtype ``int`` with one element per pixel value; an
        empty array when the string contains no values.

    Raises
    ------
    ValueError
        If any token is not a valid integer literal.
    '''
    # Tokenise explicitly instead of using np.fromstring: fromstring stops at
    # the first unparsable token and returns the truncated prefix (with only
    # a DeprecationWarning), which would silently corrupt an image.
    return np.array(image_data.split(), dtype=int)

In [5]:
# Parse every row's pixel string into an integer vector and collect the
# vectors into one array (one row per image; 2-D when all images have the
# same number of pixels). The column is selected by name rather than by
# position so the result does not depend on column order or on `index_col`.
images = np.array([reformat_images(pixels) for pixels in data['pixels']])
images

array([[ 70,  80,  82, ..., 106, 109,  82],
       [151, 150, 147, ..., 193, 183, 184],
       [231, 212, 156, ...,  88, 110, 152],
       ..., 
       [ 15,  16,  19, ..., 218, 226, 229],
       [124, 109,  86, ..., 106, 109, 115],
       [111, 106, 107, ...,  95, 107, 101]])

## Create new dataframe

In [6]:
# Wrap the pixel array in a DataFrame: one row per image, one column per pixel.
new_data = pd.DataFrame(data=images)
new_data.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,2294,2295,2296,2297,2298,2299,2300,2301,2302,2303
0,70,80,82,72,58,58,60,63,54,58,...,159,182,183,136,106,116,95,106,109,82
1,151,150,147,155,148,133,111,140,170,174,...,105,108,95,108,102,67,171,193,183,184
2,231,212,156,164,174,138,161,173,182,200,...,104,138,152,122,114,101,97,88,110,152
3,24,32,36,30,32,23,19,20,30,41,...,174,126,132,132,133,136,139,142,143,142
4,4,0,0,0,0,0,0,0,0,0,...,12,34,31,31,31,27,31,30,29,30


In [7]:
# Attach the emotion code of each image as the 'label' column. Raw values are
# used (not the Series) so assignment is positional, not index-aligned.
new_data['label'] = data['emotion'].values

In [8]:
# Preview the first rows to confirm the 'label' column was appended.
new_data.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,2295,2296,2297,2298,2299,2300,2301,2302,2303,label
0,70,80,82,72,58,58,60,63,54,58,...,182,183,136,106,116,95,106,109,82,0
1,151,150,147,155,148,133,111,140,170,174,...,108,95,108,102,67,171,193,183,184,0
2,231,212,156,164,174,138,161,173,182,200,...,138,152,122,114,101,97,88,110,152,2
3,24,32,36,30,32,23,19,20,30,41,...,126,132,132,133,136,139,142,143,142,4
4,4,0,0,0,0,0,0,0,0,0,...,34,31,31,31,27,31,30,29,30,6


## Save new dataframe to pickle using pandas library

In [11]:
# Persist the pixel columns and the 'label' column together in one pickle file.
new_data.to_pickle('Data/new_data.pickle')

## Check the file by reloading with pandas

In [12]:
# Round-trip check: load the pickle back and display it.
# NOTE: unpickling can execute arbitrary code, so only load pickle files from
# a trusted source (here, the file this notebook writes itself).
pd.read_pickle('Data/new_data.pickle')

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,2295,2296,2297,2298,2299,2300,2301,2302,2303,label
0,70,80,82,72,58,58,60,63,54,58,...,182,183,136,106,116,95,106,109,82,0
1,151,150,147,155,148,133,111,140,170,174,...,108,95,108,102,67,171,193,183,184,0
2,231,212,156,164,174,138,161,173,182,200,...,138,152,122,114,101,97,88,110,152,2
3,24,32,36,30,32,23,19,20,30,41,...,126,132,132,133,136,139,142,143,142,4
4,4,0,0,0,0,0,0,0,0,0,...,34,31,31,31,27,31,30,29,30,6
5,55,55,55,55,55,54,60,68,54,85,...,83,77,53,37,34,30,34,30,57,2
6,20,17,19,21,25,38,42,42,46,54,...,36,68,91,85,93,97,99,107,118,4
7,77,78,79,79,78,75,60,55,47,48,...,124,139,150,179,180,175,125,67,68,3
8,85,84,90,121,101,102,133,153,153,169,...,151,111,106,102,99,89,58,73,84,3
9,255,254,255,254,254,179,122,107,95,124,...,176,188,251,252,253,253,254,255,255,2
