In [None]:
# Mount Google Drive so that the dataset stored on it can be read.
# This only needs to be done once.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Mounted at /content/drive


In [None]:
#importing necessary modules
import os

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
from PIL import Image
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader, Dataset

In [None]:
# Read the main FER2013 CSV file from the dataset folder on the mounted drive.
data_path = "/content/drive/MyDrive/ML_Project_Folder/project/datasets/fer2013"
csv_path = os.path.join(data_path, 'fer2013.csv')
data_df = pd.read_csv(filepath_or_buffer=csv_path)

In [None]:
# Split the rows 80/10/10 into training, validation and test sets.
# The first split shuffles with a fixed seed so the partition is reproducible.
# (train_test_split is imported in the imports cell at the top of the notebook.)
SEED = 42
X_train, X_holdout, y_train, y_holdout = train_test_split(
    data_df['pixels'], data_df['emotion'],
    test_size=0.2, shuffle=True, random_state=SEED)
# The held-out 20% is already in shuffled order, so it is simply cut in half
# without reshuffling. random_state has no effect when shuffle=False, so it is
# not passed here.
X_val, X_test, y_val, y_test = train_test_split(
    X_holdout, y_holdout, test_size=0.5, shuffle=False)

In [None]:
# Re-join each split's pixel strings with its emotion labels, giving one
# dataframe per split with the label stored in an 'emotion' column.
new_train = X_train.to_frame().assign(emotion=y_train)
new_val = X_val.to_frame().assign(emotion=y_val)
new_test = X_test.to_frame().assign(emotion=y_test)

In [None]:
# Create one directory per split to hold the images decoded from the pixels.
# exist_ok=True keeps the cell safe to re-run: os.mkdir raises FileExistsError
# when the directory is already there.
for split_name in ('train', 'validation', 'test'):
  os.makedirs(os.path.join(data_path, split_name), exist_ok=True)

In [None]:
# Create one sub-directory per emotion inside each split directory.
# Absolute paths with exist_ok=True are used, so this cell can be re-run safely
# and does not depend on the current working directory.
em_dict = {0: 'angry', 1: 'disgust', 2: 'fear', 3: 'happy', 4: 'sad', 5: 'surprise', 6: 'neutral'}
train_path = os.path.join(data_path, 'train')
validation_path = os.path.join(data_path, 'validation')
test_path = os.path.join(data_path, 'test')
for split_path in (train_path, validation_path, test_path):
  for emotion_name in em_dict.values():
    os.makedirs(os.path.join(split_path, emotion_name), exist_ok=True)
# The previous version of this cell finished with the working directory set to
# test_path; that is kept in case later cells rely on it.
os.chdir(test_path)

In [None]:
#save the images to their corresponding folder paths
def get_data(data_file, path_file):
  """Decode every row of ``data_file`` into a JPEG file under ``path_file``.

  Each 'pixels' string is parsed as space-separated uint8 values, reshaped to
  48x48 and saved as ``<path_file>/<emotion>/<emotion><n>.jpg``, where ``n``
  counts from 1 separately for each emotion within this call. Re-running the
  function with the same arguments therefore overwrites the same file names.

  Args:
    data_file: dataframe with a 'pixels' column (one space-separated string of
      48*48 values per row) and an 'emotion' column holding keys of the
      module-level ``em_dict``.
    path_file: directory that holds (or will hold) one sub-folder per emotion.

  Returns:
    dict mapping each emotion name to the number of images written for it.
  """
  counter_each = {name: 0 for name in em_dict.values()}
  # Make sure every class folder exists up front, so saving cannot fail just
  # because the directory-creation cell was skipped.
  for name in counter_each:
    os.makedirs(os.path.join(path_file, name), exist_ok=True)
  # Walk the two columns directly; DataFrame.iterrows builds a Series per row,
  # which is slow and unnecessary here.
  for pixels, label in zip(data_file['pixels'], data_file['emotion']):
    emotion = em_dict[label]
    img = np.fromstring(pixels, dtype='uint8', sep=' ').reshape((48, 48))
    counter_each[emotion] += 1
    filename = f'{emotion}{counter_each[emotion]}.jpg'
    Image.fromarray(img).save(os.path.join(path_file, emotion, filename))
  return counter_each

In [None]:
# Write every image of the training set to disk.
get_data(data_file=new_train, path_file=train_path)

In [None]:
# Write every image of the validation set to disk.
get_data(data_file=new_val, path_file=validation_path)

In [None]:
# Write every image of the test set to disk.
get_data(data_file=new_test, path_file=test_path)