# Production stage: use files 0-6 for model traning

In [1]:
!ls raw_data


data_gathered_at_0.csv	   data_gathered_at_3.csv  initial_training_data.csv
data_gathered_at_1.csv	   data_gathered_at_4.csv  test_data.csv
data_gathered_at_2.csv	   data_gathered_at_5.csv  validation_test_data.csv
data_gathered_at_2_bk.csv  data_gathered_at_6.csv


In [2]:
!mkdir accum_data

mkdir: cannot create directory ‘accum_data’: File exists


In [3]:
!cp raw_data/initial_training_data.csv accum_data/accumulated_data.csv


In [5]:
!ls -l accum_data/

total 2620
-rw-r--r-- 1 root root 2679360 May 30 12:42 accumulated_data.csv


In [6]:
import pandas as pd
import numpy as np
import os

from tensorflow.keras import models
from tensorflow.keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPool2D
from tensorflow.keras.optimizers import RMSprop,Adam
from tensorflow.keras.utils import to_categorical


raw_data_path = 'raw_data/'
accum_data_path = 'accum_data/'
accum_file_name = accum_data_path+'accumulated_data.csv'

def create_model():
    model = models.Sequential()
    model.add(Conv2D(32, (3, 3), activation='relu', input_shape=(48, 48, 1)))
    model.add(MaxPool2D((2, 2)))
    model.add(Conv2D(64, (3, 3), activation='relu'))
    model.add(MaxPool2D((2, 2)))
    model.add(Conv2D(64, (3, 3), activation='relu'))
    model.add(Flatten())
    model.add(Dense(64, activation='relu'))
    model.add(Dense(7, activation='softmax'))
    model.compile(optimizer=Adam(lr=1e-3), loss='categorical_crossentropy', metrics=['accuracy'])
    return model


def prepare_data(data):
    """ Prepare data for modeling 
        input: data frame with labels und pixel data
        output: image and label array """
    
    image_array = np.zeros(shape=(len(data), 48, 48))
    image_label = np.array(list(map(int, data['Emotion'])))
    
    for i, row in enumerate(data.index):
        image = np.fromstring(data.loc[row, 'Pixels'], dtype=int, sep=' ')
        image = np.reshape(image, (48, 48))
        image_array[i] = image
        
    return image_array, image_label


def data_to_tf_data(df):
    image_array, image_label = prepare_data(df)
    images = image_array.reshape((image_array.shape[0], 48, 48, 1))
    images = images.astype('float32')/255
    labels = to_categorical(image_label)
    return images, labels


In [7]:
def add_to_accum_data(nmb):
    accum_data = pd.read_csv(accum_file_name)
    next_data_file = raw_data_path+'data_gathered_at_' + str(nmb)+'.csv'
    add_data = pd.read_csv(next_data_file)
    accum_data = accum_data.append(add_data, ignore_index = True)
    accum_data.to_csv(accum_file_name)


In [17]:
nmb = 1
add_to_accum_data(nmb)


In [18]:
train = pd.read_csv(accum_file_name)
train_images, train_labels = data_to_tf_data(train)

val = pd.read_csv(raw_data_path+'validation_test_data.csv')
val_images, val_labels = data_to_tf_data(val)


model = create_model()
class_weight = dict(zip(range(0, 7), (((train['Emotion'].value_counts()).sort_index())/len(train['Emotion'])).tolist()))
history = model.fit(train_images, train_labels,
                    validation_data=(val_images, val_labels),
                    class_weight = class_weight,
                    epochs=12,
                    batch_size=64)

df = pd.read_csv(raw_data_path+'test_data.csv')
test_images, test_labels = data_to_tf_data(df)
test_loss, test_acc = model.evaluate(test_images, test_labels)



Epoch 1/12
Epoch 2/12
Epoch 3/12
Epoch 4/12
Epoch 5/12
Epoch 6/12
Epoch 7/12
Epoch 8/12
Epoch 9/12
Epoch 10/12
Epoch 11/12
Epoch 12/12


In [11]:
from tensorflow import keras

model.save("initial_model")


INFO:tensorflow:Assets written to: initial_model/assets


In [12]:
!dvc add initial_model/


Adding...                                                                       
![A
Computing file/dir hashes (only done once)            |0.00 [00:00,      ?md5/s][A
                                                                                [A
![A
                                                      |0.00 [00:00,       ?it/s][A
                                                                                [A
![A
  0%|          |Saving files                          0/4 [00:00<?,     ?file/s][A
                                                                                [A
![A
  0%|          |.oPt3T5N5yhYqrbVfAjNGGY.tmp           0/1 [00:00<?,       ?it/s][A
                                                                                [A
![A
  0%|          |.fqFQgoUXRF38Q4bm9UFg26.tmp     0.00/164k [00:00<?,       ?it/s][A
                                                                                [A
![A
  0%|          |.HMLJeamqTmrvkHcTgrkKLr.tmp    0.

In [13]:
!git add initial_model.dvc

In [14]:
!git status

On branch master
Your branch is ahead of 'origin/master' by 2 commits.
  (use "git push" to publish your local commits)

Changes to be committed:
  (use "git reset HEAD <file>..." to unstage)

	[32mmodified:   initial_model.dvc[m

Changes not staged for commit:
  (use "git add <file>..." to update what will be committed)
  (use "git checkout -- <file>..." to discard changes in working directory)

	[31mmodified:   FinalProj2_Initial.ipynb[m
	[31mmodified:   FinalProj2_production.ipynb[m

Untracked files:
  (use "git add <file>..." to include in what will be committed)

	[31m.ipynb_checkpoints/[m
	[31mUntitled.ipynb[m
	[31maccum_data/[m
	[31mmlruns/[m



In [15]:
!git add FinalProj2_Initial.ipynb FinalProj2_production.ipynb

In [16]:
!git commit -m "Model trained wit data at_0"

[master d278e62] Model trained wit data at_0
 3 files changed, 384 insertions(+), 245 deletions(-)
