# Jacob Dorfman
# Final project 2

## Code from train_model.py

In [10]:
import pandas as pd
import numpy as np
import os

from tensorflow.keras import models
from tensorflow.keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPool2D
from tensorflow.keras.optimizers import RMSprop,Adam
from tensorflow.keras.utils import to_categorical


# Directory prefix (relative path, with trailing slash) that is prepended to the
# CSV file names when the training, validation and test data are loaded.
raw_data_path = 'raw_data/'

def create_model(input_shape=(48, 48, 1), num_classes=7, learning_rate=1e-3):
    """Build and compile the convolutional classifier.

    The network stacks three 3x3 ReLU convolutions (32, 64 and 64 filters),
    with 2x2 max pooling after the first two, then flattens into a 64-unit
    ReLU dense layer and a softmax output layer.

    Args:
        input_shape: shape of a single input image; defaults to (48, 48, 1).
        num_classes: number of units in the softmax output; defaults to 7.
        learning_rate: learning rate passed to the Adam optimizer;
            defaults to 1e-3.

    Returns:
        A compiled Sequential model using categorical cross-entropy loss and
        reporting accuracy.
    """
    model = models.Sequential()
    model.add(Conv2D(32, (3, 3), activation='relu', input_shape=input_shape))
    model.add(MaxPool2D((2, 2)))
    model.add(Conv2D(64, (3, 3), activation='relu'))
    model.add(MaxPool2D((2, 2)))
    model.add(Conv2D(64, (3, 3), activation='relu'))
    model.add(Flatten())
    model.add(Dense(64, activation='relu'))
    model.add(Dense(num_classes, activation='softmax'))
    # `learning_rate` is the supported keyword; `lr` is a deprecated alias.
    model.compile(optimizer=Adam(learning_rate=learning_rate),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model


def prepare_data(data):
    """Prepare data for modeling.

    Args:
        data: data frame with an 'Emotion' column (integer-convertible labels)
            and a 'Pixels' column holding 48*48 space-separated pixel values
            per row.

    Returns:
        (image_array, image_label): a float array of shape (len(data), 48, 48)
        and an array with one integer label per row.

    Raises:
        ValueError: if a row's pixel string does not contain exactly 48*48 values
            (raised by the reshape).
    """
    image_array = np.zeros(shape=(len(data), 48, 48))
    image_label = np.array([int(label) for label in data['Emotion']])

    # Walk the column values positionally instead of looking rows up by index
    # label: with duplicated labels (e.g. after concatenating frames) a
    # label-based lookup returns several rows at once and parsing fails.
    for i, pixels in enumerate(data['Pixels']):
        image = np.fromstring(pixels, dtype=int, sep=' ')
        image_array[i] = np.reshape(image, (48, 48))

    return image_array, image_label


def data_to_tf_data(df, num_classes=7):
    """Convert a labelled pixel data frame into model-ready arrays.

    Args:
        df: data frame accepted by prepare_data.
        num_classes: width of the one-hot label matrix; defaults to 7.

    Returns:
        (images, labels): float32 images of shape (n, 48, 48, 1) scaled by
        1/255, and one-hot labels with `num_classes` columns.
    """
    image_array, image_label = prepare_data(df)
    # Add the trailing channel axis expected by Conv2D.
    images = image_array.reshape((image_array.shape[0], 48, 48, 1))
    images = images.astype('float32')/255
    # Fix the one-hot width explicitly: otherwise it is inferred from the
    # largest label present, so a split missing the highest class would yield
    # fewer columns than requested.
    labels = to_categorical(image_label, num_classes=num_classes)
    return images, labels


## Run initial training and test

In [11]:
# Load the training and validation splits and convert them to model inputs.
train = pd.read_csv(raw_data_path+'initial_training_data.csv')
train_images, train_labels = data_to_tf_data(train)

val = pd.read_csv(raw_data_path+'validation_test_data.csv')
val_images, val_labels = data_to_tf_data(val)


model = create_model()

# Weight each class inversely to its frequency so under-represented classes
# contribute more to the loss (using the raw frequency as the weight would do
# the opposite). Keys come from the labels actually present, so a missing class
# cannot shift the weights onto the wrong class.
class_counts = train['Emotion'].value_counts().sort_index()
class_weight = {
    int(label): len(train) / (len(class_counts) * count)
    for label, count in class_counts.items()
}
history = model.fit(train_images, train_labels,
                    validation_data=(val_images, val_labels),
                    class_weight = class_weight,
                    epochs=12,
                    batch_size=64)

# Evaluate the trained model on the held-out test split.
df = pd.read_csv(raw_data_path+'test_data.csv')
test_images, test_labels = data_to_tf_data(df)
test_loss, test_acc = model.evaluate(test_images, test_labels)


Instructions for updating:
The `validate_indices` argument has no effect. Indices are always validated on CPU and never validated on GPU.




Epoch 1/12
Epoch 2/12
Epoch 3/12
Epoch 4/12
Epoch 5/12
Epoch 6/12
Epoch 7/12
Epoch 8/12
Epoch 9/12
Epoch 10/12
Epoch 11/12
Epoch 12/12


## Save model

In [12]:
from tensorflow import keras

# Write the trained model to the "initial_model" directory.
model.save("initial_model")


INFO:tensorflow:Assets written to: initial_model/assets


In [13]:
import os
from mlflow import log_metric, log_param, log_artifacts
import mlflow


In [14]:
# set_experiment creates the experiment when it does not exist yet and then
# makes it active, so this cell can be re-run; a separate create_experiment
# call raises once the experiment already exists.
mlflow.set_experiment('FinalProj2_initial')

## Git and DVC

In [15]:
# Create a Git repository in the project directory.
!git init

Initialized empty Git repository in /home/ubuntu/notebooks/JacobD/JD_FinalProj2/.git/


In [16]:
# Initialise DVC inside the Git repository.
!dvc init

Initialized DVC repository.

You can now commit the changes to git.

[31m+---------------------------------------------------------------------+
[0m[31m|[0m                                                                     [31m|[0m
[31m|[0m        DVC has enabled anonymous aggregate usage analytics.         [31m|[0m
[31m|[0m     Read the analytics documentation (and how to opt-out) here:     [31m|[0m
[31m|[0m             <[36mhttps://dvc.org/doc/user-guide/analytics[39m>              [31m|[0m
[31m|[0m                                                                     [31m|[0m
[31m+---------------------------------------------------------------------+
[0m
[33mWhat's next?[39m
[33m------------[39m
- Check out the documentation: <[36mhttps://dvc.org/doc[39m>
- Get help and share ideas: <[36mhttps://dvc.org/chat[39m>
- Star us on GitHub: <[36mhttps://github.com/iterative/dvc[39m>
[0m

In [22]:
# Show the working-tree state.
# NOTE(review): this cell's execution count (22) is higher than that of the
# cells that follow it — re-run the notebook top to bottom to confirm the order.
!git status

On branch master
Changes not staged for commit:
  (use "git add <file>..." to update what will be committed)
  (use "git restore <file>..." to discard changes in working directory)
	[31mmodified:   FinalProj2_Initial.ipynb[m

Untracked files:
  (use "git add <file>..." to include in what will be committed)
	[31m.ipynb_checkpoints/[m
	[31minitial_model/[m
	[31mmlruns/[m
	[31mraw_data/[m

no changes added to commit (use "git add" and/or "git commit -a")


In [19]:
# Commit the DVC configuration files created by `dvc init`.
!git commit -m "Initialize DVC"

[master (root-commit) dd93922] Initialize DVC
 Committer: Ubuntu <ubuntu@ip-172-31-15-107.eu-west-1.compute.internal>
Your name and email address were configured automatically based
on your username and hostname. Please check that they are accurate.
You can suppress this message by setting them explicitly:

    git config --global user.name "Your Name"
    git config --global user.email you@example.com

After doing this, you may fix the identity used for this commit with:

    git commit --amend --reset-author

 9 files changed, 515 insertions(+)
 create mode 100644 .dvc/.gitignore
 create mode 100644 .dvc/config
 create mode 100644 .dvc/plots/confusion.json
 create mode 100644 .dvc/plots/confusion_normalized.json
 create mode 100644 .dvc/plots/default.json
 create mode 100644 .dvc/plots/linear.json
 create mode 100644 .dvc/plots/scatter.json
 create mode 100644 .dvc/plots/smooth.json
 create mode 100644 .dvcignore


In [20]:
# Stage the notebook and the two training scripts.
!git add FinalProj2_Initial.ipynb train_model.py train_model_2.py

In [21]:
# Commit the staged source files.
!git commit -m 'Initial source'

[master 5db1af8] Initial source
 Committer: Ubuntu <ubuntu@ip-172-31-15-107.eu-west-1.compute.internal>
Your name and email address were configured automatically based
on your username and hostname. Please check that they are accurate.
You can suppress this message by setting them explicitly:

    git config --global user.name "Your Name"
    git config --global user.email you@example.com

After doing this, you may fix the identity used for this commit with:

    git commit --amend --reset-author

 3 files changed, 453 insertions(+)
 create mode 100644 FinalProj2_Initial.ipynb
 create mode 100644 train_model.py
 create mode 100644 train_model_2.py


In [23]:
# Put the raw data directory under DVC tracking.
!dvc add raw_data/

Adding...                                                                       
![A
Computing file/dir hashes (only done once)            |0.00 [00:00,      ?md5/s][A
                                                                                [A
![A
                                                      |0.00 [00:00,       ?it/s][A
                                                                                [A
![A
  0%|          |Saving files                         0/11 [00:00<?,     ?file/s][A
                                                                                [A
![A
  0%|          |.TXpddg9YwHnw697X7sN64b.tmp     0.00/776k [00:00<?,       ?it/s][A
                                                                                [A
![A
  0%|          |.XSfwHJCDaLzW4owMr9Syym.tmp    0.00/1.04M [00:00<?,       ?it/s][A
                                                                                [A
![A
  0%|          |.FMuakTNyTjMgswto2gKSkt.tmp     0

In [24]:
# Stage .gitignore and the raw_data.dvc file for Git.
!git add .gitignore raw_data.dvc

In [25]:
# Put the saved model directory under DVC tracking.
!dvc add initial_model/

Adding...                                                                       
![A
Computing file/dir hashes (only done once)            |0.00 [00:00,      ?md5/s][A
                                                                                [A
![A
                                                      |0.00 [00:00,       ?it/s][A
                                                                                [A
![A
  0%|          |Saving files                          0/4 [00:00<?,     ?file/s][A
                                                                                [A
![A
  0%|          |.UmKDAu5anQxxBbGNqhZ54U.tmp    0.00/17.6k [00:00<?,       ?it/s][A
                                                                                [A
![A
  0%|          |.FgdRrNFoxWkTUxagEbaRPZ.tmp     0.00/164k [00:00<?,       ?it/s][A
                                                                                [A
![A
  0%|          |.h5ULS2kGJJNmYMg3rfpEqu.tmp    0.

In [26]:
# Stage .gitignore and the initial_model.dvc file for Git.
# NOTE(review): no commit follows in the visible part of the notebook — confirm
# these staged files are committed later.
!git add .gitignore initial_model.dvc

## Create Docker container

In [28]:
# Print the current working directory (the host path mounted into the container).
!pwd

/home/ubuntu/notebooks/JacobD/JD_FinalProj2


In [None]:
# Start a continuumio/miniconda3 container, install Jupyter inside it and serve
# notebooks from /opt/notebooks on port 8886, with the project directory mounted
# at /opt/notebooks/data. The command runs in the foreground (-i -t), so this
# cell keeps running for as long as the notebook server is up.
# NOTE(review): the host path is hard-coded, and --ip='*' with --allow-root
# serves a root-owned notebook on all interfaces — confirm this is only used on
# a trusted network.
!sudo docker run -i -t -p 8886:8886 -v /home/ubuntu/notebooks/JacobD/JD_FinalProj2:/opt/notebooks/data continuumio/miniconda3 /bin/bash -c "/opt/conda/bin/conda install jupyter -y --quiet && /opt/conda/bin/jupyter notebook --notebook-dir=/opt/notebooks --ip='*' --port=8886 --no-browser --allow-root"

Collecting package metadata (current_repodata.json): ...working... done
Solving environment: ...working... failed with initial frozen solve. Retrying with flexible solve.
Solving environment: ...working... failed with repodata from current_repodata.json, will retry with next repodata source.
Collecting package metadata (repodata.json): ...working... done
Solving environment: ...working... done

## Package Plan ##

  environment location: /opt/conda

  added / updated specs:
    - jupyter


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    argon2-cffi-20.1.0         |   py38h27cfd23_1          46 KB
    async_generator-1.10       |     pyhd3eb1b0_0          23 KB
    attrs-21.2.0               |     pyhd3eb1b0_0          46 KB
    backcall-0.2.0             |     pyhd3eb1b0_0          13 KB
    bleach-3.3.0               |     pyhd3eb1b0_0         113 KB
    ca-certificates-2021.5.25  |    

Preparing transaction: ...working... done
Verifying transaction: ...working... done
Executing transaction: ...working... done
[32m[I 09:47:50.577 NotebookApp](B[m Writing notebook server cookie secret to /root/.local/share/jupyter/runtime/notebook_cookie_secret
[32m[I 09:47:50.797 NotebookApp](B[m Serving notebooks from local directory: /opt/notebooks
[32m[I 09:47:50.797 NotebookApp](B[m Jupyter Notebook 6.4.0 is running at:
[32m[I 09:47:50.797 NotebookApp](B[m http://fd0aebf1446b:8886/?token=79deedc51954418288bcfccca5b000d63940c2d1865193fb
[32m[I 09:47:50.797 NotebookApp](B[m  or http://127.0.0.1:8886/?token=79deedc51954418288bcfccca5b000d63940c2d1865193fb
[32m[I 09:47:50.797 NotebookApp](B[m Use Control-C to stop this server and shut down all kernels (twice to skip confirmation).
[35m[C 09:47:50.801 NotebookApp](B[m 
    
    To access the notebook, open this file in a browser:
        file:///root/.local/share/jupyter/runtime/nbserver-1-open.html
    Or copy and

[32m[I 12:02:01.533 NotebookApp](B[m Saving file at /data/FinalProj2_production.ipynb
[32m[I 12:03:38.066 NotebookApp](B[m Saving file at /data/FinalProj2_production.ipynb
2021-05-30 12:03:53.732777: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory
2021-05-30 12:03:53.732834: W tensorflow/stream_executor/cuda/cuda_driver.cc:326] failed call to cuInit: UNKNOWN ERROR (303)
2021-05-30 12:03:53.732871: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (fd0aebf1446b): /proc/driver/nvidia/version does not exist
2021-05-30 12:03:53.733147: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other 