# CloudWine Model Evaluation

In [1]:
import pickle
import json
import yaml

In [2]:
# Clear the runs folder: delete the files left in ./runs by earlier executions,
# because the results cell further down reads every file it finds there.
# (The glob does not match dotfiles, and rm without -r leaves subdirectories.)
!rm ./runs/*

In [3]:
def reset_config():
    """Build a fresh baseline configuration dictionary.

    New dict objects are created on every call, so changes made to one
    returned config never leak into the next one.

    Returns:
        dict: ``{"model": "", "args": {...}}`` where ``args`` holds the data
        path, the four text-preprocessing flags (all ``False``), model saving
        switched off and validation saving switched on, plus the two output
        directories.
    """
    # All text-preprocessing switches start disabled.
    preprocessing_off = dict.fromkeys(
        ("lowercase", "remove_punctuation", "remove_stopwords", "lemmatize"),
        False,
    )

    args = {"data_path": "./data/raw/sample.csv"}
    args.update(preprocessing_off)
    args.update(
        save_model=False,
        model_dir="./models/",
        save_validation=True,
        validation_dir="./runs/",
    )
    return {"model": "", "args": args}

In [4]:
def set_model(config, model):
    """Store ``model`` under the ``'model'`` key of ``config``.

    Args:
        config: Configuration dict; it is modified in place.
        model: Value to store as the model identifier.

    Returns:
        The same ``config`` object that was passed in.
    """
    config.update(model=model)
    return config
    
def set_nlp_args(config, preprocess):
    """Set all four text-preprocessing flags in ``config['args']`` at once.

    Args:
        config: Configuration dict with an ``'args'`` sub-dict; it is
            modified in place.
        preprocess: Value assigned to ``lowercase``, ``remove_punctuation``,
            ``remove_stopwords`` and ``lemmatize``.

    Returns:
        The same ``config`` object that was passed in.
    """
    nlp_args = config['args']
    for flag in ('lowercase', 'remove_punctuation', 'remove_stopwords', 'lemmatize'):
        nlp_args[flag] = preprocess
    return config

In [5]:
# Model identifiers to evaluate; the training loop below writes each one into
# the "model" field of the config file it hands to train.py.
models = ['tfidf', 'doc2vec', 'bert']

In [6]:
for m in models:
    print('Running model ' + m)
    config = reset_config()
    config = set_model(config, m)
    config = set_nlp_args(config, False)

    with open('./config.yaml', "w") as ff:
        yaml.dump(config, ff, default_flow_style=False)

    !python3 train.py -y './config.yaml'
    
    config = reset_config()
    config = set_model(config, m)
    config = set_nlp_args(config, True)

    with open('./config.yaml', "w") as ff:
        yaml.dump(config, ff, default_flow_style=False)

    !python3 train.py -y './config.yaml'

Running model tfidf
[nltk_data] Downloading package stopwords to /Users/elmi/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to /Users/elmi/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
2020-06-11 14:47:03,990 gensim.corpora.dictionary INFO     adding document #0 to Dictionary(0 unique tokens: [])
2020-06-11 14:47:03,991 gensim.corpora.dictionary INFO     built Dictionary(12 unique tokens: ['computer', 'human', 'interface', 'response', 'survey']...) from 9 documents (total 29 corpus positions)
2020-06-11 14:47:04,222 transformers.file_utils INFO     PyTorch version 1.5.0 available.
2020-06-11 14:47:04,297 root         INFO     {'data_path': './data/raw/sample.csv', 'lemmatize': False, 'lowercase': False, 'model_dir': './models/', 'remove_punctuation': False, 'remove_stopwords': False, 'save_model': False, 'save_validation': True, 'validation_dir': './runs/'}
2020-06-11 14:47:04,297 root         INFO     Loa

In [7]:
from os import listdir
from os.path import isfile, join

In [17]:
# Collect the result of every run stored in ./runs.
files = [f for f in listdir('./runs') if isfile(join('./runs', f))]
config_list = []

# graph[model] = [output with lowercase off, output with lowercase on];
# the 0 entries are placeholders until the matching run file is read.
graph = {'tfidf':[0,0], 'doc2vec':[0,0], 'bert':[0,0]}
for fx in files:
    f = './runs/' + fx
    with open(f, "rb") as file:
        # NOTE: pickle.load can execute arbitrary code; only files written by
        # this project's own runs should ever be placed in ./runs.
        config = pickle.load(file)

    # ./runs also receives runs/graph_data.pkl from a later cell. That file is
    # a summary, not a run record, so re-running this cell used to fail with a
    # KeyError on config['args']. Skip anything without the run-record keys.
    if not (isinstance(config, dict)
            and all(key in config for key in ('model', 'args', 'output'))):
        continue

    slot = 1 if config['args']['lowercase'] else 0
    graph[config['model']][slot] = [config['output']]
    config_list.append(config)

In [18]:
# Display the collected outputs: graph[model] = [lowercase-off result, lowercase-on result].
graph

{'tfidf': [[0.5043794755752165], [0.4591584867338468]],
 'doc2vec': [[0.4940810799598694], [0.5658628433942795]],
 'bert': [[0.9015057206153869], [0.9123975753784179]]}

In [19]:
# Save the per-model summary dict to disk as a pickle.
with open('runs/graph_data.pkl', "wb") as graph_file:
    pickle.dump(graph, graph_file)

In [15]:
# Display the run records collected from the pickle files in ./runs.
config_list

[{'args': {'data_path': './data/raw/sample.csv',
   'lemmatize': True,
   'lowercase': True,
   'model_dir': './models/',
   'remove_punctuation': True,
   'remove_stopwords': True,
   'save_model': False,
   'save_validation': True,
   'validation_dir': './runs/'},
  'model': 'tfidf',
  'output': 0.4591584867338468},
 {'args': {'data_path': './data/raw/sample.csv',
   'lemmatize': False,
   'lowercase': False,
   'model_dir': './models/',
   'remove_punctuation': False,
   'remove_stopwords': False,
   'save_model': False,
   'save_validation': True,
   'validation_dir': './runs/'},
  'model': 'tfidf',
  'output': 0.5043794755752165},
 {'args': {'data_path': './data/raw/sample.csv',
   'lemmatize': True,
   'lowercase': True,
   'model_dir': './models/',
   'remove_punctuation': True,
   'remove_stopwords': True,
   'save_model': False,
   'save_validation': True,
   'validation_dir': './runs/'},
  'model': 'bert',
  'output': 0.9123975753784179},
 {'args': {'data_path': './data/raw/s