# CloudWine Model Evaluation

In [1]:
import pickle
import json
import yaml

In [2]:
# Clear the runs folder
!rm ./runs/*

In [3]:
def reset_config():
    """Build a fresh run configuration with no model selected.

    Every call constructs new dict objects, so callers may mutate the result
    freely. All four text-preprocessing flags start as False, model saving is
    off, and saving of validation output is on.

    Returns:
        dict: ``{"model": "", "args": {...}}`` holding the default arguments.
    """
    default_args = dict(
        data_path="./data/raw/winemag-data-130k-v2.csv",
        lowercase=False,
        remove_punctuation=False,
        remove_stopwords=False,
        lemmatize=False,
        save_model=False,
        model_dir="./models/",
        save_validation=True,
        validation_dir="./runs/",
    )
    return {"model": "", "args": default_args}

In [4]:
def set_model(config, model):
    """Store ``model`` under the ``'model'`` key of ``config``.

    The dict is modified in place; the same object is returned so calls can
    be chained by reassignment.
    """
    config.update({'model': model})
    return config
    
def set_nlp_args(config, preprocess):
    """Set all four text-preprocessing flags in ``config['args']`` to ``preprocess``.

    The flags are ``lowercase``, ``remove_punctuation``, ``remove_stopwords``
    and ``lemmatize``. ``config`` is modified in place and returned.
    """
    nlp_args = config['args']
    for flag in ('lowercase', 'remove_punctuation', 'remove_stopwords', 'lemmatize'):
        nlp_args[flag] = preprocess
    return config

In [5]:
# Names of the models to evaluate.
models = [
    'tfidf',
    'doc2vec',
    'bert',
]

In [None]:
for m in models:
    print('Running model ' + m)
    config = reset_config()
    config = set_model(config, m)
    config = set_nlp_args(config, False)

    with open('./config.yaml', "w") as ff:
        yaml.dump(config, ff, default_flow_style=False)

    !python3 train.py -y './config.yaml'
    
    config = reset_config()
    config = set_model(config, m)
    config = set_nlp_args(config, True)

    with open('./config.yaml', "w") as ff:
        yaml.dump(config, ff, default_flow_style=False)

    !python3 train.py -y './config.yaml'

Running model tfidf
[nltk_data] Downloading package stopwords to /Users/elmi/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to /Users/elmi/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package stopwords to /Users/elmi/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to /Users/elmi/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
Running model doc2vec
[nltk_data] Downloading package stopwords to /Users/elmi/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to /Users/elmi/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
^C
Traceback (most recent call last):
  File "train.py", line 125, in <module>
    main(config)
  File "train.py", line 24, in main
    model.train(corpus)
  File "/Users/elmi/Projects/CloudWine/train/models.py", line 73

In [None]:
from os import listdir
from os.path import isfile, join
# Names (not paths) of the regular files directly inside ./runs.
# os.listdir returns entries in arbitrary order, so sort them to make
# positional access such as files[0] reproducible between runs.
files = sorted(f for f in listdir('./runs') if isfile(join('./runs', f)))

In [None]:
# Load and print the first saved run file.
# `files` holds bare file names taken from ./runs, so the name must be joined
# with that directory; opening it as-is would resolve against the current
# working directory and fail.
f = files[0]
with open(join('./runs', f), "r") as file:
    # NOTE(review): assumes the run files are YAML — confirm against train.py.
    config = yaml.load(file, Loader=yaml.FullLoader)

print(config)