In [1]:
from dataset.dataset import WhatsCookingDataset
from dataset.dataset import WhatsCookingStemmedDataset
from dataset.dataset import WhatsCookingStemmedSeparatedDataset

from base_solver import BaseSolver
from example_method_file.example_method import ExampleSolver

from base_data_processor import BaseDataProcessor
from processors.simple_ingredients_encoder import SimpleIngredientsEncoder
from processors.tf_idf import TfIdf

from SVM import SVC_solver
from sklearn.feature_extraction.text import TfidfVectorizer
import importlib  # for reloading module

In [2]:
# dataset loading
# Plain dataset: every training entry's cuisine name is translated into its
# integer id through the dataset's own cuisine2id mapping.
dataset = WhatsCookingDataset()
train_y = [dataset.cuisine2id[entry.cuisine] for entry in dataset.cuisines]
test_cuisines = dataset.load_test_file()

# load stemmed dataset
# NOTE(review): stem=False is passed here even though every name in this
# section says "stemmed" -- presumably stemming is switched off and only the
# ingredient separation is applied; confirm against the dataset class.
dataset_stemmed = WhatsCookingStemmedSeparatedDataset(stem=False)
train_x_stemmed = dataset_stemmed.cuisines
# Labels are looked up in the stemmed dataset's own cuisine2id mapping.
train_y_stemmed = [dataset_stemmed.cuisine2id[entry.cuisine] for entry in train_x_stemmed]
test_cuisines_stemmed = dataset_stemmed.load_test_file()

  0%|          | 0/39774 [00:00<?, ?it/s]

Loading What's Cooking training dataset ...


100%|██████████| 39774/39774 [00:00<00:00, 298582.92it/s]
100%|██████████| 9944/9944 [00:00<00:00, 952088.91it/s]
  0%|          | 0/39774 [00:00<?, ?it/s]

Successfully loaded What's Cooking training dataset!
Loading What's Cooking testing dataset ...
Successfully loaded What's Cooking testing dataset!
Loading and stemming separated What's Cooking training dataset ...


100%|██████████| 39774/39774 [00:24<00:00, 1591.80it/s]
  2%|▏         | 154/9944 [00:00<00:06, 1530.10it/s]

Successfully loaded stemmed and separated What's Cooking training dataset!
# of cuisines = 20; # of ingredients = 3082
Loading and stemming separated What's Cooking testing dataset ...


100%|██████████| 9944/9944 [00:06<00:00, 1607.42it/s]

Successfully loaded stemmed and separated What's Cooking testing dataset!





In [3]:
# pre-processing
# Turn each entry's ingredient list into a single lower-cased,
# space-separated string so it can be vectorized as one text document.
train_as_text = [' '.join(cuisine.ingredients).lower() for cuisine in dataset_stemmed.cuisines]
test_as_text = [' '.join(cuisine.ingredients).lower() for cuisine in test_cuisines_stemmed]

# binary=True makes the term-frequency part 0/1 (an ingredient is either
# present or not); the idf weighting is still applied on top of it.
tfidf_enc = TfidfVectorizer(binary=True)

# The vocabulary and idf weights are fitted on the training text only and
# then re-used unchanged for the test text.
# Cast to float32 rather than float16: float16 is not a dtype scipy.sparse
# supports (arithmetic on such matrices can fail to upcast and recent SciPy
# versions may reject it outright), and it throws away precision on the
# tf-idf weights. float32 still halves memory relative to the default float64.
train_x_tfidf = tfidf_enc.fit_transform(train_as_text).astype('float32')
test_x_tfidf = tfidf_enc.transform(test_as_text).astype('float32')

In [4]:
# training and testing
# Re-import the solver module so that edits made to SVC_solver on disk are
# picked up without restarting the kernel.
importlib.reload(SVC_solver)

# NOTE(review): the solver is built from the plain `dataset`, while the
# training labels below were derived from `dataset_stemmed.cuisine2id`.
# This is only correct if both datasets produce the same cuisine -> id
# mapping -- confirm.
s = SVC_solver.SVCSolver(dataset, method='svc_ovr')
print(f'Now solving using SVC with {s.method}')

# Fit on the TF-IDF training matrix, then run the fitted solver on the
# TF-IDF test matrix together with the (unstemmed) test entries.
s.train(train_x_tfidf, train_y_stemmed)
s.test(test_x_tfidf, test_cuisines)

Now solving using SVC with svc_ovr
Training started...
training time: 518.0598945617676 seconds
training score: 0.9995977271584452
Saving model...
Testing started...
