# Linear Support Vector Classifier

This notebook trains and evaluates a LinearSVC model on the recipe data. 

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib as mpl
from sklearn.linear_model import LogisticRegression
from sklearn import preprocessing
from sklearn.model_selection import train_test_split

## Load Data

Load and split one hot encoded data

In [3]:
train_data = pd.read_csv("../data/ohe_train_recipes_v2.csv",index_col="id")

In [4]:
X_train, X_val, y_train, y_val = train_test_split(train_data.drop(columns=['cuisine']),
                                                  train_data['cuisine'],
                                                  test_size=0.3,random_state=22)

## Linear SVC

In [5]:
from sklearn.svm import LinearSVC, SVC
from sklearn.metrics import f1_score

from sklearn.model_selection import train_test_split, GridSearchCV

In [6]:
model = LinearSVC()
model.fit(X_train, y_train)

In [7]:
model.score(X_train, y_train), model.score(X_val, y_val)

(0.9306418591286233, 0.7645185619710048)

## Hyperparameter Tuning

In [40]:
param_grid = {'C': [0.001,0.01, 0.1, 1, 10, 50, 100, 500, 1000,5000],  
              'penalty': ['l1','l2'],
             'loss': ['hinge','squared hinge']} 

grid = GridSearchCV(LinearSVC(), param_grid, refit = True, verbose = 3, n_jobs=-1)

In [41]:
grid.fit(X_train,y_train) 

Fitting 5 folds for each of 40 candidates, totalling 200 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  16 tasks      | elapsed:  1.4min
[Parallel(n_jobs=-1)]: Done 112 tasks      | elapsed:  9.6min
[Parallel(n_jobs=-1)]: Done 200 out of 200 | elapsed: 17.2min finished


GridSearchCV(estimator=LinearSVC(), n_jobs=-1,
             param_grid={'C': [0.001, 0.01, 0.1, 1, 10, 50, 100, 500, 1000,
                               5000],
                         'loss': ['hinge', 'squared hinge'],
                         'penalty': ['l1', 'l2']},
             verbose=3)

In [42]:
grid.best_params_

{'C': 1, 'loss': 'hinge', 'penalty': 'l2'}

## Test Predictions

Generate predictions for the test set to evaluate model preformance.

In [15]:
test_data = pd.read_csv("../data/ohe_test_recipes_v2.csv",index_col="id")

In [17]:
model = LinearSVC(C=1,loss='hinge',penalty='l2')
model.fit(X_train, y_train)
test_predictions = model.predict(test_data)



In [18]:
pd.Series(test_predictions, index=test_data.index, name='cuisine').to_csv("model_predictions/LinearSVC.csv")
## kaggle score:  0.77182