In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split, cross_val_score

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Dropout
from tensorflow.keras.callbacks import EarlyStopping
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import KFold

In [2]:
df = pd.read_csv('./data/veg_model_data.csv')
df.head()

Unnamed: 0,vegetable,flavor_family,texture,prepared
0,artichokes,mild/neutral,soft-chewy,roast
1,artichokes,mild/neutral,soft-chewy,steam
2,arugula,spicy,soft-chewy,salad
3,arugula,spicy,soft-chewy,salad
4,asparagus,grassy,crunchy,salad


In [3]:
df_dummy = pd.get_dummies(df, columns=df.columns[1:], drop_first=True)

In [4]:
X = df_dummy.drop(columns='vegetable')
y = df_dummy['vegetable']

In [5]:
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

In [6]:
rfc = RandomForestClassifier()

In [7]:
rfc.fit(X_train, y_train)

RandomForestClassifier()

In [8]:
rfc.score(X_test, y_test)

0.09090909090909091

In [9]:
knn = KNeighborsClassifier(n_neighbors=8)

In [10]:
knn.fit(X_train, y_train)

KNeighborsClassifier(n_neighbors=8)

In [11]:
knn.score(X_test, y_test)

0.09090909090909091

In [12]:
import category_encoders as ce

In [13]:
encoder = ce.OneHotEncoder(cols=df.columns[1:])
df_enc = encoder.fit_transform(df)

  elif pd.api.types.is_categorical(cols):


In [14]:
veg = ['Artichokes', 'Arugula', 'Asparagus', 'Bush beans (green beans)',
       'Pole beans(green beans)', 'Beets', 'Bok Choy', 'Broccoli',
       'Brussel Sprouts', 'Cabbage', 'Carrots', 'Cauliflower', 'Celery',
       'Chives', 'Collards', 'Corn', 'Cucumbers', 'Eggplant', 'Garlic', 'Kale',
       'Leeks', 'Lettuce', 'Mustard Greens', 'Onions', 'Parsnips', 'Peas',
       'Peppers', 'Potatoes', 'Pumpkin', 'Radishes', 'Rhubarb', 'Rutabaga',
       'Shallots', 'Spinach', 'Summer Squash', 'Winter Squash', 'Swiss Chard',
       'Tomatoes', 'Turnips']
veg_dict = {k.lower(): v for v, k in enumerate(veg)}
veg_dict

{'artichokes': 0,
 'arugula': 1,
 'asparagus': 2,
 'bush beans (green beans)': 3,
 'pole beans(green beans)': 4,
 'beets': 5,
 'bok choy': 6,
 'broccoli': 7,
 'brussel sprouts': 8,
 'cabbage': 9,
 'carrots': 10,
 'cauliflower': 11,
 'celery': 12,
 'chives': 13,
 'collards': 14,
 'corn': 15,
 'cucumbers': 16,
 'eggplant': 17,
 'garlic': 18,
 'kale': 19,
 'leeks': 20,
 'lettuce': 21,
 'mustard greens': 22,
 'onions': 23,
 'parsnips': 24,
 'peas': 25,
 'peppers': 26,
 'potatoes': 27,
 'pumpkin': 28,
 'radishes': 29,
 'rhubarb': 30,
 'rutabaga': 31,
 'shallots': 32,
 'spinach': 33,
 'summer squash': 34,
 'winter squash': 35,
 'swiss chard': 36,
 'tomatoes': 37,
 'turnips': 38}

In [15]:
df_enc['vegetable'] = df_enc['vegetable'].map(veg_dict)

In [16]:
df_enc

Unnamed: 0,vegetable,flavor_family_1,flavor_family_2,flavor_family_3,flavor_family_4,flavor_family_5,texture_1,texture_2,texture_3,texture_4,texture_5,prepared_1,prepared_2,prepared_3,prepared_4,prepared_5,prepared_6,prepared_7,prepared_8
0,0.0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0
1,0.0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0
2,1.0,0,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0
3,1.0,0,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0
4,2.0,0,0,1,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
83,38.0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1
84,18.0,0,1,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0
85,18.0,0,1,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0
86,21.0,1,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0


In [17]:
X = df_enc.drop(columns='vegetable').astype(float)
y = df_enc['vegetable']
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

In [18]:
tf.random.set_seed(42)

In [19]:
n_input = X_train.shape[1]

In [20]:
model = Sequential()
model.add(Dense(15, input_shape=(n_input,), activation='relu'))
model.add(Dense(10, activation='relu'))
model.add(Dense(1, activation='softmax'))

model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['acc'])

early_stop = EarlyStopping(monitor='val_loss', patience=3, verbose=1)

In [21]:
history = model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=20, callbacks=[early_stop])

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 00003: early stopping
