# Cuisine Project

In [1]:
#Import stuff
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_score, recall_score, accuracy_score, roc_auc_score,f1_score
# Raise the pandas display limits to 500 rows and 500 columns (one call, two option/value pairs)
pd.set_option('display.max_rows', 500, 'display.max_columns', 500)

In [2]:
# Move the working directory up to the project root when the notebook is run
# from inside the 'notebooks' folder, so relative paths resolve from the root.
# Only a final path component that is exactly 'notebooks' is stripped: a regex
# substitution on '/notebooks' would also rewrite any other occurrence in the
# path (e.g. '/home/me/notebooks_old/project') and never matches
# Windows-style separators.
import os
import re  # not used in this cell; kept in case other cells rely on it
cwd = os.getcwd()
if os.path.basename(cwd) == 'notebooks':
    cwd = os.path.dirname(cwd)
os.chdir(cwd)

In [3]:
# Load the training recipes into a DataFrame. The path is relative, so it is
# resolved against the current working directory set by the chdir above.
df = pd.read_json('whats_cooking_data/train.json')

In [4]:
# Preview the first rows to check which columns were loaded
df.head()

Unnamed: 0,cuisine,id,ingredients
0,greek,10259,"[romaine lettuce, black olives, grape tomatoes..."
1,southern_us,25693,"[plain flour, ground pepper, salt, tomatoes, g..."
2,filipino,20130,"[eggs, pepper, salt, mayonaise, cooking oil, g..."
3,indian,22213,"[water, vegetable oil, wheat, salt]"
4,indian,13162,"[black pepper, shallots, cornflour, cayenne pe..."


In [5]:
# Spread each recipe's ingredient list across one column per list position
# and label the columns ingredient_0, ingredient_1, ...
ingredients = df['ingredients'].apply(pd.Series).add_prefix('ingredient_')

ingredients.head()

Unnamed: 0,ingredient_0,ingredient_1,ingredient_2,ingredient_3,ingredient_4,ingredient_5,ingredient_6,ingredient_7,ingredient_8,ingredient_9,ingredient_10,ingredient_11,ingredient_12,ingredient_13,ingredient_14,ingredient_15,ingredient_16,ingredient_17,ingredient_18,ingredient_19,ingredient_20,ingredient_21,ingredient_22,ingredient_23,ingredient_24,ingredient_25,ingredient_26,ingredient_27,ingredient_28,ingredient_29,ingredient_30,ingredient_31,ingredient_32,ingredient_33,ingredient_34,ingredient_35,ingredient_36,ingredient_37,ingredient_38,ingredient_39,ingredient_40,ingredient_41,ingredient_42,ingredient_43,ingredient_44,ingredient_45,ingredient_46,ingredient_47,ingredient_48,ingredient_49,ingredient_50,ingredient_51,ingredient_52,ingredient_53,ingredient_54,ingredient_55,ingredient_56,ingredient_57,ingredient_58,ingredient_59,ingredient_60,ingredient_61,ingredient_62,ingredient_63,ingredient_64
0,romaine lettuce,black olives,grape tomatoes,garlic,pepper,purple onion,seasoning,garbanzo beans,feta cheese crumbles,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,plain flour,ground pepper,salt,tomatoes,ground black pepper,thyme,eggs,green tomatoes,yellow corn meal,milk,vegetable oil,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,eggs,pepper,salt,mayonaise,cooking oil,green chilies,grilled chicken breasts,garlic powder,yellow onion,soy sauce,butter,chicken livers,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,water,vegetable oil,wheat,salt,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4,black pepper,shallots,cornflour,cayenne pepper,onions,garlic paste,milk,butter,salt,lemon juice,water,chili powder,passata,oil,ground cumin,boneless chicken skinless thigh,garam masala,double cream,natural yogurt,bay leaf,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [9]:
# Count how often each ingredient appears in each dish: one-hot encode every
# (recipe, ingredient) entry, then add the indicators up per recipe so the
# result has one row per recipe and one column per distinct ingredient.
# - `.sum(level=0)` was deprecated in pandas 1.3 and removed in pandas 2.0;
#   grouping on index level 0 is the supported equivalent. sort=False keeps
#   the recipes in their original row order.
# - `ingredients` is already a DataFrame, so the former `.apply(pd.Series)`
#   pass over it was a no-op and has been dropped.
food_labels = pd.get_dummies(ingredients.stack()).groupby(level=0, sort=False).sum()

In [10]:
# Attach the per-recipe ingredient columns to the original recipe rows,
# matching the two frames on their index (outer join)
df_basic = pd.merge(df, food_labels, how='outer', left_index=True, right_index=True)

In [11]:
# Preview the merged frame: the original recipe columns followed by one column per ingredient
df_basic.head()

Unnamed: 0,cuisine,id,ingredients,( oz.) tomato sauce,( oz.) tomato paste,(10 oz.) frozen chopped spinach,"(10 oz.) frozen chopped spinach, thawed and squeezed dry",(14 oz.) sweetened condensed milk,(14.5 oz.) diced tomatoes,(15 oz.) refried beans,1% low-fat buttermilk,1% low-fat chocolate milk,1% low-fat cottage cheese,1% low-fat milk,"2 1/2 to 3 lb. chicken, cut into serving pieces",2% low fat cheddar chees,2% low-fat cottage cheese,2% lowfat greek yogurt,2% milk shredded mozzarella cheese,2% reduced-fat milk,25% less sodium chicken broth,33% less sodium cooked deli ham,33% less sodium cooked ham,33% less sodium ham,33% less sodium smoked fully cooked ham,40% less sodium taco seasoning,40% less sodium taco seasoning mix,7 Up,"8 ounc ziti pasta, cook and drain",95% lean ground beef,A Taste of Thai Rice Noodles,Accent Seasoning,Adobo All Purpose Seasoning,Alaskan king crab legs,Alexia Waffle Fries,Alfredo sauce,Amarena cherries,Amaretti Cookies,American cheese,Anaheim chile,Angostura bitters,Argo Corn Starch,Asian chili sauce,Asian sweet chili sauce,Azteca Flour Tortillas,BACARDI® Mixers Margarita Mix,BACARDI® Superior,BREAKSTONE'S Sour Cream,Baileys Irish Cream Liqueur,Balsamico Bianco,Barilla Linguine,Barilla Oven-Ready Lasagne,Barilla Plus Pasta,Bartlett Pear,Belgian endive,Bengali 5 Spice,Bertolli Garlic Alfredo Sauce,Bertolli Tomato & Basil Sauce,Bertolli® Alfredo Sauce,Bertolli® Arrabbiata Sauce,Bertolli® Classico Olive Oil,Best Food's Mayonnaise with Lime Juice,Best Foods® Real Mayonnaise,Better Than Bouillon Chicken Base,Betty Crocker™ oatmeal cookie mix,Biryani Masala,Bisquick Baking Mix,Bisquick Original All-Purpose Baking Mix,Bob Evans Italian Sausage,Bordelaise sauce,Boston lettuce,Boursin,Braeburn Apple,Bragg Liquid Aminos,Breakstone’s Sour Cream,Breyers® Natural Vanilla Ice Cream,Budweiser,Burgundy wine,CURRY GUY Smoked Garam Masala,CURRY GUY Smoked Spicy Salt,California bay leaves,Camellia Red Kidney Beans,Campbell's Condensed Cheddar Cheese 
Soup,Campbell's Condensed Cream of Chicken Soup,Campbell's Condensed Cream of Mushroom Soup,Campbell's Condensed Tomato Soup,Cara Cara orange,Castelvetrano olives,Cavenders Greek Seasoning,Challenge Butter,Chambord Liqueur,Chartreuse Liqueur,Chianti,Chinese egg noodles,Chinese rice vinegar,Chinese rose wine,Chinese sesame paste,Cholula Hot Sauce,Ciabatta rolls,Cinnamon Toast Crunch Cereal,Classico Pasta Sauce,Cointreau Liqueur,Colman's Mustard Powder,Conimex Wok Olie,Conimex Woksaus Specials Vietnamese Gember Knoflook,Corn Flakes Cereal,Country Crock® Spread,Cox's Orange Pippin,Crisco Pure Canola Oil,Crisco Pure Vegetable Oil,Crispy Rice Cereal,Crystal Farms Butter,Crystal Farms Reduced Fat Shredded Marble Jack Cheese,Crystal Farms Shredded Gouda Cheese,Crystal Farms® Shredded Cheddar Cheese,Crystal Hot Sauce,Daisy Sour Cream,Daiya,DeLallo Extra Virgin Olive Oil,DeLallo Penne Ziti,Diamond Crystal® Kosher Salt,Dole Seven Lettuces,Domino Confectioners Sugar,Domino Light Brown Sugar,Doritos Tortilla Chips,Doubanjiang,Dungeness crabs,Dutch-processed cocoa powder,Earth Balance Buttery Spread,Earth Balance Natural Buttery Spread,Edam,Eggland's Best® eggs,Elmlea Single Light,Elmlea single,Emmenthal,English muffins,English mustard,English toffee bits,Equal Sweetener,Estancia Pinot Noir,Everglades Seasoning,Fisher Pecan Halves,Fisher Pecans,Flora Buttery,Flora Cuisine,Flora Original,Flora pro.activ,Foster Farms boneless skinless chicken breasts,Frangelico,Frank's® RedHot® Original Cayenne Pepper Sauce,Franks Hot Sauce,French bread loaves,French lentils,French mustard,Fuji Apple,Fuyu persimmons,Galliano,Gebhardt Chili Powder,Gochujang base,Godiva Chocolate Liqueur,Gold Medal All Purpose Flour,Gold Medal Flour,Good Seasons Italian Dressing Mix,Gourmet Garden Oregano,Gourmet Garden Parsley,Gourmet Garden garlic paste,Goya Extra Virgin Olive Oil,Goya Ground Cumin,Goya Hot Sauce,Grand Marnier,Greek black olives,Greek dressing,Greek feta,Green Giant Whole Kernel Sweet Corn,Green 
Giant™ sliced mushrooms,Guinness Beer,Guinness Lager,Haas avocados,Hatch Green Chiles,Hawaiian salt,Heath Candy Bars,Heinz Chili Sauce,Heinz Ketchup,Heinz Tomato Ketchup,Heinz Worcestershire Sauce,Hellmann''s Light Mayonnaise,Hellmann's Dijonnaise Creamy Dijon Mustard,Hellmann's® Real Mayonnaise,Herdez Salsa,Herdez Salsa Casera,Herdez Salsa Verde,Hidden Valley® Farmhouse Originals Italian with Herbs Dressing,Hidden Valley® Greek Yogurt Original Ranch® Dip Mix,Hidden Valley® Original Ranch Salad® Dressing & Seasoning Mix,Hidden Valley® Original Ranch® Dips Mix,Hidden Valley® Original Ranch® Dressing,Hidden Valley® Original Ranch® Light Dressing,Hidden Valley® Original Ranch® Spicy Ranch Dressing,Himalayan salt,Hogue Cabernet Sauvignon,Holland House White Wine Vinegar,Homemade Yogurt,Honeysuckle White® Hot Italian Turkey Sausage Links,Hurst Family Harvest Chipotle Lime Black Bean Soup mix,I Can't Believe It's Not Butter!® All Purpose Sticks,I Can't Believe It's Not Butter!® Spread,Ibarra Chocolate,Imperial Sugar Light Brown Sugar,India Pale Ale,Indian spice,Irish Red ale,Irish whiskey,Italian basil,Italian bread,Italian cheese,Italian cheese blend,Italian herbs,Italian parsley leaves,Italian seasoned breadcrumbs,Italian seasoned diced tomatoes,Italian seasoned panko bread crumbs,Italian turkey sausage,Italian turkey sausage links,JOHNSONVILLE Hot & Spicy Sausage Slices,JOHNSONVILLE® Hot 'N Spicy Brats,Jack Daniels Whiskey,Jagermeister Liqueur,Jamaican allspice,Jameson Irish Whiskey,Jameson Whiskey,Japanese Mayonnaise,Japanese mountain yam,Japanese rice vinegar,Japanese soy sauce,Japanese turnips,Jarlsberg,Jasmine brown rice,Jell-O Gelatin,Jell-O Gelatin Dessert,Jif Creamy Peanut Butter,Jiffy Corn Muffin Mix,Jimmy Dean All Natural Regular Pork Sausage,Jimmy Dean Pork Sausage,Johnsonville Andouille,Johnsonville Andouille Dinner Sausage,Johnsonville Andouille Fully Cooked Sausage,Johnsonville Hot & Spicy Breakfast Links,Johnsonville Mild Italian Sausage 
Links,Johnsonville Smoked Sausage,Johnsonville® Mild Italian Ground Sausage,...,whipped butter,whipped cream,whipped cream cheese,whipped dessert topping,whipped topping,whipping cream,whipping heavy cream,whiskey,white almond bark,white arborio rice,white asparagus,white baking bar,white beans,white bread,white bread crumbs,white bread flour,white bread slices,white button mushrooms,white cabbage,white cake mix,white cannellini beans,white cheddar cheese,white cheese,white chocolate,white chocolate chips,white corn,white corn syrup,white corn tortillas,white cornmeal,white creme de cacao,white distilled vinegar,white fleshed fish,white flour,white frostings,white grape juice,white grapefruit,white grapefruit juice,white hominy,white italian tuna in olive oil,white kidney beans,white miso,white mushrooms,white onion,white peaches,white pepper,white peppercorns,white poppy seeds,white quinoa,white radish,white rice,white rice flour,white rice vinegar,white rum,white sandwich bread,white sesame seeds,white sugar,white tequila,white truffle oil,white tuna,white tuna in water,white vermouth,white vinegar,white wine,white wine vinegar,white zinfandel,whitefish,whitefish fillets,whole allspice,whole almonds,whole baby okra,whole chicken,whole cloves,whole crab,whole cranberry sauce,whole garam masala,whole grain English muffins,whole grain baguette,whole grain bread,whole grain buns,whole grain dijon mustard,whole grain mustard,whole grain pasta,whole grain rice,whole grain roll,whole grain rotini,whole grain spelt flour,whole grain thin spaghetti,"whole kernel corn, drain",whole milk,whole milk greek yogurt,whole milk ricotta cheese,whole milk yoghurt,whole nutmegs,whole okra,"whole peel tomatoes, undrain and chop",whole peeled tomatoes,whole peppercorn,whole snapper,whole turkey,whole wheat angel hair pasta,whole wheat baguette,whole wheat berries,whole wheat bread,whole wheat bread cubes,whole wheat bread dough,whole wheat bread flour,whole wheat bread slices,whole 
wheat bread toasted,whole wheat breadcrumbs,whole wheat buns,whole wheat cereal,whole wheat couscous,whole wheat crackers,whole wheat dough,whole wheat english muffins,whole wheat fettuccine,whole wheat flour,whole wheat french bread,whole wheat fusilli,whole wheat hamburger buns,whole wheat lasagna noodles,whole wheat linguine,whole wheat orzo,whole wheat pasta,whole wheat pasta shells,whole wheat pastry flour,whole wheat peasant bread,whole wheat penne,whole wheat penne pasta,whole wheat penne rigate,whole wheat pita,whole wheat pita bread,whole wheat pita bread rounds,whole wheat pita pockets,whole wheat pita rounds,whole wheat pizza crust,whole wheat pizza dough,whole wheat potato buns,whole wheat rigatoni,whole wheat rotini,whole wheat rotini pasta,whole wheat sandwich bread,whole wheat seasoned breadcrumbs,whole wheat sourdough bread,whole wheat spaghetti,whole wheat spaghetti noodles,whole wheat spaghettini,whole wheat submarine loaves,whole wheat thin italian pizza crust,whole wheat thin spaghetti,whole wheat tortilla wraps,whole wheat tortillas,whole wheat uncooked lasagna noodles,wholemeal flour,wide egg noodles,wide rice noodles,wieners,wild asparagus,wild garlic,wild mushrooms,wild rice,wild salmon,wildflower honey,wine,wine syrup,wine vinegar,winesap,wing sauce,winter melon,winter savory,winter squash,wish bone guacamol ranch dress,wish bone ranch dress,wish bone red wine vinaigrett dress,wish-bone,wish-bone light asian sesame ginger vinaigrette dressing,wish-bone light country italian dressing,won ton skins,won ton wrappers,wondra,wondra flour,wonton noodles,wonton skins,wonton wrappers,wood ear mushrooms,wood mushrooms,worcestershire sauce,worcestershire sauce low sodium,xanthan gum,xuxu,yaki-nori,yam bean,yam noodles,yams,yardlong beans,yeast,yeast extract,yellow bean sauce,yellow bell pepper,yellow cake mix,yellow chives,yellow corn,yellow corn meal,yellow crookneck squash,yellow curry paste,yellow food coloring,yellow heirloom tomatoes,yellow 
hominy,yellow lentils,yellow miso,yellow mustard,yellow mustard seeds,yellow onion,yellow peas,yellow peppers,yellow rice,yellow rock sugar,yellow split peas,yellow squash,yellow summer squash,yellow tomato,yellowfin,yellowfin tuna,yellowtail,yellowtail snapper fillets,yoghurt,yoghurt natural low fat,yogurt cheese,yogurt dressing,yogurt low fat,yolk,yoplait,young coconut meat,young leeks,young nettle,yu choy,yuca,yucca,yucca root,yukon gold,yukon gold potatoes,yuzu,yuzu juice,za'atar,zest,zesty italian dressing,zinfandel,ziti,zucchini,zucchini blossoms
0,greek,10259,"[romaine lettuce, black olives, grape tomatoes...",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,southern_us,25693,"[plain flour, ground pepper, salt, tomatoes, g...",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,filipino,20130,"[eggs, pepper, salt, mayonaise, cooking oil, g...",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,indian,22213,"[water, vegetable oil, wheat, salt]",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,indian,13162,"[black pepper, shallots, cornflour, cayenne pe...",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [None]:
# Hold out 20% of the recipes for validation. Features are every column except
# the target and the two non-feature columns; random_state pins the shuffle.
X_train, X_val, y_train, y_val = train_test_split(
    df_basic.drop(columns=['cuisine', 'id', 'ingredients']),
    df_basic['cuisine'],
    test_size=0.2,
    random_state=2,
)

In [None]:
# Fit a logistic regression (C=.15) on the training split, then predict on
# both splits so training and validation performance can be compared
lr_model = LogisticRegression(C=.15).fit(X_train, y_train)

y_pred_tr = lr_model.predict(X_train)
y_pred = lr_model.predict(X_val)

The next cell wraps the split/fit/predict steps in a function so that different model types can be run quickly. Open questions: how much should be configurable (for example, the value of C), and what should the function return? I assume we are okay with random_state being fixed at 2, so that every split is the same even if the structure of the dataframe being used changes (i.e. we add columns)?

In [21]:
def model_test(dataframe, model_type, C=.15, test_size=0.2, random_state=2):
    """Split ``dataframe``, fit the requested model and return labels with predictions.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Must contain the columns 'cuisine' (used as the target), 'id' and
        'ingredients'; every remaining column is used as a feature.
    model_type : str
        Name of the model to fit. Only 'Logistic Regression' is supported.
    C : float, optional
        Forwarded to ``LogisticRegression(C=...)``. Defaults to .15.
    test_size : float, optional
        Forwarded to ``train_test_split``. Defaults to 0.2.
    random_state : int, optional
        Forwarded to ``train_test_split``. Defaults to 2.

    Returns
    -------
    dict
        'Training Data' and 'Test Data' hold the true labels of each split;
        'Training prediction' and 'Test prediction' hold the fitted model's
        predictions for the corresponding split.

    Raises
    ------
    ValueError
        If ``model_type`` is not a supported model name.
    """
    # Map each supported name to a factory so adding a model is a one-line change.
    model_builders = {
        'Logistic Regression': lambda: LogisticRegression(C=C),
    }

    # Validate before doing any work: an unknown model_type used to skip the
    # fitting branch and only fail at the return statement with an
    # unbound-variable error, after the split had already been computed.
    if model_type not in model_builders:
        raise ValueError(
            "Unsupported model_type %r; expected one of %s"
            % (model_type, sorted(model_builders))
        )

    # Set up train test data
    X_train, X_val, y_train, y_val = train_test_split(
        dataframe.drop(['cuisine', 'id', 'ingredients'], axis='columns'),
        dataframe['cuisine'],
        test_size=test_size,
        random_state=random_state,
    )

    # Fit on the training split, then predict on both splits
    model = model_builders[model_type]()
    model.fit(X_train, y_train)
    y_pred = model.predict(X_val)
    y_pred_tr = model.predict(X_train)

    return {'Training Data': y_train, 'Training prediction': y_pred_tr, 'Test Data': y_val, 'Test prediction': y_pred}

In [22]:
# Baseline: logistic regression on the raw one-hot ingredient features
basic_model = model_test(dataframe=df_basic, model_type='Logistic Regression')



In [23]:
def model_output(model):
    """Print the training and test accuracy for a result dict.

    ``model`` is expected to provide the keys 'Training Data',
    'Training prediction', 'Test Data' and 'Test prediction'. The two
    accuracies are printed on separate lines; nothing is returned.
    """
    train_accuracy = accuracy_score(model['Training Data'], model['Training prediction'])
    test_accuracy = accuracy_score(model['Test Data'], model['Test prediction'])
    report_lines = [
        "Train Logistic Accuracy: " + str(train_accuracy),
        "Test Logistic Accuracy: " + str(test_accuracy),
    ]
    print('\n'.join(report_lines))

In [24]:
# Print train and test accuracy for the baseline model
model_output(basic_model)

Train Logistic Accuracy: 0.7996480090511958
Test Logistic Accuracy: 0.7505971087366436


Cleaning data:
    1. Remove measurements? I don't think there's a reason to keep things like "10 oz" in there, right? They probably just get in the way of matching ingredients.
    2. Remove other descriptors? Just looking at the head of the df, you can see that some recipes use 'pepper' and others 'black pepper'; perhaps there is some NLP way to remove common adjectives like this, or to group such ingredients together.
    3. Get TF-IDF scores for the ingredients to rank them based on uniqueness.

In [None]:
# This will extract the 'check' words/symbols into their own column from the original data so we can get a sense 
# of the patterns for things like 10 oz and ground black pepper. This is a pretty blunt way to get at this, perhaps
# we can think of creating a list of the ingredients and then just summing how many times we see it and look at that 
# list.
check_for_words = ['\\d', 'ground', 'black', '\\(.*\\)']