In [1]:
import pandas as pd
import numpy as np
import random
import matplotlib.pyplot as plt
import math

In [2]:
# Load the two input tables. Both paths are relative, so they resolve against
# the notebook's current working directory.
df_parsed = pd.read_csv('input/df_parsed.csv')
df_recipes = pd.read_csv('input/df_recipes.csv')

In [3]:
# Preview the first rows of the freshly loaded recipe table.
df_recipes.head()

Unnamed: 0,recipe_urls,recipe_name,ingredients
0,https://www.jamieoliver.com/recipes/duck-recip...,Roast duck with Marsala gravy,"['1 x 1.6kg whole duck', '2 heaped teaspoons C..."
1,https://www.jamieoliver.com/recipes/vegetable-...,Best-ever Brussels sprouts,"['800 g Brussels sprouts', '2 higher-welfare C..."
2,https://www.jamieoliver.com/recipes/pasta-reci...,Beautiful courgette carbonara,"['6 medium green and yellow courgettes', '500 ..."
3,https://www.jamieoliver.com/recipes/vegetable-...,Roasted black bean burgers,"['1½ red onions', '200 g mixed mushrooms', '10..."
4,https://www.jamieoliver.com/recipes/chicken-re...,Chicken & tofu noodle soup,"['2 shallots', '2 cloves of garlic', '2 cm pie..."


In [7]:
# Candidate values for every synthetic feature that gets attached to the recipes.
cuisines = [
    "Chinese", "Italian", "Mexican", "French",
    "Indian", "Japanese", "Thai", "American",
]
cooking_level = ["Easy", "Medium", "Hard"]
# 15 through 120 in steps of 15 (presumably minutes - TODO confirm the unit).
cooking_time = list(range(15, 121, 15))
# Ranks run from 1 through 5.
general_rank = list(range(1, 6))
# Reaction flag: -1 = disliked, 0 = didn't rank, 1 = liked.
like = [-1, 0, 1]
user_rank = list(range(1, 6))

# Feature names and their value lists, kept in the same order as each other.
cols_names = ["cuisines", "cooking_level", "cooking_time", "general_rank", "like", "user_rank"]
cols = [cuisines, cooking_level, cooking_time, general_rank, like, user_rank]

### Function declarations

In [8]:
def add_cols_to_df(df, cols):
    """
    Create one column per name in ``cols`` on ``df``, each filled with ``None``.

    ``df`` is modified in place: a column that already exists under one of the
    given names is overwritten. The same object is returned so the call can be
    used in an assignment.

    Parameters
    ----------
    df : object supporting ``df[name] = value`` (used here with a DataFrame)
    cols : iterable of column names to create

    Returns
    -------
    The ``df`` object that was passed in.
    """
    for name in cols:
        df[name] = None
    return df

# Generates an arbitrary list of values for one of the features being added to the DF.
def generate_random_instances(feature, number):
    """
    Pick ``number`` values at random from ``feature``.

    Each value is drawn independently with ``random.choice``, so the same
    element can appear more than once.

    Parameters
    ----------
    feature : non-empty sequence of candidate values
    number : how many values to draw

    Returns
    -------
    list of length ``number`` (empty when ``number`` is 0).
    """
    return [random.choice(feature) for _ in range(number)]

## Print the distribution of the categories under each new feature:
def show_dist_of_cols(df, cols_names):
    """
    Print, for every column in ``cols_names``, the share of rows per distinct value.

    Shares come from ``value_counts(normalize=True, sort=True)``, i.e. they are
    fractions of the counted rows, listed from most to least frequent.
    Nothing is returned; the result is only written to stdout.
    """
    # Lazy pairs: each column is counted right before it is printed.
    distributions = (
        (col, df[col].value_counts(normalize=True, sort=True)) for col in cols_names
    )
    for col, distribution in distributions:
        print(f'{col}: {distribution}')
        
def weight_recipe(rating):
    """
    Turn a rating into a weight by taking its natural logarithm.

    A rating of 1 therefore gets weight 0.0. ``math.log`` raises
    ``ValueError`` for a rating of 0 or below.
    """
    weight = math.log(rating)
    return weight

### ------------------------------------

In [9]:
# df_parsed = add_cols_to_df(df_parsed, cols_names)
# df_recipes = add_cols_to_df(df_recipes, cols_names)

In [10]:
# Report the (rows, columns) shape of both tables, one per line.
print(
    f'df_recipes shape: {df_recipes.shape}',
    f'df_parsed shape: {df_parsed.shape}',
    sep='\n',
)

df_recipes shape: (4647, 3)
df_parsed shape: (4647, 4)


In [11]:
# Build a DataFrame holding the randomly generated feature columns, with one
# row per row of df_parsed.
# The generator is seeded first so that re-running this cell (or the whole
# notebook) reproduces the same synthetic features instead of a fresh draw.
RANDOM_SEED = 42
random.seed(RANDOM_SEED)

number = df_parsed.shape[0]
cuisines_data = generate_random_instances(cuisines, number)
cooking_time_data = generate_random_instances(cooking_time, number)
cooking_level_data = generate_random_instances(cooking_level, number)
general_rank_data = generate_random_instances(general_rank, number)
like_data = generate_random_instances(like, number)
user_rank_data = generate_random_instances(user_rank, number)

# Key order here fixes the column order of new_features_df.
data = {
    'cuisines': cuisines_data,
    'cooking_time': cooking_time_data,
    'cooking_level': cooking_level_data,
    'general_rank': general_rank_data,
    'like': like_data,
    'user_rank': user_rank_data,
}
new_features_df = pd.DataFrame(data)

In [12]:
# Print the share of each value per generated feature, to inspect how the random draw turned out.
show_dist_of_cols(new_features_df, cols_names)

cuisines: Chinese     0.132774
Japanese    0.129976
Italian     0.128900
Indian      0.128685
Thai        0.122445
French      0.119862
Mexican     0.119217
American    0.118141
Name: cuisines, dtype: float64
cooking_level: Hard      0.347966
Easy      0.329675
Medium    0.322359
Name: cooking_level, dtype: float64
cooking_time: 105    0.131052
120    0.126964
60     0.125027
30     0.124597
45     0.124166
90     0.123521
75     0.122875
15     0.121799
Name: cooking_time, dtype: float64
general_rank: 4    0.209167
1    0.206155
3    0.198408
2    0.193458
5    0.192813
Name: general_rank, dtype: float64
like:  1    0.334194
-1    0.333764
 0    0.332042
Name: like, dtype: float64
user_rank: 4    0.202496
1    0.201635
5    0.200775
3    0.199699
2    0.195395
Name: user_rank, dtype: float64


In [13]:
# Attach the generated feature columns to both recipe tables, side by side.
# Feature columns left over from an earlier run of this cell are dropped first,
# so re-running the cell replaces them instead of appending duplicate column
# names. On a first run nothing matches and the drop is a no-op.
# concat(axis=1) aligns on the index: this relies on all three frames sharing
# the same default row index (same number of rows).
df_recipes = pd.concat(
    [df_recipes.drop(columns=new_features_df.columns, errors='ignore'), new_features_df],
    axis=1,
)
df_parsed = pd.concat(
    [df_parsed.drop(columns=new_features_df.columns, errors='ignore'), new_features_df],
    axis=1,
)

In [14]:
# Check that the generated feature columns were appended to df_recipes.
df_recipes.columns

Index(['recipe_urls', 'recipe_name', 'ingredients', 'cuisines', 'cooking_time',
       'cooking_level', 'general_rank', 'like', 'user_rank'],
      dtype='object')

In [15]:
# Same check for df_parsed.
df_parsed.columns

Index(['recipe_urls', 'recipe_name', 'ingredients', 'ingredients_parsed',
       'cuisines', 'cooking_time', 'cooking_level', 'general_rank', 'like',
       'user_rank'],
      dtype='object')

In [16]:
# Preview the first rows of df_recipes with the generated features attached.
df_recipes.head()

Unnamed: 0,recipe_urls,recipe_name,ingredients,cuisines,cooking_time,cooking_level,general_rank,like,user_rank
0,https://www.jamieoliver.com/recipes/duck-recip...,Roast duck with Marsala gravy,"['1 x 1.6kg whole duck', '2 heaped teaspoons C...",American,105,Easy,5,1,2
1,https://www.jamieoliver.com/recipes/vegetable-...,Best-ever Brussels sprouts,"['800 g Brussels sprouts', '2 higher-welfare C...",French,120,Hard,4,-1,4
2,https://www.jamieoliver.com/recipes/pasta-reci...,Beautiful courgette carbonara,"['6 medium green and yellow courgettes', '500 ...",Indian,15,Hard,4,1,5
3,https://www.jamieoliver.com/recipes/vegetable-...,Roasted black bean burgers,"['1½ red onions', '200 g mixed mushrooms', '10...",French,60,Hard,3,-1,4
4,https://www.jamieoliver.com/recipes/chicken-re...,Chicken & tofu noodle soup,"['2 shallots', '2 cloves of garlic', '2 cm pie...",Japanese,120,Easy,4,-1,4


In [17]:
# df_recipes.to_csv('input/df_recipes_augmanted.csv')
# df_parsed.to_csv('input/df_parsed_augmanted.csv')

In [18]:
# Add a "<rank>_weighted" column next to each rank column in both tables,
# holding weight_recipe applied to every rank value.
# The frames are modified in place; re-running overwrites the same columns.
for df, col in (
    (frame, rank)
    for frame in (df_recipes, df_parsed)
    for rank in ('general_rank', 'user_rank')
):
    col_name = col + "_weighted"
    df[col_name] = df[col].apply(weight_recipe)

In [19]:
# Display df_recipes, now including the general_rank_weighted and user_rank_weighted columns.
df_recipes

Unnamed: 0,recipe_urls,recipe_name,ingredients,cuisines,cooking_time,cooking_level,general_rank,like,user_rank,general_rank_weighted,user_rank_weighted
0,https://www.jamieoliver.com/recipes/duck-recip...,Roast duck with Marsala gravy,"['1 x 1.6kg whole duck', '2 heaped teaspoons C...",American,105,Easy,5,1,2,1.609438,0.693147
1,https://www.jamieoliver.com/recipes/vegetable-...,Best-ever Brussels sprouts,"['800 g Brussels sprouts', '2 higher-welfare C...",French,120,Hard,4,-1,4,1.386294,1.386294
2,https://www.jamieoliver.com/recipes/pasta-reci...,Beautiful courgette carbonara,"['6 medium green and yellow courgettes', '500 ...",Indian,15,Hard,4,1,5,1.386294,1.609438
3,https://www.jamieoliver.com/recipes/vegetable-...,Roasted black bean burgers,"['1½ red onions', '200 g mixed mushrooms', '10...",French,60,Hard,3,-1,4,1.098612,1.386294
4,https://www.jamieoliver.com/recipes/chicken-re...,Chicken & tofu noodle soup,"['2 shallots', '2 cloves of garlic', '2 cm pie...",Japanese,120,Easy,4,-1,4,1.386294,1.386294
...,...,...,...,...,...,...,...,...,...,...,...
4642,https://www.allrecipes.com/recipe/213809/thai-...,Thai-Style Steamed Pumpkin Cake Recipe - Allre...,"[""1 (2 pound) pumpkin - peeled, seeded, and gr...",French,90,Easy,5,-1,5,1.609438,1.609438
4643,https://www.allrecipes.com/recipe/41751/delici...,Delicious Spicy Tomato Salad Recipe - Allrecip...,"[""1 apple - peeled, cored and sliced"",""1 chile...",Indian,45,Easy,5,1,2,1.609438,0.693147
4644,https://www.allrecipes.com/recipe/264507/spicy...,Spicy Sriracha Meatballs Recipe - Allrecipes.com,"[""1 egg"",""1 lime, zested"",""1 tablespoon creamy...",Chinese,75,Medium,5,0,2,1.609438,0.693147
4645,https://www.allrecipes.com/recipe/264012/ajad-...,Ajad (Authentic Thai Cucumber Salad) Recipe - ...,"[""1 1/2 teaspoons salt"",""1/2 cup white vinegar...",Japanese,45,Hard,3,0,4,1.098612,1.386294
