# Preparing data frames to be used on the ClassApp project

In [1]:
import pandas as pd 
import numpy as np
from recipes import Recipes

Importing listings and BBC DB

In [2]:
cuukin = Recipes()
cuukin.import_data(listings_folder='listings', evaluation_folder='evaluation')

Assigning badge name to technique listing DF

In [3]:
cuukin.listings['techniques'] = cuukin.listings['techniques'].assign(badge_name = cuukin.find_badge_name(cuukin.listings['techniques']['badge_id']))

cuukin.listings['techniques'].head(3)

Unnamed: 0_level_0,name,score_1,score_2,score_3,badge_id,created_at,updated_at,badge_name
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
67,Measuring,15,2,0,36,2021-03-13 17:53:26.128000+00:00,2021-03-13 17:53:26.128000+00:00,Prepping techniques
66,Plating,15,2,0,40,2021-03-13 17:53:26.116000+00:00,2021-03-13 17:53:26.116000+00:00,Plating
65,Smoking,15,2,0,39,2021-03-13 17:53:26.107000+00:00,2021-03-13 17:53:26.107000+00:00,Alternative cooking techniques


Creating techniques data frame

In [4]:
techniques_df = cuukin.listings['techniques'][['name', 'badge_id', 'badge_name']]
techniques_df.rename_axis('techniques_id', inplace=True)

techniques_df.head(3)

Unnamed: 0_level_0,name,badge_id,badge_name
techniques_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
67,Measuring,36,Prepping techniques
66,Plating,40,Plating
65,Smoking,39,Alternative cooking techniques


Creating badges data frame

In [5]:
badges_df = cuukin.listings['badges'][cuukin.listings['badges']['category']=='technique']
badges_df = badges_df[['name']]
badges_df.rename_axis('badges_id', inplace=True)
badges_df.head(3)

Unnamed: 0_level_0,name
badges_id,Unnamed: 1_level_1
40,Plating
39,Alternative cooking techniques
38,Stove-top cooking


Creating methods data frame

In [6]:
cuukin.evaluation_data['recipe_methods'].dropna(axis='index', inplace=True)
cuukin.evaluation_data['recipe_methods'].index = cuukin.evaluation_data['recipe_methods'].index.astype('int')
cuukin.evaluation_data['recipe_methods'] = cuukin.evaluation_data['recipe_methods'].astype({'recipe_id': 'int'})
methods_df = cuukin.evaluation_data['recipe_methods']

methods_df.rename_axis('methods_id', inplace=True)
methods_df.head(3)

Unnamed: 0_level_0,recipe_id,description
methods_id,Unnamed: 1_level_1,Unnamed: 2_level_1
0,0,Heat the oil in a frying pan and gently fry th...
1,0,"Add the chickpeas, harissa and tomatoes and co..."
2,0,Add a squeeze of lemon juice and season with s...


Assigning method index column

In [7]:
methods_df['method_index'] = 0
num_indexes = methods_df.shape[0]
for index in range(1,num_indexes):
    if methods_df.at[index, 'recipe_id'] == methods_df.at[index - 1, 'recipe_id']:
        methods_df.at[index, 'method_index'] = methods_df.at[index - 1, 'method_index'] + 1
    else:
        methods_df.at[index, 'method_index'] = 1
methods_df.head(6)

Unnamed: 0_level_0,recipe_id,description,method_index
methods_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,0,Heat the oil in a frying pan and gently fry th...,0
1,0,"Add the chickpeas, harissa and tomatoes and co...",1
2,0,Add a squeeze of lemon juice and season with s...,2
3,0,Spoon the yoghurt onto two plates or shallow b...,3
4,1,Bring a small saucepan of water to the boil an...,1
5,1,"Meanwhile, heat the oil in a wide frying pan o...",2


Creating recipes Data Frame

In [8]:
recipes_df = cuukin.evaluation_data['recipes'][['title']]
recipes_df.rename_axis('recipes_id', inplace=True)
recipes_df.head(3)

Unnamed: 0_level_0,title
recipes_id,Unnamed: 1_level_1
0,Chickpeas with harissa and yoghurt
1,"Chickpea, spinach and egg curry"
2,"Tomato, chickpea and pasta soup"


Randomizing recipes on methods data frame

In [9]:
recipes = [df for _, df in methods_df.groupby('recipe_id')]
np.random.shuffle(recipes)
df = pd.concat(recipes).reset_index()
df.rename_axis('rand_method_id', inplace = True)
rand_methods_df = df
print(rand_methods_df.head(3))

                methods_id  recipe_id  \
rand_method_id                          
0                     5068        863   
1                     5069        863   
2                     5070        863   

                                                      description  \
rand_method_id                                                      
0                        Preheat the oven to 200C/180C Fan/Gas 6.   
1               Put the mushrooms, chard, oil, garlic, chilli,...   
2               Bring a large pan of salted water to the boil,...   

                method_index  
rand_method_id                
0                          1  
1                          2  
2                          3  


Defining path and exporting pickled data frames

In [10]:
path = r'c:/users/guilh/code/classapp/data'

In [11]:
techniques_df.to_pickle(path + '/techniques')
badges_df.to_pickle(path + '/badges')
rand_methods_df.to_pickle(path + '/methods')
recipes_df.to_pickle(path+ '/recipes')