In [1]:
import ast

import numpy as np
import pandas as pd
from pytimeparse.timeparse import timeparse

In [2]:
# Load both scraped recipe tables and tag every row with the page layout it
# came from, so the source stays identifiable after the frames are combined.
df_new = pd.read_csv('datasets/dataframes/allrecipes_new.csv').assign(page_format="new")
df_old = pd.read_csv('datasets/dataframes/allrecipes_old.csv').assign(page_format="old")

In [3]:
# (rows, columns) of the new-format frame, including the added page_format column
df_new.shape

(5321, 21)

In [4]:
# (rows, columns) of the old-format frame, including the added page_format column
df_old.shape

(6073, 21)

### Old dataframe

In [5]:
# Convert the "made it" counts to integers.
# Counts may be abbreviated with a trailing 'k' (thousands). Replacing 'k' with
# '000' as text breaks on fractional values ('1.5k' -> '1.5000', which cannot be
# cast to int), so the suffix is stripped and the number is scaled instead.
# Missing values still raise on the final integer cast, as before.
made_it_text = df_old['made_it'].astype(str).str.strip()
in_thousands = made_it_text.str.endswith('k')
made_it_value = pd.to_numeric(made_it_text.str.rstrip('k'))
df_old['made_it'] = (
    made_it_value.mask(in_thousands, made_it_value * 1000).round().astype(int)
)

In [6]:
# Replace the censored '< 1' entries in sodium / cholesterol by a random float
# in [0.05, 1) and convert both columns to numeric.
# - A seeded generator keeps the imputed values identical across re-runs.
# - One value is drawn per affected row; a bare np.random.uniform(0.05, 1)
#   yields a single scalar that would be shared by every '< 1' row.
rng = np.random.default_rng(42)
for column in ('sodium', 'cholesterol'):
    below_one = df_old[column] == '< 1'
    # Blank out the '< 1' markers so the remaining values parse as numbers.
    numeric = pd.to_numeric(df_old[column].mask(below_one))
    if below_one.any():
        numeric = numeric.astype(float)
        numeric.loc[below_one] = rng.uniform(0.05, 1, size=int(below_one.sum()))
    df_old[column] = numeric

In [7]:
# Parse the stringified category lists and keep only the middle entries:
# the first two items and the last item are dropped (per the original note,
# these are the home categories and the recipe name).
# ast.literal_eval is used instead of eval: the text comes from a CSV file, and
# eval would execute any expression embedded in it.
df_old['categories_list'] = df_old['categories_list'].map(
    lambda text: ast.literal_eval(text)[2:-1]
)

### New dataframe

In [8]:
# Strip every 'P' and 'T' character from the new-format prep times
# (presumably ISO-8601-style durations such as 'PT1H30M' -- confirm against the data).
df_new['prep_time'] = df_new['prep_time'].str.replace(r'[PT]', '', regex=True)

# Photo counts carry thousands separators; remove them before parsing as numbers.
photos_no_commas = df_new['photos'].str.replace(',', '', regex=False)
df_new['photos'] = pd.to_numeric(photos_no_commas)

### Concat both dataframes

In [9]:
# Stack the old-format rows on top of the new-format rows. Row labels are kept
# as-is (no ignore_index), so labels from the two sources can repeat.
frames = [df_old, df_new]
df = pd.concat(frames, axis=0)
df.shape

(11394, 21)

In [10]:
# Drop recipes that appear more than once, matching on recipe_name.
# keep='last' retains the final occurrence; since df_new was concatenated
# after df_old, the new-format row wins when a recipe exists in both.
df = df.drop_duplicates(subset=['recipe_name'], keep='last')
df.shape

(10892, 21)

In [11]:
# Convert prep time to minutes: timeparse turns each duration string into
# seconds, which are then divided by 60.
prep_seconds = df['prep_time'].map(lambda raw: timeparse(str(raw)))
df['prep_time'] = prep_seconds / 60

In [12]:
# Parse the review counts as numbers (thousands separators removed first),
# then expose the column under the name number_of_reviews.
# NOTE(review): .str methods yield NaN for non-string entries -- confirm that
# num_reviews is text in both source frames.
reviews_no_commas = df['num_reviews'].str.replace(',', '')
df['num_reviews'] = pd.to_numeric(reviews_no_commas)
df = df.rename(columns={'num_reviews': 'number_of_reviews'})

In [15]:
# Preview the last rows of the combined frame after cleaning
df.tail()

Unnamed: 0,link,feature_list,recipe_name,description,ingredients_list,categories_list,calories,fat,carbs,protein,...,sodium,prep_time,number_of_rating,rating_score,number_of_reviews,reviews,photos,steps,made_it,page_format
5316,https://www.allrecipes.com/recipe/67125/braise...,"['Braised Venison with Rosemary and Shiitake',...",Braised Venison with Rosemary and Shiitake,Try my savory and sophisticated dish I created...,"['2 tablespoons bacon drippings', '1\u2009½ po...","['Meat and Poultry', 'Game Meats', 'Venison']",411.8,4.4,20.4,42.2,...,371.1,145.0,24.0,4.625,22.0,"[""This was fantastic! The only reasons I did n...",2.0,['Melt the bacon drippings in a large Dutch ov...,,new
5317,https://www.allrecipes.com/recipe/228363/crisp...,"['Crispy Roasted Chicken', ""This oven-roasted ...",Crispy Roasted Chicken,This oven-roasted chicken is inspired by the B...,"['1 teaspoon kosher salt', '½ teaspoon caraway...","['World Cuisine', 'European', 'German']",495.3,34.5,3.1,41.5,...,445.6,90.0,113.0,4.699115,89.0,['I subbed arrowroot starch for the flour for ...,18.0,['Preheat oven to 425 degrees F (220 degrees C...,,new
5318,https://www.allrecipes.com/recipe/218073/momma...,"[""Momma Pritchett's Grilled Pork Chops and App...",Momma Pritchett's Grilled Pork Chops and Apple...,"Well, I have yet to really eat good BBQ pork c...","['1 cup light brown sugar', '1 cup soy sauce',...","['Main Dishes', 'Pork', 'Pork Chops']",630.9,21.9,67.5,45.1,...,2198.8,105.0,56.0,4.696429,41.0,"[""Mouth-watering juicy pork tender loin. I mad...",7.0,"['In a large bowl, mix together 1 cup brown su...",,new
5319,https://www.allrecipes.com/recipe/219173/simpl...,"['Simple Beef Pot Roast', ""This pot roast reci...",Simple Beef Pot Roast,This pot roast recipe and technique could not ...,"['1 tablespoon vegetable oil', '3\u2009½ pound...","['Main Dish', 'Beef', 'Pot Roast']",507.0,39.2,5.6,31.7,...,883.0,210.0,614.0,4.741042,459.0,"[""After browsing reviews I decided to write my...",68.0,['Preheat the oven to 275 degrees F (135 degre...,,new
5320,https://www.allrecipes.com/recipe/22851/beef-p...,"['Beef Pot Pie III', ""I couldn't find a recipe...",Beef Pot Pie III,I couldn't find a recipe for beef pot pie so I...,"['1 pound sirloin steak, cubed', ' salt to ta...","['Main Dishes', 'Savory Pies', 'Pot Pie']",526.6,23.6,58.3,19.7,...,661.1,245.0,234.0,4.324786,174.0,"[""This came out wonderful even though I made a...",31.0,"['In a saucepan over medium heat, brown the pi...",,new


In [16]:
# Remove the link and feature_list columns, then write the combined table to
# disk without the row index.
df = df.drop(columns=["link", "feature_list"])
df.to_csv('datasets/dataframes/allrecipes_full.csv', index=False)