This notebook reformats the raw recipe data so that it is easier to process on the backend (e.g., by putting nutritional values into a standard format).

In [1]:
import pandas as pd

In [35]:
# Load the raw recipe table from a CSV file given by a path relative to the working directory.
df = pd.read_csv('RAW_recipes.csv')

In [36]:
# Replace every embedded newline, carriage return and double quote inside
# string cells with a single space. A single character-class pattern gives the
# same result as three separate replacements but scans the frame only once.
df = df.replace(r'[\n\r"]', ' ', regex=True)

In [37]:
import re

def cleanName(string):
    """Return ``string`` as text with each run of whitespace collapsed to one space.

    The argument is first converted with ``str()``, so non-string values come
    back in their string form (a float NaN becomes ``'nan'``). Whitespace at
    the start or end is reduced to a single space, not removed.
    """
    whitespace_run = re.compile(r'\s+')
    return whitespace_run.sub(r' ', str(string))

# Overwrite the 'name' column with its whitespace-normalised form.
df['name'] = df['name'].apply(cleanName)

In [38]:
def nutritionIntegers(nutrition):
    """Parse a stringified list of numbers into a list of floats.

    Despite the name, the values are returned as ``float``.

    Parameters:
        nutrition: text such as ``'[51.5, 0.0, 13.0]'``. Every ``[`` and ``]``
            is removed before parsing; items are separated by commas with any
            amount of surrounding whitespace.

    Returns:
        A list of floats in the order they appear. An empty list (``'[]'``)
        yields ``[]``.

    Raises:
        ValueError: if a non-empty item is not a valid float literal.
        TypeError: if ``nutrition`` is not a string (raised by ``re.sub``).
    """
    without_brackets = re.sub(r'[\[\]]', '', nutrition)
    # Split on the bare comma rather than ', ' so spacing differences do not
    # matter; float() ignores surrounding whitespace. Empty items (from '[]'
    # or a trailing comma) are skipped instead of crashing float('').
    return [float(item) for item in without_brackets.split(',') if item.strip()]

# Store the parsed form of each 'nutrition' string in a new 'nutrition_integers' column.
df['nutrition_integers'] = df['nutrition'].apply(nutritionIntegers)

In [39]:
def addNutritionDetails(df):
    """Spread the ``nutrition_integers`` list column into one column per nutrient.

    For each name in calories, fat, sugar, sodium, protein, sat_fat, carbs
    (in that order) a column of that name is set on ``df`` to the element at
    the matching position of each row's ``nutrition_integers`` list.

    ``df`` is modified in place; nothing is returned.
    """
    nutrient_columns = ('calories', 'fat', 'sugar', 'sodium', 'protein', 'sat_fat', 'carbs')
    parsed = df['nutrition_integers']
    for position in range(len(nutrient_columns)):
        column = nutrient_columns[position]
        # Bind the position as a default argument so the picker does not
        # depend on the loop variable after this iteration.
        df[column] = parsed.apply(lambda values, position=position: values[position])

# Add one column per nutrient to df (df is modified in place).
addNutritionDetails(df)

In [40]:
def cleanSteps(steps):
    """Split a stringified list of steps into a list of trimmed pieces.

    Every ``[`` and ``]`` character is removed, the remainder is split on the
    two-character separator ``', '``, and each piece is stripped of
    surrounding whitespace.

    Note: quote characters around each step are kept, and a comma followed by
    a space inside a step also causes a split.
    """
    without_brackets = re.sub(r'\]', r'', re.sub(r'\[', r'', steps))
    pieces = without_brackets.split(', ')
    return list(map(str.strip, pieces))

# Preview the split steps. The result is not assigned, so df['steps'] keeps
# its original string value.
df['steps'].apply(cleanSteps)

0         ['make a choice and proceed with recipe', 'dep...
1         ['preheat oven to 425 degrees f', 'press dough...
2         ['brown ground beef in large pot', 'add choppe...
3         ['place potatoes in a large pot of lightly sal...
4         ['mix all ingredients& boil for 2 1 / 2 hours,...
                                ...                        
231632    ['heat oil in a 4-quart dutch oven', 'add cele...
231633          ['mix all ingredients together thoroughly']
231634    ['in a bowl, combine the mashed yolks and mayo...
231635    ['place melted butter in a large mixing bowl a...
231636    ['whip sugar and shortening in a large bowl, a...
Name: steps, Length: 231637, dtype: object

In [41]:
# Nutrient column names; these match the columns that addNutritionDetails sets on df.
labels = ['calories', 'fat', 'sugar', 'sodium', 'protein', 'sat_fat', 'carbs']

# Columns kept for export: recipe fields first, then the nutrient columns.
databaseColumns = ['id', 'name', 'description', 'n_steps', 'minutes', 'steps', *labels]

# Select only the export columns, in that order, and display the result.
cleaned_df = df[databaseColumns]
cleaned_df

Unnamed: 0,id,name,description,n_steps,minutes,steps,calories,fat,sugar,sodium,protein,sat_fat,carbs
0,137739,arriba baked winter squash mexican style,autumn is my favorite time of year to cook! th...,11,55,"['make a choice and proceed with recipe', 'dep...",51.5,0.0,13.0,0.0,2.0,0.0,4.0
1,31490,a bit different breakfast pizza,this recipe calls for the crust to be prebaked...,9,30,"['preheat oven to 425 degrees f', 'press dough...",173.4,18.0,0.0,17.0,22.0,35.0,1.0
2,112140,all in the kitchen chili,this modified version of 'mom's' chili was a h...,6,130,"['brown ground beef in large pot', 'add choppe...",269.8,22.0,32.0,48.0,39.0,27.0,5.0
3,59389,alouette potatoes,"this is a super easy, great tasting, make ahea...",11,45,['place potatoes in a large pot of lightly sal...,368.1,17.0,10.0,2.0,14.0,8.0,20.0
4,44061,amish tomato ketchup for canning,my dh's amish mother raised him on this recipe...,5,190,['mix all ingredients& boil for 2 1 / 2 hours ...,352.9,1.0,337.0,23.0,3.0,0.0,28.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
231632,486161,zydeco soup,this is a delicious soup that i originally fou...,7,60,"['heat oil in a 4-quart dutch oven', 'add cele...",415.2,26.0,34.0,26.0,44.0,21.0,15.0
231633,493372,zydeco spice mix,this spice mix will make your taste buds dance!,1,5,['mix all ingredients together thoroughly'],14.8,0.0,2.0,58.0,1.0,0.0,1.0
231634,308080,zydeco ya ya deviled eggs,"deviled eggs, cajun-style",7,40,"['in a bowl , combine the mashed yolks and may...",59.2,6.0,2.0,3.0,6.0,5.0,0.0
231635,298512,cookies by design cookies on a stick,"i've heard of the 'cookies by design' company,...",9,29,['place melted butter in a large mixing bowl a...,188.0,11.0,57.0,11.0,7.0,21.0,9.0


In [42]:
# Write the selected columns to CLEAN_recipes.csv; index=False leaves the row index out of the file.
cleaned_df.to_csv('CLEAN_recipes.csv', index=False)