In [147]:
import pandas as pd

df_raw = pd.read_csv('recipes.csv')

print(df_raw.columns)
print(df_raw.head(2).to_dict())

Index(['RecipeId', 'Name', 'AuthorId', 'AuthorName', 'CookTime', 'PrepTime',
       'TotalTime', 'DatePublished', 'Description', 'Images', 'RecipeCategory',
       'Keywords', 'RecipeIngredientQuantities', 'RecipeIngredientParts',
       'AggregatedRating', 'ReviewCount', 'Calories', 'FatContent',
       'SaturatedFatContent', 'CholesterolContent', 'SodiumContent',
       'CarbohydrateContent', 'FiberContent', 'SugarContent', 'ProteinContent',
       'RecipeServings', 'RecipeYield', 'RecipeInstructions'],
      dtype='object')
{'RecipeId': {0: 38, 1: 39}, 'Name': {0: 'Low-Fat Berry Blue Frozen Dessert', 1: 'Biryani'}, 'AuthorId': {0: 1533, 1: 1567}, 'AuthorName': {0: 'Dancer', 1: 'elly9812'}, 'CookTime': {0: 'PT24H', 1: 'PT25M'}, 'PrepTime': {0: 'PT45M', 1: 'PT4H'}, 'TotalTime': {0: 'PT24H45M', 1: 'PT4H25M'}, 'DatePublished': {0: '1999-08-09T21:46:00Z', 1: '1999-08-29T13:12:00Z'}, 'Description': {0: 'Make and share this Low-Fat Berry Blue Frozen Dessert recipe from Food.com.', 1: 'Make

In [148]:
# Pick the columns to keep and rename them
columns_to_keep = {
    'Name': 'title',
    'RecipeCategory': 'category',
    'Keywords': 'keywords',
    'RecipeServings': 'servings',
    'RecipeIngredientParts': 'ingredients',
    'RecipeIngredientQuantities': 'ingredient_quantities',
    'RecipeInstructions': 'steps',
    'AggregatedRating': 'rating',
    'ReviewCount': 'review_count',
#    'Description': 'description',
#    'CookTime': 'cook_time',
#    'PrepTime': 'prep_time',
#    'TotalTime': 'total_time',
#    'RecipeId': 'recipe_id',
#    'AuthorId': 'author_id',
#    'AuthorName': 'author_name',
#    'DatePublished': 'date_published',
#    'Images': 'images',
#    'Calories': 'calories',
#    'FatContent': 'fat_content',
#    'SaturatedFatContent': 'saturated_fat_content',
#    'CholesterolContent': 'cholesterol_content',
#    'SodiumContent': 'sodium_content',
#    'CarbohydrateContent': 'carbohydrate_content',
#    'FiberContent': 'fiber_content',
#    'SugarContent': 'sugar_content',
#    'ProteinContent': 'protein_content',
#    'RecipeYield': 'recipe_yield'
}

# Select and rename the columns
df = df_raw[list(columns_to_keep.keys())].rename(columns=columns_to_keep)

df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 522517 entries, 0 to 522516
Data columns (total 9 columns):
 #   Column                 Non-Null Count   Dtype  
---  ------                 --------------   -----  
 0   title                  522517 non-null  object 
 1   category               521766 non-null  object 
 2   keywords               505280 non-null  object 
 3   servings               339606 non-null  float64
 4   ingredients            522517 non-null  object 
 5   ingredient_quantities  522514 non-null  object 
 6   steps                  522517 non-null  object 
 7   rating                 269294 non-null  float64
 8   review_count           275028 non-null  float64
dtypes: float64(3), object(6)
memory usage: 35.9+ MB


In [149]:
def calculate_missing_percentage(df):
    # Calculate the percentage of missing values for each column and print them
    missing_percentage = df.isna().mean() * 100
    missing_info = missing_percentage.sort_values(ascending=False).reset_index()
    missing_info.columns = ['Column', 'Percentage of Missing Values']
    print(missing_info)

calculate_missing_percentage(df)


                  Column  Percentage of Missing Values
0                 rating                     48.462155
1           review_count                     47.364775
2               servings                     35.005751
3               keywords                      3.298840
4               category                      0.143727
5  ingredient_quantities                      0.000574
6                  title                      0.000000
7            ingredients                      0.000000
8                  steps                      0.000000


In [150]:
# Normalize text columns
df['title'] = df['title'].str.strip().str.lower()
df['category'] = df['category'].str.strip().str.lower()
df['keywords'] = df['keywords'].str.strip().str.lower()
df['ingredients'] = df['ingredients'].str.strip().str.lower()
df['ingredient_quantities'] = df['ingredient_quantities'].str.strip().str.lower()
df['steps'] = df['steps'].str.strip().str.lower()

# Ensure 'servings', 'rating', 'review_count' are numeric
df['servings'] = pd.to_numeric(df['servings'], errors='coerce')
df['rating'] = pd.to_numeric(df['rating'], errors='coerce')
df['review_count'] = pd.to_numeric(df['review_count'], errors='coerce')

df = df.dropna(subset=['title', 'category', 'keywords', 'servings', 'ingredients', 'ingredient_quantities', 'steps'])
df.info()
calculate_missing_percentage(df)

<class 'pandas.core.frame.DataFrame'>
Index: 327969 entries, 0 to 522514
Data columns (total 9 columns):
 #   Column                 Non-Null Count   Dtype  
---  ------                 --------------   -----  
 0   title                  327969 non-null  object 
 1   category               327969 non-null  object 
 2   keywords               327969 non-null  object 
 3   servings               327969 non-null  float64
 4   ingredients            327969 non-null  object 
 5   ingredient_quantities  327969 non-null  object 
 6   steps                  327969 non-null  object 
 7   rating                 168216 non-null  float64
 8   review_count           171601 non-null  float64
dtypes: float64(3), object(6)
memory usage: 25.0+ MB
                  Column  Percentage of Missing Values
0                 rating                     48.709787
1           review_count                     47.677677
2                  title                      0.000000
3               category               

In [151]:
import ast

# Format the list to a python list - return None if list is empty
def convert_to_list(item_str):
    try: # Remove 'c(' and ')' and convert the string into a list
        cleaned_str = item_str.lstrip('c(').rstrip(')').replace('NA', '"to taste"')
        return ast.literal_eval(f'[{cleaned_str}]')
    
    except (ValueError, SyntaxError): # Return none if there are issues, to drop later
        return None

# Apply this function directly to the relevant columns
df['keywords'] = df['keywords'].apply(convert_to_list)
df['ingredients'] = df['ingredients'].apply(convert_to_list)
df['ingredient_quantities'] = df['ingredient_quantities'].apply(convert_to_list)
df['steps'] = df['steps'].apply(convert_to_list)

In [152]:
df = df.dropna(subset=['keywords','ingredients', 'ingredient_quantities', 'steps'])
calculate_missing_percentage(df)

                  Column  Percentage of Missing Values
0                 rating                     49.044200
1           review_count                     47.977734
2                  title                      0.000000
3               category                      0.000000
4               keywords                      0.000000
5               servings                      0.000000
6            ingredients                      0.000000
7  ingredient_quantities                      0.000000
8                  steps                      0.000000


Unnamed: 0,title,category,keywords,servings,ingredients,ingredient_quantities,steps,rating,review_count
0,low-fat berry blue frozen dessert,frozen desserts,"[dessert, low protein, low cholesterol, healthy, free of..., summer, weeknight, freezer, easy]",4.0,"[blueberries, granulated sugar, vanilla yogurt, lemon juice]","[4, 1/4, 1, 1]","[toss 2 cups berries with sugar., let stand for 45 minutes, stirring occasionally., transfer berry-sugar mixture to food processor., add yogurt and process until smooth., strain through fine sieve. pour into baking pan (or transfer to ice cream maker and process according to manufacturers' directions). freeze uncovered until edges are solid but centre is soft. transfer to processor and blend until smooth again., return to pan and freeze until edges are solid., transfer to processor and blend until smooth again., fold in remaining 2 cups of blueberries., pour into plastic mold and freeze overnight. let soften slightly to serve.]",4.5,4.0
3,carina's tofu-vegetable kebabs,soy/tofu,"[beans, vegetable, low cholesterol, weeknight, broil/grill, oven]",2.0,"[extra firm tofu, eggplant, zucchini, mushrooms, soy sauce, low sodium soy sauce, olive oil, maple syrup, honey, red wine vinegar, lemon juice, garlic cloves, mustard powder, black pepper]","[12, 1, 2, 1, 10, 1, 3, 2, 2, 2, 1, 2, 1/2, 1/4, 4]","[drain the tofu, carefully squeezing out excess water, and pat dry with paper towels., cut tofu into one-inch squares., set aside. cut eggplant lengthwise in half, then cut each half into approximately three strips., cut strips crosswise into one-inch cubes., slice zucchini into half-inch thick slices., cut red pepper in half, removing stem and seeds, and cut each half into one-inch squares., wipe mushrooms clean with a moist paper towel and remove stems., thread tofu and vegetables on to barbecue skewers in alternating color combinations: for example, first a piece of eggplant, then a slice of tofu, then zucchini, then red pepper, baby corn and mushrooms., continue in this way until all skewers are full., make the marinade by putting all ingredients in a blender, and blend on high speed for about one minute until mixed., alternatively, put all ingredients in a glass jar, cover tightly with the lid and shake well until mixed., lay the kebabs in a long, shallow baking pan or on a non-metal tray, making sure they lie flat. evenly pour the marinade over the kebabs, turning them once so that the tofu and vegetables are coated., refrigerate the kebabs for three to eight hours, occasionally spooning the marinade over them., broil or grill the kebabs at 450 f for 15-20 minutes, or on the grill, until the vegetables are browned., suggestions this meal can be served over cooked, brown rice. amounts can easily be doubled to make four servings.]",4.5,2.0
4,cabbage soup,vegetable,"[low protein, vegan, low cholesterol, healthy, winter, < 60 mins, easy]",4.0,"[plain tomato juice, cabbage, onion, carrots, celery]","[46, 4, 1, 2, 1]","[mix everything together and bring to a boil., reduce heat and simmer for 30 minutes (longer if you prefer your veggies to be soft)., refrigerate until cool., serve chilled with sour cream.]",4.5,11.0
7,buttermilk pie with gingersnap crumb crust,pie,"[dessert, healthy, weeknight, oven, < 4 hours]",8.0,"[sugar, margarine, egg, flour, salt, buttermilk, graham cracker crumbs, margarine]","[3/4, 1, 1, 2, 3, 1/4, 1, 1/2, 1/2, 2]","[preheat oven to 350°f., make pie crust, using 8 inch pie pan, do not bake., mix sugar and margarine in medium bowl until blended; beat in egg whites and egg., stir in flour, salt, and buttermilk until well blended., pour filling into prepared crust, bake 40 minutes or until sharp knife inserted near center comes out clean., sprinkle with nutmeg and serve warm or chilled., combine graham crumbs, gingersnap crumbs, and margarine in 8 or 9 inch pie pan, pat mixture evenly on bottom and side of pan., bake 8 to 10 minutes or until edge of crust is lightly browned., cool on wire rack.]",4.0,3.0
10,boston cream pie,pie,"[dessert, weeknight, oven, < 4 hours]",8.0,"[margarine, cake flour, baking powder, salt, sugar, vanilla, eggs, milk, sugar, cornstarch, milk, flour, salt, vanilla, butter, vanilla]","[1/2, 2 1/4, 3, 1, 1 1/2, 1/3, 1 1/2, 2, 1, 1/3, 1, 1 1/2, 1, 1/2, 1, 1/4, 1, 1 1/2, 2, 1]","[beat egg whites until soft peaks form., gradually add 1/2 cup sugar, beating until very stiff peaks form., sift together remaining dry ingredients into another bowl., add oil, half the milk and vanilla., beat 1 minute at medium speed., add remaining milk and egg yolks., beat 1 minute, scrape bowl., gently fold in egg whites., bake in two greased 9x 1.5-inch round pans in 350°f oven for 25 minutes., cool 10 minutes and then remove from pans., cool completely., fill with cream filling., frost with chocolate glaze., french custard filling: in saucepan, combine sugar, flour, cornstarch and salt. gradually stir in milk., cook and stir until mixture thickens and boils; cook and stir 2-3 minutes longer., stir a little of hot mixture into egg yolk; return to hot mixture., cook and stir until mixture just boils., add vanilla; cool., beat until smooth; fold in whipped cream., chocolate glaze: melt chocolate and butter over low heat, stirring constantly., remove from heat. stir in confectioners' sugar and vanilla until crumbly., blend in 3 tablespoons boiling water., add enough water (about 2 teaspoons), a teaspoon at a time, to form medium glaze of pouring consistency., pour quickly over top of cake; spread glaze evenly over top and sides.]",2.0,2.0
...,...,...,...,...,...,...,...,...,...
522509,spanish coffee with tia maria,beverages,"[< 15 mins, easy, from scratch]",1.0,"[lemon wedge, granulated sugar, cognac, brandy, maraschino cherry, ground cinnamon]","[1, 1, 1, 1 1/2, 6, 3, 1, 1]","[cut a small slit in the lemon wedge and slide it around the rim of the glass. dip the rim in granulated sugar and turn back and forth to coat well., add cognac, tia maria and pour in hot coffee. top generously with whipped cream, sprinkle on some cinnamon and garnish with a cherry. carefully wrap a serviette around the glass and enjoy! makes 1 coffee.]",,
522510,slow-cooker classic coffee cake,breads,[< 4 hours],12.0,"[all-purpose flour, brown sugar, butter, ground cinnamon, salt, sour cream, butter, eggs, powdered sugar, milk, vanilla]","[1, 1/2, 4, 2, 1/8, 1, 1, 1/2, 4, 1/2, 2 -3, 1/4]","[line bottom and sides of 5-quart oval slow cooker with single sheet of cooking parchment paper, and spray with cooking spray., in medium bowl, stir first five ingredients until crumbly. set aside., in large bowl, stir cake ingredients (cake mix, sour cream, butter, eggs) until blended. pour into slow cooker. place folded, clean dish towel under cover of cooker. this will prevent condensation from dripping down onto cake. cook on high heat setting 1 hour. carefully remove slow cooker’s ceramic insert, and rotate insert 180 degrees. sprinkle topping over cake. replace cover with dish towel under the cover. continue to cook on high heat setting 30 minutes to 1 hour or until toothpick inserted in center comes out clean., transfer ceramic insert from slow cooker to cooling rack. let stand 10 minutes. using parchment paper, carefully lift cake out of ceramic insert, and transfer to cooling rack. cool completely, about 1 hour. remove parchment paper., in small bowl, beat powdered sugar, milk and vanilla until smooth. drizzle over cake., for an even easier version, you can skip the glaze, and sprinkle powdered sugar on top of cooled cake., if preferred, 1/2 cup chopped toasted walnuts or pecans can be mixed into the topping mixture.]",,
522512,meg's fresh ginger gingerbread,dessert,[< 4 hours],8.0,"[fresh ginger, unsalted butter, dark brown sugar, dark corn syrup, molasses, egg, salt, all-purpose flour, baking soda, cinnamon, ground cloves, buttermilk]","[3, 1/2, 1/2, 1/4, 1/4, 1, 1/4, 1 1/2, 1, 1/2, 1/4, 1/2]","[preheat oven to 350&deg;f grease an 8x8 cake pan. this recipe uses 2 mixing bowls, 1 large and 1 medium., peel an grate your ginger if not using ginger paste., in the large mixing bowl, create together the butter and brown sugar. add molasses, syrup, egg and salt, beating after each addition. beat mixture on low speed until smooth., in a separate bowl, mix the flour, soda, & spices., mix half the dry mix into the sugar mixture. beat until smooth, mix in 1/4 cup buttermilk. beat until smooth., add remaining flour, then the remaining buttermilk, beating after each addition., use a large spoon to stir in the ginger. mix until well incorporated., pour into a well greased 8x8 cake pan and back 35 - 40 minutes or until the toothpick jabbed in the middle comes out clean.]",,
522513,roast prime rib au poivre with mixed peppercorns,very low carbs,"[high protein, high in..., < 4 hours]",8.0,"[dijon mustard, garlic, peppercorns, shallot, cognac, brandy]","[9, 2, 4, 2, 1/3, 3 1/2, 1/3]","[position rack in center of oven and preheat to 450°f. place beef, fat side up, in shallow roasting pan. sprinkle beef with salt. mix mustard and garlic in small bowl. spread mustard mixture over top of beef. sprinkle 2 tablespoons crushed peppercorns over mustard mixture., roast beef 15 minutes. reduce heat to 325°f. roast until meat thermometer inserted into center of beef registers 125°f. for medium-rare, tenting loosely with foil if crust browns too quickly, about 2 hours 45 minutes. transfer beef to platter. tent with foil to keep warm., pour pan juices into 2-cup glass measuring cup (do not clean pan). freeze juices 10 minutes. spoon fat off top of pan juices, returning 1-tablespoon fat to roasting pan. reserve juices., melt fat in same roasting pan over medium-high heat. add shallots and sauté until tender, scraping up any browned bits from bottom of pan, about 2 minutes. remove pan from heat. add canned beef broth, then cognac (liquid may ignite). return pan to heat and boil until liquid is reduced to 2 cups, about 15 minutes. add pan juices and remaining 1 teaspoon crushed peppercorns. transfer pan juices to sauceboat., carve roast and serve with juices.]",,


In [153]:
# Flatten the ingredients lists
all_ingredients = []
for ingredients_list in df['ingredients']:
    # Directly flatten and append the ingredients
    all_ingredients.extend(ingredients_list)

# all_ingredients_normalized = [ingredient.lower().strip() for ingredient in all_ingredients]

# Get unique ingredients
unique_ingredients = sorted(set(all_ingredients))
# unique_ingredients_Notnormal = sorted(set(all_ingredients))


# print (len(unique_ingredients_Notnormal))
print (len(unique_ingredients))
# Output the unique ingredients
for ingredient in unique_ingredients:
    print(ingredient)

# Convert the list to a pandas DataFrame and save it to a CSV file

unique_ingredients_df = pd.DataFrame(unique_ingredients, columns=['Ingredients'])
#unique_ingredients_df.to_csv('unique_ingredients_list.csv', index=False)



5819
1% fat buttermilk
1% fat cottage cheese
1% low-fat chocolate milk
1% low-fat milk
1-1/2 ingredient fiber crust
10-inch corn tortillas
10-inch flour tortilla
10-inch flour tortillas
10-inch whole wheat  tortillas
10-minute success rice
100 proof vodka
12-inch flour tortilla
12-inch flour tortillas
2 texans craving salsa far from home
2% buttermilk
2% cheddar cheese
2% evaporated milk
2% fat cottage cheese
2% low-fat chocolate milk
2% low-fat milk
2% milk
2% mozzarella cheese
20% sour cream
2bleu's 2 minute 2 easy pizza sauce
2bleu's sweet mustard sauce for pretzels and more!
3 legume butter
3-cheese gourmet cheddar blend cheese
5% fat ricotta cheese
6-inch corn tortillas
6-inch flour tortillas
6-inch tortillas
7-inch corn tortillas
7-inch flour tortillas
8-inch 97% fat free flour tortillas
8-inch fat-free flour tortillas
8-inch flour tortillas
8-inch low-carb whole wheat tortilla
8-inch ready-made graham cracker crust
9-inch flour tortillas
9-inch graham cracker crust
9-inch graham

In [154]:
unique_categories = df['category'].unique()

print(len(unique_categories))

drop_keywords = [
    'pie', 'cake', 'dessert', 'cheesecake', 'candy', 'cookie', 'gelatin', 
    'baking', 'sweets', 'pastries', 'pudding', 'cupcakes', 'brownies', 'mousse', 
    'tarts', 'muffins', 'turnovers', 'ice cream', 'bread', 'beverage', 'smoothie', 
    'shakes', 'alcohol', 'cocktail', 'tea', 'coffee', 'wine', 'beer', 'dressing'
]

# Drop deserts, drinks and baking recipes ( leaving mostly meals )
df = df[~df['category'].str.contains('|'.join(drop_keywords), case=False, na=False)]

# drop all without rating 
df = df.dropna(subset=['rating', 'review_count'])

# drop unless rating > 4 and more than 5 reviews 
df = df[df['rating'] >= 4.5]
df = df[df['review_count'] >= 5]

# drop where servings are not between 1-10
df = df[(df['servings'] >= 1) & (df['servings'] <= 10)]

# drop all with less than 3 keywords 
df = df[df['keywords'].apply(lambda x: isinstance(x, list) and len(x) >= 3)]

# Merge the keywords and category to a "tags" list 
df['keywords'] = df.apply(lambda row: row['keywords'] + [row['category']], axis=1)
df = df.rename(columns={'keywords': 'tags'})
df = df.drop(columns=['category'])

df

282


Unnamed: 0,title,tags,servings,ingredients,ingredient_quantities,steps,rating,review_count
4,cabbage soup,"[low protein, vegan, low cholesterol, healthy, winter, < 60 mins, easy, vegetable]",4.0,"[plain tomato juice, cabbage, onion, carrots, celery]","[46, 4, 1, 2, 1]","[mix everything together and bring to a boil., reduce heat and simmer for 30 minutes (longer if you prefer your veggies to be soft)., refrigerate until cool., serve chilled with sour cream.]",4.5,11.0
11,chicken breasts lombardi,"[chicken, poultry, meat, european, very low carbs, weeknight, oven, < 4 hours, chicken breast]",6.0,"[fresh mushrooms, butter, boneless skinless chicken breast halves, flour, butter, marsala, chicken broth, salt, mozzarella cheese, parmesan cheese, green onion]","[2, 2, 12, 1/2, 1/3, 3/4, 1/2, 1/2, 1/2, 1/2, 1/4]","[cook mushrooms in 2 tbsp butter in a large skillet, stirring constantly, just until tender., remove from heat; set aside. cut each chicken breast half in half lengthwise. place each piece of chicken between two sheets of wax paper; flatten to 1/8"" thickness, using a meat mallet or rolling pin., dredge chicken pieces in flour., place 5 or 6 pieces of chicken in 1 to 2 tbsp butter in a large skillet; cook over medium heat 3 to 4 minutes on each side or until golden., place chicken in a lightly greased 13x9"" baking dish, overlapping edges., repeat procedure with remaining chicken and butter., reserve pan drippings in skillet. sprinkle reserved mushrooms over chicken., add wine and broth to skillet. bring to a boil; reduce heat, and simmer, uncovered, 10 minutes, stirring occasionally., stir in salt and pepper., pour sauce over chicken., combine cheeses and green onions; sprinkle over chicken., bake uncovered at 450 for 12 to 14 minutes., broil 5 1/2"" away from heat 1 to 2 minutes or until browned.]",5.0,21.0
20,low-fat burgundy beef & vegetable stew,"[vegetable, meat, low cholesterol, healthy, free of..., weeknight, < 4 hours, stew]",6.0,"[beef eye round, dried thyme leaves, salt, pepper, ready-to-serve beef broth, burgundy wine, garlic, cornstarch, frozen sugar snap peas]","[1 1/2, 1, 1, 1/2, 1/2, 1, 1/2, 3, 5 1/2, 1, 2, 1]","[trim fat from beef, cut into 1-inch pieces., in dutch oven, heat oil over medium high hunt until hot. add beef (half at a time) and brown evenly, stirring occasionally., pour off drippings., season with thyme, salt and pepper., stir in broth, wine and garlic. bring to boil; reduce heat to low., cover tightly and simmer 1 1/2 hours., add carrots and onions., cover and continue cooking 35 to 40 minutes or until beef and vegetables are tender., bring beef stew to a boil over medium-high heat. add cornstarch mixture; cook and stir 1 minute. stir in sugar snap peas., reduce heat to medium and cook 3 to 4 minutes or until peas are heated through.]",4.5,7.0
24,"black bean, corn, and tomato salad","[corn, beans, vegetable, low cholesterol, healthy, free of..., spring, summer, < 30 mins, easy, black beans]",2.0,"[fresh lemon juice, olive oil, black beans, fresh corn kernels, plum tomato, scallion, fresh parsley leaves, cayenne, boston lettuce leaves]","[3, 2, 1, 1, 1, 1, 2, 1, 4]","[in a bowl whisk together lemon juice, oil, and salt to taste., stir in remaining ingredients, except lettuce leaves, with salt and black pepper to taste., let salad stand, stirring once or twice, 15 minutes for flavors to develop., line 2 plates with lettuce and divide salad between them.]",5.0,23.0
51,brown rice and vegetable pilaf,"[rice, vegetable, weeknight, < 4 hours, brown rice]",6.0,"[brown rice, chicken broth, unsalted butter, oregano, marjoram, summer savory, unsalted butter, celery, fresh snow pea, broccoli floret, green onion, lemons, coconut]","[1 1/2, 3, 3, 1/2, 1/2, 1/2, 5, 1, 1, 1, 1, 1 1/2, 2, 3/4]","[preheat oven to 325 degrees f., heat chicken broth to boiling., combine broth with brown rice, butter, oregano, marjoram, and summery savory in 3 quart casserole and bake, covered, for 1-1/2 hours or until rice is tender., keep hot. melt butter in large skillet or dutch oven., add celery, snow peas, broccoli, green onion, and ginger and stir fry until crisp-tender., add vegetables, lemon rind, and toasted coconut to rice. toss lightly to combine. adjust seasonings.]",5.0,8.0
...,...,...,...,...,...,...,...,...
506514,sundried tomato and sweet basil shakshuka #ragu,"[beans, southwest asia (middle east), weeknight, brunch, < 30 mins, easy, inexpensive, sauces]",4.0,"[chickpeas, eggs, extra virgin olive oil, pine nuts, feta cheese]","[1, 1, 8, 1, 2, 3, 1]","[pour jar of ragu tomato sauce on a 11"" skillet., heat on medium for 5-7 minutes., add rinsed and drained chickpeas and distribute them evenly through the sauce and skillet., make a well for each egg on the tomato sauce and crack eggs inside., cover the pan and cook over medium heat until the whites are set and the yolks have the consistency of your preference (runny or cooked all the way). about 7-10 minutes., while the eggs are cooking, heat 1 olive oil in a small separate skillet add pine nuts and cook at medium heat for 3-5 minutes until golden brown. watch pine nuts carefully as they can easily burn., before serving top tomato sauce and eggs with crumbled feta cheese and pine nuts. serve warm and enjoy with crusty bread.]",5.0,8.0
508211,spinach artichoke dip recipe like houston's,"[greens, artichoke, vegetable, < 60 mins, spinach]",8.0,"[garlic cloves, onions, onion, real butter, all-purpose flour, heavy cream, chicken broth, pecorino romano cheese, lemon juice, salt, sour cream, frozen chopped spinach, artichoke hearts, white cheddar cheese]","[2, 2, 1/4, 1/4, 2, 1/4, 2/3, 2, 1/2, 1/2, 1/4, 20, 12, 1/2]","[in a 2-quart saucepan over medium heat, sauté garlic and onion in butter until golden, about 3 - 5 minutes., -stir in flour and cook for 1 minute., -slowly whisk in cream and broth and continue cooking until boiling., -once boiling, stir in romano, lemon juice, hot sauce, and salt; stir until cheese has melted; remove from heat and allow to cool for 5 minutes., -stir sour cream into pan, then fold in dry spinach and artichoke hearts., -fold the mixture into a microwave-safe serving dish, or into several serving-size dishes., -sprinkle cheddar evenly over top(s)., -at this point, the dip can be refrigerated until ready to serve, if desired., -microwave dip on 50% power just until cheese has melted.]",5.0,6.0
516672,korean-inspired popcorn chicken,"[poultry, meat, korean, asian, < 30 mins, chicken]",4.0,"[chicken, cornstarch, kosher salt, black pepper, canola oil, sesame seeds, scallions, garlic clove, soy sauce, light brown sugar, honey]","[1, 2, 1, 1, 1/2, 1/2, 1, 2, 1, 1, 2, 1, 1, 2, 1, 1]","[chicken:, preheat canola oil in a heavy bottomed pot to 350 degrees., combine chicken and rice wine vinegar in a large bowl. allow to marinate for 15 minutes., in a shallow baking dish or bowl whisk together cornstarch, salt and pepper. working in batches dredge the marinated chicken in the cornstarch mixture. fry chicken in batches for 4-5 minutes until lightly browned and fully cooked. allow the oil to come up to temperature between frying. remove to a paper towel lined plate. season with salt., sauce:, combine all ingredients in a large saute pan over medium high heat. allow the mixture to come to a boil then reduce heat to medium low until the sauce thickens, about 5 minutes. add the cooked chicken to the sauce and toss to coat. garnish with toasted sesame seeds and sliced scallions.]",5.0,7.0
520386,cuban mojo potatoes,"[caribbean, low protein, low cholesterol, healthy, < 60 mins, cuban]",4.0,"[yukon gold potatoes, salt, lime, olive oil, red onion, garlic cloves, scallions, red pepper flakes, cilantro, parsley]","[1 1/2, 1, 1/2, 1/4, 1, 6, 3, 1, 1/2]","[lay a sheet of aluminum foil on the countertop., -place all the ingredients (except cilantro) on top of the foil., -fold up the foil to create a tightly sealed package., -lay a second sheet of aluminum foil on the counter and double wrap the potatoes. make sure the package is tightly sealed., -place the potatoes over the hottest part of the grill., -turn every 10 minutes, for about 40-50 minutes., -open the foil packet and pour the potatoes, add cilantro (or parsley) and stir to combine.]",5.0,6.0


In [161]:
# Finally trim it to a 5k rows (for more manageable training)
# And eport 
trimmed_df = df.sample(n=5000, random_state=50)
trimmed_df.to_csv('recipes_5k_pruned.csv', index=False)
trimmed_df

Unnamed: 0,title,tags,servings,ingredients,ingredient_quantities,steps,rating,review_count
219751,spicy hot dog bites,"[summer, beginner cook, < 4 hours, easy, meat]",4.0,"[ketchup, brown sugar, red wine, white vinegar, soy sauce, dijon mustard, spicy brown mustard, cayenne, garlic clove]","[1, 1/2, 1, 2, 1, 1/2, 1, 1]","[combine ketchup, brown sugar, vinegar, soy sauce, mustard, and garlic in the crockery pot., cover and cook on high for 1 hour, stirring occasionally., add hot dogs; stir to coat with sauce., cover and cook on low for 1 1/2 hours, or until hot., serve hot, with toothpicks, from crockpot.]",4.5,6.0
267038,ham &amp; pineapple pizza muffins,"[pineapple, pork, tropical fruits, fruit, meat, kid friendly, < 30 mins, easy, ham]",8.0,"[english muffins, tomato paste, dried oregano, pineapple chunks in juice, ham, mozzarella cheese]","[4, 1/2, 1, 440, 150, 200]","[split each muffin in half, spread each with the paste, top each muffin with the pineapple, ham then cheese and oregano. place muffins on oven tray in moderate oven for about 15 - 20 minutes, or until cheese is melted and pizzas are heated through., serve immediately.]",5.0,5.0
195653,cheesy chicken and rice casserole,"[chicken breast, chicken, long grain rice, poultry, rice, cheese, vegetable, meat, kid friendly, spring, < 60 mins, oven, easy, inexpensive, one dish meal]",4.0,"[water, long-grain white rice, onion powder, boneless skinless chicken breast halves, cheddar cheese]","[1, 1 1/3, 3/4, 2, 1/2, 4, 1/2]","[stir the soup, water, rice, vegetables and onion powder in a 12"" x 8"" shallow baking dish., top with chicken., season chicken as desired., cover., bake at 375°f for 45 minutes or until done., top with cheese. makes 4 servings.]",4.5,10.0
13382,eggplant (aubergine) kuku (persian eggplant),"[vegetable, southwest asia (middle east), asian, low protein, kosher, potluck, < 4 hours, lunch/snacks]",4.0,"[eggplants, eggplants, ghee, onions, garlic cloves, eggs, fresh parsley, powdered saffron, lime, juice of, baking powder, all-purpose flour, salt, fresh ground black pepper]","[2, 1, 1/2, 2, 4, 4, 4, 1/4, 1, 1, 1, 1, 1/4]","[peel the eggplants, cut them lengthwise in quarters if they are large, and salt them to remove bitternes if necessary., brush each side of the eggplant pieces with eggwhite to reduce the oil needed for frying., in a skillet, heat 4 tablespoons oil over medium heat., add the onion and stir-fry for 10 minutes, until translucent., add the eggplant and garlic and stir-fry 10 minutes longer, until all sides are lightly golden brown., remove from heat and allow to cool., preheat the oven to 350°f., pour 4 tablespoons of oil into an 8-inch baking dish lined with parchment paper., break the eggs into a large bowl., add the parsley, saffron water, lime juice, baking powder, flour, salt, and pepper., beat thoroughly with a fork., add the eggplant, onion and garlic and mix thoroughly., pour the mixture into the dish and bake uncovered for 45 to 50 minutes, until the edge is golden brown., serve the kuku from the baking dish or unmold it by loosening the edge with a knife and inverting the dish onto a serving platter., remove the parchment paper.]",4.5,13.0
254685,homemade sloppy joes or hot dog chili,"[meat, kid friendly, < 30 mins, beginner cook, easy, inexpensive, lunch/snacks]",4.0,"[lean ground beef, ketchup, water, chili powder, paprika, salt, onion powder, black pepper]","[1, 1/2, 1 1/2, 1/2, 1 - 1 1/2, 1/2, 1/2, 1/2, 1/4, 8]","[brown the beef in a large frying pan, breaking it into tiny pieces., drain any excess fat then stir in the rest of the ingredients (except for the buns) with the beef. simmer on medium heat until the mixture is thick, about 10 minutes., serve on hamburger buns or over hot dogs.]",5.0,9.0
...,...,...,...,...,...,...,...,...
62264,pear and prosciutto di parma salad,"[pork, cheese, pears, greens, fruit, vegetable, nuts, meat, european, savory, sweet, < 30 mins, stove top, ham]",4.0,"[butter, boston lettuce, pears, prosciutto di parma, walnuts, white wine vinegar, dijon mustard, sea salt, white sugar, olive oil, pepper]","[1, 1, 8, 2, 1, 2, 12, 1/4, 2, 1 1/2, 1 1/2, 1/4, 1/4, 1/3, 4 -5]","[in small dishes, place egg whites and bread crumbs., in a large skillet over medium high heat, melt butter., dip goat cheese slices first in the egg whites, second in the bread crumbs., cook coated goat cheese in the hot butter until just golden, around 30 seconds for each side., remove and set aside., in a small bowl whisk together all the ingredients from orange juice to the pepper., on each of 4 salad plates, arrange 3 lettuce leaves., place 2 pear fans on each plate., place/tuck 3 slices of prosciutto on each plate., center the goat cheese rounds, dividing evenly., drizzle with vinaigrette and sprinkle with walnuts., serve.]",5.0,5.0
259027,spicy striped bass,"[szechuan, chinese, asian, spicy, < 30 mins, stir fry, easy, bass]",4.0,"[garlic cloves, scallions, habanero pepper, sugar, tamari soy sauce, white wine, catsup, white vinegar, water, scallions]","[1 1/2, 3, 3, 1, 1, 2, 2, 1, 3, 1, 1, 2, 4, 3]","[sauce: saute the garlic, scallions, ginger, and the habanero chili in oil in a saucepan. when browned, add the remaining ingredients and cook for 5 minutes., bass: grill the striped bass. depending on the thickness of the filets and the heat of the fire, grill for 8 to 10 minutes., pour sauce on the bass filets. garnish with chopped scallions.]",5.0,6.0
1242,nanna's yorkshire pudding,"[< 15 mins, oven, easy, european]",6.0,"[flour, milk, eggs]","[2, 1/2, 2, 1]","[in a bowl combine flour and salt., with a handheld mixer add the milk, in a stream, until smooth., add water and eggs and beat until combined well and bubbly., let stand, covered, at room temperature for 1 hour., preheat oven to 450°f, divide drippings among six muffin pan cups., heat the cups in the oven until almost smoking., beat batter until bubbly and divide among muffin cups., bake 10 minutes in lower third of the oven without opening the oven door., reduce oven temperature to 350 degrees f and continue to bake for another 10 minutes until puffed, crisp and golden brown., serve immediately., mum's yorkshire pudding 1 cup flour 1 cup milk 2 eggs salt & pepper put fat from roast into glass pie plate or use vegetable oil. (be generous.) mix listed ingredients together. cook in 450f oven for 25 minutes.]",5.0,7.0
5814,claude's baked beans,"[vegetable, low cholesterol, potluck, weeknight, oven, < 4 hours, easy, beans]",10.0,"[ground sausage, celery, onion, kidney beans, brown sugar, tomato paste, prepared mustard]","[1, 1, 1, 2, 16, 16, 1, 1, 6, 2 -3, 1]","[brown sausage, celery, and onions put on side., mix remaining ingredients and then mix with sausage mixture., bake at 350 degrees for 1 hour.]",5.0,7.0
