# Functionality Testing

<hr style="border:2px solid black"> </hr>

## Perform misc analysis to support functionality testing

Double-check statistics in visualizations using source data. Corresponds with functionality_testing.xlsx

---

### Import libraries

In [1]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import time

In [2]:
def num_uniques(ser):
    try:
        return len(ser.unique())
    except:
        return "Not unique check-able"
    
def summarize_df(df):
    print("======DATA SUMMARY======")
    print("{} rows by {} columns".format(df.shape[0], df.shape[1]))
    print("\n======COLUMNS======")
    print(df.dtypes)
    print("\n======PREVIEW======")
    display(df.head())
    print("\n======NUMERICAL COL SUMMARY======")
    print(df.describe())
    print("\n")
    for col in df.columns:
        print("{}: {} unique values".format(col, num_uniques(df[col])))

---
### Test Ingredients Histogram

In [8]:
users_in = pd.read_csv('../05_RecipeExplorationTool/data/users_in_count2_mean4.csv')

In [9]:
summarize_df(users_in)

599009 rows by 3 columns

user_id        int64
recipe_id      int64
ratings      float64
dtype: object



Unnamed: 0,user_id,recipe_id,ratings
0,0,1118,5.0
1,0,27680,5.0
2,0,32541,5.0
3,0,137353,5.0
4,0,16428,5.0



             user_id      recipe_id        ratings
count  599009.000000  599009.000000  599009.000000
mean     4450.979807   87765.758825       4.658464
std      5644.504991   51419.630852       0.818773
min         0.000000       0.000000       0.000000
25%       480.000000   43034.000000       5.000000
50%      1927.000000   87776.000000       5.000000
75%      6339.000000  132193.000000       5.000000
max     25075.000000  178262.000000       5.000000


user_id: 25006 unique values
recipe_id: 85519 unique values
ratings: 6 unique values


In [14]:
recipes_in = pd.read_parquet('../05_RecipeExplorationTool/data/recipes_in_count2_mean4.parquet').reset_index()

In [15]:
summarize_df(recipes_in)

48454 rows by 7 columns

recipe_id       int64
nutrition      object
minutes         int64
techniques     object
cuisine        object
meal_of_day    object
ingredients    object
dtype: object



Unnamed: 0,recipe_id,nutrition,minutes,techniques,cuisine,meal_of_day,ingredients
0,137739,"{'calories': 51.5, 'carbohydrates': 4.0, 'prot...",55,"{'Bake': 1, 'Barbecue': 0, 'Blanch': 0, 'Blend...",North American,Side Dishes,"[winter squash, mexican seasoning, mixed spice..."
1,112140,"{'calories': 269.8, 'carbohydrates': 5.0, 'pro...",130,"{'Bake': 0, 'Barbecue': 0, 'Blanch': 0, 'Blend...",Uncategorized,Main Dish,"[ground beef, yellow onions, diced tomatoes, t..."
2,59389,"{'calories': 368.1, 'carbohydrates': 20.0, 'pr...",45,"{'Bake': 1, 'Barbecue': 0, 'Blanch': 0, 'Blend...",Uncategorized,Side Dishes,"[spreadable cheese with garlic and herbs, new ..."
3,44061,"{'calories': 352.9, 'carbohydrates': 28.0, 'pr...",190,"{'Bake': 0, 'Barbecue': 0, 'Blanch': 0, 'Blend...",North American,NO MEAL,"[tomato juice, apple cider vinegar, sugar, sal..."
4,5289,"{'calories': 160.2, 'carbohydrates': 7.0, 'pro...",0,"{'Bake': 0, 'Barbecue': 0, 'Blanch': 0, 'Blend...",North American,Beverages,"[milk, vanilla ice cream, frozen apple juice c..."



           recipe_id        minutes
count   48454.000000   48454.000000
mean    87532.195546     133.822244
std     49778.583631    1966.124949
min        40.000000       0.000000
25%     44397.500000      20.000000
50%     86717.000000      40.000000
75%    129724.500000      70.000000
max    178262.000000  187200.000000


recipe_id: 48454 unique values
nutrition: Not unique check-able unique values
minutes: 537 unique values
techniques: Not unique check-able unique values
cuisine: 40 unique values
meal_of_day: 9 unique values
ingredients: Not unique check-able unique values


In [16]:
user_recipe = users_in.merge(recipes_in, on='recipe_id')

In [17]:
summarize_df(user_recipe)

339880 rows by 9 columns

user_id          int64
recipe_id        int64
ratings        float64
nutrition       object
minutes          int64
techniques      object
cuisine         object
meal_of_day     object
ingredients     object
dtype: object



Unnamed: 0,user_id,recipe_id,ratings,nutrition,minutes,techniques,cuisine,meal_of_day,ingredients
0,0,27680,5.0,"{'calories': 414.7, 'carbohydrates': 35.0, 'pr...",15,"{'Bake': 0, 'Barbecue': 0, 'Blanch': 0, 'Blend...",North American,Beverages,"[boiling water, tea bags, sugar, frozen limead..."
1,193,27680,5.0,"{'calories': 414.7, 'carbohydrates': 35.0, 'pr...",15,"{'Bake': 0, 'Barbecue': 0, 'Blanch': 0, 'Blend...",North American,Beverages,"[boiling water, tea bags, sugar, frozen limead..."
2,197,27680,5.0,"{'calories': 414.7, 'carbohydrates': 35.0, 'pr...",15,"{'Bake': 0, 'Barbecue': 0, 'Blanch': 0, 'Blend...",North American,Beverages,"[boiling water, tea bags, sugar, frozen limead..."
3,313,27680,5.0,"{'calories': 414.7, 'carbohydrates': 35.0, 'pr...",15,"{'Bake': 0, 'Barbecue': 0, 'Blanch': 0, 'Blend...",North American,Beverages,"[boiling water, tea bags, sugar, frozen limead..."
4,330,27680,5.0,"{'calories': 414.7, 'carbohydrates': 35.0, 'pr...",15,"{'Bake': 0, 'Barbecue': 0, 'Blanch': 0, 'Blend...",North American,Beverages,"[boiling water, tea bags, sugar, frozen limead..."



             user_id      recipe_id        ratings        minutes
count  339880.000000  339880.000000  339880.000000  339880.000000
mean     4487.143907   87035.399188       4.655946     155.247726
std      5682.913685   49761.549422       0.823147    2147.284232
min         0.000000      40.000000       0.000000       0.000000
25%       480.000000   44040.000000       5.000000      20.000000
50%      1927.000000   86331.500000       5.000000      40.000000
75%      6412.000000  129258.000000       5.000000      70.000000
max     25074.000000  178262.000000       5.000000  187200.000000


user_id: 23549 unique values
recipe_id: 48454 unique values
ratings: 6 unique values
nutrition: Not unique check-able unique values
minutes: 537 unique values
techniques: Not unique check-able unique values
cuisine: 40 unique values
meal_of_day: 9 unique values
ingredients: Not unique check-able unique values


---
### Test ingredients histogram

In [28]:
user_recipe[['user_id', 'ingredients']].explode('ingredients') \
                                        .groupby('user_id', as_index=False).count() \
                                        .rename({'ingredients': 'num_ingredients'}, axis=1) \
                                        .groupby('num_ingredients', as_index=False).count() \
                                        .rename({'user_id': 'num_users'}, axis=1) \
                                        .head(20)

Unnamed: 0,num_ingredients,num_users
0,2,40
1,3,120
2,4,222
3,5,344
4,6,411
5,7,515
6,8,492
7,9,556
8,10,614
9,11,501


---
### Test ingredients wordcloud

In [32]:
user_recipe_788 = user_recipe[user_recipe['user_id'] == 788]
user_recipe_788[['user_id', 'ingredients']].explode('ingredients') \
                                            .groupby(['user_id', 'ingredients'], as_index=False).size() \
                                            .sort_values('size', ascending=False).head(15)

Unnamed: 0,user_id,ingredients,size
55,788,salt,5
3,788,black pepper,3
24,788,garlic powder,3
42,788,onion,2
64,788,tomato sauce,2
21,788,garlic,2
45,788,paprika,2
19,788,flour,2
60,788,sugar,2
8,788,cayenne pepper,2


---
### Test techniques chart

In [36]:
user_recipe_788 = user_recipe[user_recipe['user_id'] == 788]
user_recipe_788['technique_l'] = user_recipe_788['techniques'].apply(lambda x: [k for k, v in x.items() if v > 0])
user_recipe_788[['user_id', 'technique_l']].explode('technique_l') \
                                            .groupby(['user_id', 'technique_l'], as_index=False).size() \
                                            .sort_values('size', ascending=False).head(15)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


Unnamed: 0,user_id,technique_l,size
0,788,Bake,4
7,788,Pour,4
2,788,Boil,3
3,788,Combine,3
6,788,Melt,2
10,788,Simmer,2
14,788,Toss,2
1,788,Blend,1
4,788,Drain,1
5,788,Grill,1


In [37]:
user_recipe['technique_l'] = user_recipe['techniques'].apply(lambda x: [k for k, v in x.items() if v > 0])
user_recipe[['user_id', 'technique_l']].explode('technique_l') \
                                            .groupby(['user_id', 'technique_l'], as_index=False).size() \
                                            .sort_values('size', ascending=False).head(15)

Unnamed: 0,user_id,technique_l,size
2699,94,Bake,861
8692,275,Bake,827
2708,94,Combine,760
8701,275,Combine,746
6040,193,Bake,654
2729,94,Pour,621
6048,193,Combine,609
8721,275,Pour,606
6491,208,Bake,555
7491,241,Bake,555


---
### Test cuisine chart

In [41]:
user_recipe_788[['cuisine']].groupby('cuisine', as_index=False).size().sort_values('size', ascending=False)

Unnamed: 0,cuisine,size
1,Uncategorized,5
0,North American,4


In [42]:
user_recipe[['cuisine']].groupby('cuisine', as_index=False).size().sort_values('size', ascending=False)

Unnamed: 0,cuisine,size
39,Uncategorized,176500
27,North American,85002
7,Comfort Food,20603
10,European,18501
2,Asian,18230
35,South West Pacific,5200
14,Greek,3385
11,French,2125
13,German,2058
0,African,2045
