In [2]:
import numpy as np 
import pandas as pd
import csv 
import difflib
import itertools
import operator

import os 
os.chdir("/Users/susanchen/Documents/GitHub/skincare_classification/Data_sets")

### This notebook will accomplish two preprocessing steps: Ingredients cleaning and Ingredients matching. The general steps in each process is listed below.

### Ingredients Cleaning:
- 2.1 Split ingredients into active and inactive 
- 2.2 Find number of inactive and active ingredients (if possible)
- 2.3 Check if ingredients are listed alphabetically 

### Ingredients Matching:
- 2.4 Create two dictionaries containing the ingredient name as the key and the rating and category as values respectively
- 2.5 Find the matching key of all these ingredient from our dictionary 
- 2.6 For all products loop over the ingredient list and find each ingredient's rating and category 
- 2.7 For all products, count how many ingredients belong to a certain category (antioxidants, emulsifiers,  emollients, etc)
- 2.8 For all products, find the top 3 ingredients by concentration
- 2.9 Clean up prices 

## 2.0 Load the 2 datafiles 

In [3]:
ingredientInfo = pd.read_csv('ingredient_cleaned.txt', sep = ",")
df = pd.read_csv('Master_uncleaned.csv')

In [4]:
cat_list =[]
for i in ingredientInfo.category.values:
    i = i.replace("['", "").replace("']", '').replace("'", '')
    cat_list.append(str(i)) 

In [5]:
ingredientInfo["Category"] = cat_list
ingredientInfo.head()

Unnamed: 0,name,category,rating,rating_num,Category
0,"1, 2-Hexanediol",['Preservatives'],Good,2,Preservatives
1,10-Hydroxydecanoic Acid,['Emollients'],Good,2,Emollients
2,4-T-butylcyclohexanol,"['Emollients', 'Skin-Soothing']",Good,2,"Emollients, Skin-Soothing"
3,Acacia farnesiana extract,"['Plant Extracts', 'Fragrance: Synthetic and F...",Poor,0,"Plant Extracts, Fragrance: Synthetic and Fragr..."
4,acacia senegal gum,"['Texture Enhancer', 'Plant Extracts', 'Skin-S...",Good,2,"Texture Enhancer, Plant Extracts, Skin-Soothing"


In [6]:
df.head()

Unnamed: 0,Product,Brand,Ingredients,Price,Skin_Type
0,#InstantDetox Facial Mask,Biobelle,"Water, Butylene Glycol, Glycerin, Trehalose, H...",3.99,['Oily']
1,#Peachy Facial Mask,Biobelle,"Water, Methylpropanediol, Butylene Glycol, Gly...",3.99,['Dry']
2,#Rise&Shine Facial Mask,Biobelle,"Water, Glycerin, Butylene Glycol, Triethylhexa...",3.99,['Combination']
3,#RoséAllDay Facial Mask,Biobelle,"Water, Methylpropanediol, Glycerin, Propanedio...",3.99,['Combination']
4,#VitaminSea Facial Mask,Biobelle,"Water, Butylene Glycol, Glycerin, Hydroxyaceto...",3.99,['Dry']


## 2.1 Split Ingredients into Active and Inactive

### It is a required standard to label ingredients in beauty and hygienic products as active and inactive if the product fall within in two categories. Those two categories are mandated by the FDA as (1) if the product contains ingredients are FDA classified drugs or (2) the product claims to have some medical benefit. In contract, if a product does not contain FDA classifed drugs or does not claim to have medical benefits, then its ingredients are consider to be comestic ingredients. Cosmetic products do not need to label their comestic ingredients as active or inactive, all ingredients can be listed in a single area on the label. The FDA only requires manufacturers to list cosmetic ingredients in order of the amount/concentration present, from highest to lowest.

In [8]:
def split_by_active_inactive(sr_ingredient):
            inactive_start = pd.concat([sr_ingredient.str.find('Other'),
                                       sr_ingredient.str.find('Inactive')],
                                       axis=1).max(axis=1)
            
            inactive_start = inactive_start.replace(-1, 0)
            inactive = [ingredient[start:] for (ingredient, start) in zip(sr_ingredient, inactive_start)]
            inactive = [ingredient[ingredient.find(':')+1:] for ingredient in inactive]                           
            active = [ingredient[:start] for (ingredient, start) in zip(sr_ingredient, inactive_start)]
            active = [ingredient[ingredient.rfind(':')+1:] for ingredient in active]
            return active, inactive 

## 2.3 Check if Ingredients are in alphabetically order. 

### Comestic products can label their ingredients in two ways: alphabetically or by concentration. Althought the latter (by concentration) is the standard, we still need to check. If ingredients are listed by concentration, we will then use this information to find the top 3 ingredients (excluding water) in step 2.8. 


In [9]:
def check_alphabetical(ingredient_list, tol = 1):
            if (len(ingredient_list)) <= tol:
                return True 
            for i in range(len(ingredient_list) - 1):
                if ingredient_list[i] > ingredient_list[i+1]:
                    return False 
            return True 

## All together:

In [10]:
class data_cleaning: 
    def __init__(self, df):
        self.df = df.copy(deep = True)

    def clean_ingredients(self):
        def split_by_active_inactive(sr_ingredient):
            inactive_start = pd.concat([sr_ingredient.str.find('Other'),
                                       sr_ingredient.str.find('Inactive')],
                                       axis=1).max(axis=1)
            
            inactive_start = inactive_start.replace(-1, 0)
            inactive = [ingredient[start:] for (ingredient, start) in zip(sr_ingredient, inactive_start)]
            inactive = [ingredient[ingredient.find(':')+1:] for ingredient in inactive]                           
            active = [ingredient[:start] for (ingredient, start) in zip(sr_ingredient, inactive_start)]
            active = [ingredient[ingredient.rfind(':')+1:] for ingredient in active]
            return active, inactive 

        def check_alphabetical(ingredient_list, tol = 1):
            if (len(ingredient_list)) <= tol:
                return True 
            for i in range(len(ingredient_list) - 1):
                if ingredient_list[i] > ingredient_list[i+1]:
                    return False 
            return True 
                
        
        self.df["Ingredients"].fillna('', inplace = True)
        self.df['active_ingredient'], self.df['inactive_ingredient']= split_by_active_inactive(self.df['Ingredients'])
        
        self.df['active_ingredient_list'] = self.df['active_ingredient'].apply(lambda x: [l.strip() for l in str(x).split(',') if l.lower().islower()])
        self.df['inactive_ingredient_list'] = self.df['inactive_ingredient'].apply(lambda x: [l.strip() for l in str(x).split(',') if l.lower().islower()])

        self.df["num_inactive_ingredients"] = self.df["inactive_ingredient_list"].apply(lambda x: len(x))
        self.df["num_active_ingredients"] = self.df["active_ingredient_list"].apply(lambda x: len (x))
        self.df["Is_alphabetical"] = self.df["inactive_ingredient_list"].apply(check_alphabetical)


    def clean(self):
        self.clean_ingredients()

    def get_df(self):
        return self.df

In [11]:
data_cleaner= data_cleaning(df)
data_cleaner.clean()
df_cleaned = data_cleaner.get_df()
df_cleaned

Unnamed: 0,Product,Brand,Ingredients,Price,Skin_Type,active_ingredient,inactive_ingredient,active_ingredient_list,inactive_ingredient_list,num_inactive_ingredients,num_active_ingredients,Is_alphabetical
0,#InstantDetox Facial Mask,Biobelle,"Water, Butylene Glycol, Glycerin, Trehalose, H...",3.99,['Oily'],,"Water, Butylene Glycol, Glycerin, Trehalose, H...",[],"[Water, Butylene Glycol, Glycerin, Trehalose, ...",23,0,False
1,#Peachy Facial Mask,Biobelle,"Water, Methylpropanediol, Butylene Glycol, Gly...",3.99,['Dry'],,"Water, Methylpropanediol, Butylene Glycol, Gly...",[],"[Water, Methylpropanediol, Butylene Glycol, Gl...",20,0,False
2,#Rise&Shine Facial Mask,Biobelle,"Water, Glycerin, Butylene Glycol, Triethylhexa...",3.99,['Combination'],,"Water, Glycerin, Butylene Glycol, Triethylhexa...",[],"[Water, Glycerin, Butylene Glycol, Triethylhex...",33,0,False
3,#RoséAllDay Facial Mask,Biobelle,"Water, Methylpropanediol, Glycerin, Propanedio...",3.99,['Combination'],,"Water, Methylpropanediol, Glycerin, Propanedio...",[],"[Water, Methylpropanediol, Glycerin, Propanedi...",31,0,False
4,#VitaminSea Facial Mask,Biobelle,"Water, Butylene Glycol, Glycerin, Hydroxyaceto...",3.99,['Dry'],,"Water, Butylene Glycol, Glycerin, Hydroxyaceto...",[],"[Water, Butylene Glycol, Glycerin, Hydroxyacet...",24,0,False
...,...,...,...,...,...,...,...,...,...,...,...,...
2019,Youthful Vitamin C Fresh Radiance Essence,No7,"Aqua (Water), Butylene Glycol, Glycerin, Gluco...",24.99,"['Dry', 'Oily', 'Combination', 'Normal']",,"Aqua (Water), Butylene Glycol, Glycerin, Gluco...",[],"[Aqua (Water), Butylene Glycol, Glycerin, Gluc...",18,0,False
2020,Yuza Sorbet Day Cream,Erborian,"Aqua/Water, Cyclomethicone, Glycerin, Nylon-12...",48.00,"['Oily', 'Normal', 'Sensitive', 'Combination',...",,"Aqua/Water, Cyclomethicone, Glycerin, Nylon-12...",[],"[Aqua/Water, Cyclomethicone, Glycerin, Nylon-1...",33,0,False
2021,Yuza Sorbet Night Treatment,Erborian,"Aqua/Water, Cyclomethicone, Glycerin, Cetearyl...",55.00,"['Oily', 'Normal', 'Sensitive', 'Combination',...",,"Aqua/Water, Cyclomethicone, Glycerin, Cetearyl...",[],"[Aqua/Water, Cyclomethicone, Glycerin, Ceteary...",15,0,False
2022,Yuzu Overnight Moisture Mask,Earth Therapeutics,"Water (Aqua), Propanediol, Glycerin, Hydrogena...",7.00,"['Dry', 'Normal', 'Sensitive', 'Combination']",,"Water (Aqua), Propanediol, Glycerin, Hydrogena...",[],"[Water (Aqua), Propanediol, Glycerin, Hydrogen...",41,0,False


## 2.4 Create the rating and category dictionaries 

In [12]:
ingredients_rating_dict = {name.lower(): row['rating_num'] for (idx, row) in ingredientInfo.iterrows() for name in row["name"].split('/')}

In [13]:
ingredients_category_dict = {name.lower(): row["Category"] for (idx, row) in ingredientInfo.iterrows() for name in row["name"].split('/')}

## 2.5 Ingredient Matching 
### Create a set of all unique ingredients in our dataframe. Use this set to build the matching dictionary. 

In [14]:
all_ingredients = set(list(itertools.chain(*df_cleaned['inactive_ingredient_list'].values)))

In [15]:
Match_dict={}
for ingredient in all_ingredients:
    match_matric = {key: difflib.SequenceMatcher(None, key, ingredient).ratio() for key in ingredients_category_dict.keys()} 
    best_match, best_metric = max(match_matric.items(), key = operator.itemgetter(1))

    if best_metric > .5:
        Match_dict[ingredient.lower()] = best_match
    else: 
        Match_dict[ingredient.lower()] = "unknown" 

In [16]:
Match_ratings = {}
Match_categories ={}
for k in Match_dict.keys():
    if k in ingredients_rating_dict.keys():
        Match_ratings[k] = ingredients_rating_dict.get(k)
        Match_categories[k] = ingredients_category_dict.get(k).replace("[", '').replace("]", '').split(",")


## 2.6 Map over the Ingredient rating and category(s)
### For all products loop over the ingredient list and find each ingredient's rating and category 

In [17]:
category_list = []
ratings_list = []
for Ingredients in df_cleaned['inactive_ingredient_list'].values:
    temp_list = []
    temp_list2 = []
    for i in Ingredients:
        i = i.lower()
        temp_list.append(Match_ratings.get(i))
        temp_list2.append(Match_categories.get(i))
    ratings_list.append(temp_list)
    category_list.append(temp_list2)

df_cleaned["ingredients_ratings_list"] = ratings_list
df_cleaned["ingredients_category_list"] = category_list

## The ingredient category list as of right now is 2 dimensional list. We need it to be 1 dimensional.

In [18]:
from iteration_utilities import deepflatten
flatten_list = []
for i in range(len(df_cleaned.ingredients_category_list)):
    flat = list(deepflatten(df_cleaned["ingredients_category_list"][i], depth =1))
    flatten_list.append(flat)

In [19]:
df_cleaned["ingredients_category_list"] = flatten_list
df_cleaned

Unnamed: 0,Product,Brand,Ingredients,Price,Skin_Type,active_ingredient,inactive_ingredient,active_ingredient_list,inactive_ingredient_list,num_inactive_ingredients,num_active_ingredients,Is_alphabetical,ingredients_ratings_list,ingredients_category_list
0,#InstantDetox Facial Mask,Biobelle,"Water, Butylene Glycol, Glycerin, Trehalose, H...",3.99,['Oily'],,"Water, Butylene Glycol, Glycerin, Trehalose, H...",[],"[Water, Butylene Glycol, Glycerin, Trehalose, ...",23,0,False,"[2, 2, 3, 2, None, 2, 2, None, 3, 1, 2, None, ...","[Miscellaneous, Texture Enhancer, Skin-Repleni..."
1,#Peachy Facial Mask,Biobelle,"Water, Methylpropanediol, Butylene Glycol, Gly...",3.99,['Dry'],,"Water, Methylpropanediol, Butylene Glycol, Gly...",[],"[Water, Methylpropanediol, Butylene Glycol, Gl...",20,0,False,"[2, 2, 2, 3, None, 2, 3, 1, None, None, None, ...","[Miscellaneous, Miscellaneous, Hydration, Tex..."
2,#Rise&Shine Facial Mask,Biobelle,"Water, Glycerin, Butylene Glycol, Triethylhexa...",3.99,['Combination'],,"Water, Glycerin, Butylene Glycol, Triethylhexa...",[],"[Water, Glycerin, Butylene Glycol, Triethylhex...",33,0,False,"[2, 3, 2, None, None, None, None, 2, 3, 1, Non...","[Miscellaneous, Skin-Replenishing, Skin-Resto..."
3,#RoséAllDay Facial Mask,Biobelle,"Water, Methylpropanediol, Glycerin, Propanedio...",3.99,['Combination'],,"Water, Methylpropanediol, Glycerin, Propanedio...",[],"[Water, Methylpropanediol, Glycerin, Propanedi...",31,0,False,"[2, 2, 3, 2, None, 2, 3, 1, None, None, None, ...","[Miscellaneous, Miscellaneous, Hydration, Ski..."
4,#VitaminSea Facial Mask,Biobelle,"Water, Butylene Glycol, Glycerin, Hydroxyaceto...",3.99,['Dry'],,"Water, Butylene Glycol, Glycerin, Hydroxyaceto...",[],"[Water, Butylene Glycol, Glycerin, Hydroxyacet...",24,0,False,"[2, 2, 3, None, 2, 2, 3, 1, None, None, None, ...","[Miscellaneous, Texture Enhancer, Skin-Repleni..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2019,Youthful Vitamin C Fresh Radiance Essence,No7,"Aqua (Water), Butylene Glycol, Glycerin, Gluco...",24.99,"['Dry', 'Oily', 'Combination', 'Normal']",,"Aqua (Water), Butylene Glycol, Glycerin, Gluco...",[],"[Aqua (Water), Butylene Glycol, Glycerin, Gluc...",18,0,False,"[None, 2, 3, 3, 2, 2, 2, 2, None, 1, 2, 3, Non...","[None, Texture Enhancer, Skin-Replenishing, S..."
2020,Yuza Sorbet Day Cream,Erborian,"Aqua/Water, Cyclomethicone, Glycerin, Nylon-12...",48.00,"['Oily', 'Normal', 'Sensitive', 'Combination',...",,"Aqua/Water, Cyclomethicone, Glycerin, Nylon-12...",[],"[Aqua/Water, Cyclomethicone, Glycerin, Nylon-1...",33,0,False,"[None, 2, 3, 2, 2, 2, 2, 2, None, None, 2, Non...","[None, Emollients, Skin-Replenishing, Skin-Re..."
2021,Yuza Sorbet Night Treatment,Erborian,"Aqua/Water, Cyclomethicone, Glycerin, Cetearyl...",55.00,"['Oily', 'Normal', 'Sensitive', 'Combination',...",,"Aqua/Water, Cyclomethicone, Glycerin, Cetearyl...",[],"[Aqua/Water, Cyclomethicone, Glycerin, Ceteary...",15,0,False,"[None, 2, 3, 2, 2, None, 2, 2, None, 2, 2, Non...","[None, Emollients, Skin-Replenishing, Skin-Re..."
2022,Yuzu Overnight Moisture Mask,Earth Therapeutics,"Water (Aqua), Propanediol, Glycerin, Hydrogena...",7.00,"['Dry', 'Normal', 'Sensitive', 'Combination']",,"Water (Aqua), Propanediol, Glycerin, Hydrogena...",[],"[Water (Aqua), Propanediol, Glycerin, Hydrogen...",41,0,False,"[None, 2, 3, None, 3, None, None, 3, None, Non...","[None, Uncategorized, Skin-Replenishing, Skin..."


## 2.7 Count Ingredient Categories 

### The categories to count for are:
1. Antioxidants
2. Emollients
3. Hydration
4. Skin-Restoring
5. Plant Extracts
6. Preservatives
7. Skin-Softening
8. Sensitizing
9. Skin-Replenishing


In [20]:
from collections import Counter 
categories = ["Antioxidants", "Emollients", "Hydration", "Skin-Restoring", "Plant Extracts", "Preservatives", "Skin-Softening", "Sensitizing", "Skin-Replenishing"]


for category in categories:
    count = []
    for i in df_cleaned.ingredients_category_list.values:
        # declaring category to count
        x = category
        # initiating counter
        d = Counter(i) 
        # Add count 
        count.append(d[x])
    df_cleaned["num_of_" + category] = count

## 2.8 Find the top 3 ingredients by concentration
### Since the ingredients are not listed alphabetically, we can find the top 3 ingredients by concentration. The top 3 would be the first 3 listed ingredients or the first 4 listed ingredients if including water. 

In [21]:
top3 =[]
for i in range(len(df_cleaned)):
    main = df_cleaned.inactive_ingredient[i].split(", ")[:4]
    #print(main[0]) #print all the ways water is listed and to check what is the first listed ingredient in all products. It turns out to be water and it is not unusal that the ingredient in highest concentration is water. 
    if ("Water" or "Aqua" or "Eau" or "Water (Aqua)" in main[0]):
        main = main[1:4]
    else:
        main = main[:3]
    top3.append(main)
         
df_cleaned["top_3"] = top3

In [22]:
top_categories ={}
for Ingredients in df_cleaned['top_3'].values:
    for i in Ingredients:
        top_categories[i] = ingredients_category_dict.get(i)

top3category = []
for Ingredients in df_cleaned['top_3'].values:
    temp_list = []
    for i in Ingredients:
        i = i.lower()
        temp_list.append(top_categories.get(i))
    top3category.append(temp_list)
df_cleaned["top3_category_list"] = top3category

## 2.9 Clean up the price column
### Some items have a discounted price and original price. For consistency, we will keep only the original price. 


In [23]:
prices = df_cleaned.Price.values
for i in range(len(prices)):
    if "Original" in prices[i]:
        new = prices[i].replace('Online Only Price', "").replace("Market Value($212 value)Online Only Price", '').replace('Market Value($212 value)', '').replace("Market Value($43 value)", '')
        df_cleaned.Price[i] = new[-6:].replace("$",'')
    elif "Market Value" in prices[i]:
        df_cleaned.Price[i] = prices[i][:4]
    else:
        df_cleaned.Price[i] = prices[i]
        

### Remove unnecessary columns and save data set

In [25]:
df_cleaned.drop(["inactive_ingredient_list", "active_ingredient_list"], axis =1)


Unnamed: 0,Product,Brand,Ingredients,Price,Skin_Type,active_ingredient,inactive_ingredient,num_inactive_ingredients,num_active_ingredients,Is_alphabetical,...,num_of_Emollients,num_of_Hydration,num_of_Skin-Restoring,num_of_Plant Extracts,num_of_Preservatives,num_of_Skin-Softening,num_of_Sensitizing,num_of_Skin-Replenishing,top_3,top3_category_list
0,#InstantDetox Facial Mask,Biobelle,"Water, Butylene Glycol, Glycerin, Trehalose, H...",3.99,['Oily'],,"Water, Butylene Glycol, Glycerin, Trehalose, H...",23,0,False,...,0,0,0,0,0,0,0,1,"[Butylene Glycol, Glycerin, Trehalose]","[Texture Enhancer, Skin-Replenishing, Skin-Res..."
1,#Peachy Facial Mask,Biobelle,"Water, Methylpropanediol, Butylene Glycol, Gly...",3.99,['Dry'],,"Water, Methylpropanediol, Butylene Glycol, Gly...",20,0,False,...,0,0,0,0,0,0,0,1,"[Methylpropanediol, Butylene Glycol, Glycerin]","[None, Texture Enhancer, Skin-Replenishing, Sk..."
2,#Rise&Shine Facial Mask,Biobelle,"Water, Glycerin, Butylene Glycol, Triethylhexa...",3.99,['Combination'],,"Water, Glycerin, Butylene Glycol, Triethylhexa...",33,0,False,...,0,0,0,0,0,0,0,1,"[Glycerin, Butylene Glycol, Triethylhexanoin]","[Skin-Replenishing, Skin-Restoring, Texture En..."
3,#RoséAllDay Facial Mask,Biobelle,"Water, Methylpropanediol, Glycerin, Propanedio...",3.99,['Combination'],,"Water, Methylpropanediol, Glycerin, Propanedio...",31,0,False,...,0,0,0,1,2,0,0,2,"[Methylpropanediol, Glycerin, Propanediol]","[None, Skin-Replenishing, Skin-Restoring, None]"
4,#VitaminSea Facial Mask,Biobelle,"Water, Butylene Glycol, Glycerin, Hydroxyaceto...",3.99,['Dry'],,"Water, Butylene Glycol, Glycerin, Hydroxyaceto...",24,0,False,...,0,0,0,1,0,0,0,1,"[Butylene Glycol, Glycerin, Hydroxyacetophenone]","[Texture Enhancer, Skin-Replenishing, Skin-Res..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2019,Youthful Vitamin C Fresh Radiance Essence,No7,"Aqua (Water), Butylene Glycol, Glycerin, Gluco...",24.99,"['Dry', 'Oily', 'Combination', 'Normal']",,"Aqua (Water), Butylene Glycol, Glycerin, Gluco...",18,0,False,...,0,0,0,2,3,0,0,2,"[Butylene Glycol, Glycerin, Gluconolactone]","[Texture Enhancer, Skin-Replenishing, Skin-Res..."
2020,Yuza Sorbet Day Cream,Erborian,"Aqua/Water, Cyclomethicone, Glycerin, Nylon-12...",48.00,"['Oily', 'Normal', 'Sensitive', 'Combination',...",,"Aqua/Water, Cyclomethicone, Glycerin, Nylon-12...",33,0,False,...,1,0,0,1,1,0,0,1,"[Cyclomethicone, Glycerin, Nylon-12]","[Emollients, Skin-Replenishing, Skin-Restoring..."
2021,Yuza Sorbet Night Treatment,Erborian,"Aqua/Water, Cyclomethicone, Glycerin, Cetearyl...",55.00,"['Oily', 'Normal', 'Sensitive', 'Combination',...",,"Aqua/Water, Cyclomethicone, Glycerin, Cetearyl...",15,0,False,...,2,0,0,0,0,0,0,1,"[Cyclomethicone, Glycerin, Cetearyl Alcohol]","[Emollients, Skin-Replenishing, Skin-Restoring..."
2022,Yuzu Overnight Moisture Mask,Earth Therapeutics,"Water (Aqua), Propanediol, Glycerin, Hydrogena...",7.00,"['Dry', 'Normal', 'Sensitive', 'Combination']",,"Water (Aqua), Propanediol, Glycerin, Hydrogena...",41,0,False,...,2,0,0,1,0,0,0,1,"[Propanediol, Glycerin, Hydrogenated Poly (C6-...","[None, Skin-Replenishing, Skin-Restoring, None]"


In [26]:
df_cleaned.to_csv('Master_cleaned.csv', index= False)

## The data set is now ready for exploratory analysis and plots. Refer to notebook 3 for this process. 