In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import gmaps
import gmaps.datasets
import ipyleaflet
from IPython.display import display

In [None]:
# Read the key/value pairs stored in the local 'Ignore.env' file into env_variables.
from dotenv import dotenv_values

env_variables = dotenv_values(dotenv_path='Ignore.env')

In [None]:
# Load the wrangled intermediate data set.
# NOTE(review): absolute, machine-specific path; the notebook only runs where this file exists.
wrangled_data_path = '/Users/emilydanielbowser/Documents/Iowa Food Coop/Data/Intermediate Data/Wrangled_data'
df = pd.read_csv(wrangled_data_path)

In [None]:
# Preview the first rows of the freshly loaded data.
df.head()

In [None]:
# Discard the 'Unnamed: 0' column (presumably a row index saved by an earlier
# to_csv call - confirm). Rebinding instead of mutating in place, and tolerating
# an already-missing column, lets this cell be re-run without a KeyError.
df = df.drop(columns=['Unnamed: 0'], errors='ignore')


In [None]:
# (rows, columns) of df after the 'Unnamed: 0' column was dropped.
df.shape

In [None]:
# List the column labels available in df.
df.columns

In [None]:
# Replace missing values with 0 in the total-sales column and in every
# per-category sales column.
sales_columns = [
    'SaleNom',
    'Baked Goods',
    'Beverages',
    'Classes and Events',
    'Condiments + Sauces',
    'Dairy',
    'Dried Herbs + Spices',
    'Eggs',
    'Grains, Flours, Cereal + Pastas',
    'Handmade Home Goods + Gifts',
    'Honey, Syrups, Jams + Jellies',
    'Iowa Food Co-op Shop',
    'Local Produce',
    'Meat - Beef',
    'Meat - Chicken + Capon',
    'Meat - Pork',
    'Meats - Other',
    'Non-Food Items',
    'Nuts',
    'Other Protein Sources',
    'Personal Care',
    'Pet + Animal Care',
    'Prepared Foods',
    'Snacks',
    'The Garden Center',
]
df[sales_columns] = df[sales_columns].fillna(0)

In [None]:
# Working frame for the running-total features: the two key columns, every
# per-category sales column, and total sales.
feature_source_columns = [
    'IDCyc', 'IDMemb',
    'Baked Goods', 'Beverages', 'Classes and Events', 'Condiments + Sauces',
    'Dairy', 'Dried Herbs + Spices', 'Eggs', 'Grains, Flours, Cereal + Pastas',
    'Handmade Home Goods + Gifts', 'Honey, Syrups, Jams + Jellies',
    'Iowa Food Co-op Shop', 'Local Produce', 'Meat - Beef',
    'Meat - Chicken + Capon', 'Meat - Pork', 'Meats - Other',
    'Non-Food Items', 'Nuts', 'Other Protein Sources', 'Personal Care',
    'Pet + Animal Care', 'Prepared Foods', 'Snacks', 'The Garden Center',
    'SaleNom',
]
# Explicit copy: columns are assigned onto cleaning_df in later cells, and a
# bare column selection can trigger SettingWithCopyWarning on such assignments.
cleaning_df = df[feature_source_columns].copy()

In [None]:
# Order each member's rows by ascending cycle id (oldest to newest); the
# cumulative columns built afterwards are computed in row order.
cleaning_df = cleaning_df.sort_values(['IDMemb', 'IDCyc'])

In [None]:
# Preview the first rows of cleaning_df after sorting by member and cycle.
cleaning_df.head()

In [None]:
# Spot-check: show every row belonging to member 1078.
cleaning_df[cleaning_df['IDMemb']==1078]

In [None]:
# Total sales can't be used in the final model, so track instead, for each
# member, the running sum of SaleNom over all earlier rows (current row excluded).
def _sum_of_earlier_rows(values):
    """Return, per position, the cumulative sum of the preceding values (0 for the first)."""
    return values.shift().fillna(0).cumsum()


cleaning_df["Cumulative_Sum"] = cleaning_df.groupby("IDMemb")["SaleNom"].transform(_sum_of_earlier_rows)

In [None]:
# 1 when the member ordered during that ordering period (SaleNom > 0), else 0.
# A vectorized comparison replaces the per-row Python lambda; the result is the
# same int64 column of 0/1 flags.
cleaning_df['Ordered'] = (cleaning_df['SaleNom'] > 0).astype('int64')

In [None]:
# Per member, count how many orders were placed in all earlier rows by
# accumulating the 0/1 'Ordered' flag (current row excluded).
ordered_flags_by_member = cleaning_df.groupby("IDMemb")["Ordered"]
cleaning_df['Cumulative_Number_of_Orders'] = ordered_flags_by_member.transform(
    lambda flags: flags.shift().fillna(0).cumsum()
)

In [None]:
# Average order size over the cycles in which the member actually ordered:
# prior sales total divided by prior number of orders. Where the order count
# is 0 the division does not yield a finite number (e.g. 0/0 gives NaN).
cleaning_df['order_per_cycle_when_ordering'] = cleaning_df['Cumulative_Sum'].div(
    cleaning_df['Cumulative_Number_of_Orders']
)

In [None]:
# Category columns whose raw per-cycle values are replaced by running totals.
column_list = [
    'Baked Goods', 'Beverages', 'Classes and Events', 'Condiments + Sauces',
    'Dairy', 'Dried Herbs + Spices', 'Eggs', 'Grains, Flours, Cereal + Pastas',
    'Handmade Home Goods + Gifts', 'Honey, Syrups, Jams + Jellies',
    'Iowa Food Co-op Shop', 'Local Produce', 'Meat - Beef',
    'Meat - Chicken + Capon', 'Meat - Pork', 'Meats - Other',
    'Non-Food Items', 'Nuts', 'Other Protein Sources', 'Personal Care',
    'Pet + Animal Care', 'Prepared Foods', 'Snacks', 'The Garden Center',
]

# For every category, compute per member the sum over all earlier rows, label
# the result '<category>_Cum', and swap it in for the raw column. The new
# columns are appended in column_list order.
category_values_by_member = cleaning_df.groupby("IDMemb")[column_list]
prior_category_totals = category_values_by_member.transform(
    lambda values: values.shift().fillna(0).cumsum()
).add_suffix('_Cum')
cleaning_df = pd.concat(
    [cleaning_df.drop(columns=column_list), prior_category_totals],
    axis=1,
)

In [None]:
# Preview cleaning_df now that the category columns have been replaced by their '_Cum' versions.
cleaning_df.head()

In [None]:
# Spot-check: show every row belonging to member 1016.
cleaning_df[cleaning_df['IDMemb']==1016]

In [None]:
# Reorder by cycle first, then member. Each member's rows stay in ascending
# IDCyc order, which the per-member running count computed next depends on.
cleaning_df = cleaning_df.sort_values(['IDCyc', 'IDMemb'])

In [None]:
# 1-based position of each row within its member's rows: how many cycles the
# member has appeared in, up to and including this one.
cleaning_df['Cycles_as_member'] = cleaning_df.groupby('IDMemb').cumcount().add(1)

In [None]:
# Preview cleaning_df with the new 'Cycles_as_member' column.
cleaning_df.head()

In [None]:
# Calculate how much each customer orders per cycle: prior sales total divided
# by the number of earlier cycles as a member. A member's first row has no
# earlier cycles, so the denominator is 0 there.
earlier_cycles = cleaning_df['Cycles_as_member'] - 1
cleaning_df['order_per_cycle'] = cleaning_df['Cumulative_Sum'] / earlier_cycles

In [None]:
# Preview cleaning_df with 'order_per_cycle' (missing values not yet filled).
cleaning_df.head()

In [None]:
# Rows where the per-cycle average is undefined (NaN) are treated as 0.
cleaning_df = cleaning_df.fillna({'order_per_cycle': 0})

In [None]:
# Rows where the per-order average is undefined (NaN) are treated as 0.
cleaning_df = cleaning_df.assign(
    order_per_cycle_when_ordering=cleaning_df['order_per_cycle_when_ordering'].fillna(0)
)

In [None]:
# Preview cleaning_df after the NaN averages were replaced with 0.
cleaning_df.head()

In [None]:
# List the columns now present in cleaning_df.
cleaning_df.columns

In [None]:
# List df's columns before the raw category columns are removed.
df.columns

In [None]:
# The raw per-cycle category columns are superseded by the '<category>_Cum'
# columns held in cleaning_df, so remove them from df ahead of the merge.
raw_category_columns = [
    'Baked Goods', 'Beverages', 'Classes and Events', 'Condiments + Sauces',
    'Dairy', 'Dried Herbs + Spices', 'Eggs', 'Grains, Flours, Cereal + Pastas',
    'Handmade Home Goods + Gifts', 'Honey, Syrups, Jams + Jellies',
    'Iowa Food Co-op Shop', 'Local Produce', 'Meat - Beef',
    'Meat - Chicken + Capon', 'Meat - Pork', 'Meats - Other',
    'Non-Food Items', 'Nuts', 'Other Protein Sources', 'Personal Care',
    'Pet + Animal Care', 'Prepared Foods', 'Snacks', 'The Garden Center',
]
df = df.drop(columns=raw_category_columns)

In [None]:
# Shapes of both frames before merging, for comparison with df.shape afterwards.
print(df.shape,cleaning_df.shape)

In [None]:
# List df's columns after the raw category columns were removed.
df.columns

In [None]:
# Attach the engineered columns from cleaning_df to df. The left join keeps
# every df row; rows are matched on cycle id, member id and total sales.
# NOTE(review): if a key combination occurs more than once in cleaning_df the
# join will duplicate df rows - compare df.shape before and after.
merge_keys = ['IDCyc', 'IDMemb', 'SaleNom']
df = pd.merge(df, cleaning_df, how='left', on=merge_keys)

In [None]:
# (rows, columns) of df after the merge with cleaning_df.
df.shape

In [None]:
# Final ordering: rows grouped by member, each member's cycles ascending.
df = df.sort_values(by=['IDMemb', 'IDCyc'])

In [None]:
# Write the final frame to disk as CSV. index=True is the to_csv default, so
# the row index is saved as an unnamed first column of the file.
# NOTE(review): absolute, machine-specific output path.
filepath = '/Users/emilydanielbowser/Documents/Iowa Food Coop/Data/Intermediate Data/cleaned_data'
df.to_csv(filepath, index=True)