 # Breakdown of the count of each item sold per month

In [38]:
import json
import pandas as pd
import numpy as np
import calendar
import re

# Load the raw transactions. JSON text is UTF-8 by specification, so decode it
# explicitly instead of relying on the platform's default encoding.
with open("transaction-data-adhoc-analysis.json", "r", encoding="utf-8") as f:
    tdata = json.load(f)

df = pd.DataFrame(tdata)

# One row per purchased item: 'transaction_items' holds several ';'-separated
# entries per transaction, so split it into lists and explode those lists.
lst_col = 'transaction_items'
x = df.assign(**{lst_col: df[lst_col].str.split(';')})
# reset_index gives every exploded row its own label; the later column-wise
# merge with brand_split aligns on the index and needs the labels to be unique.
tdf = x.explode(lst_col).reset_index(drop=True)

# convert transaction_date strings into datetime
tdf['transaction_date'] = pd.to_datetime(tdf['transaction_date'])
tdf['month'] = tdf['transaction_date'].dt.month_name()

# Split each entry on ',' into its three fields. n=2 caps the result at three
# columns, so an unexpected extra comma cannot create a stray, unnamed column.
brand_split = (
    tdf[lst_col]
    .str.split(',', n=2, expand=True)
    .rename(columns={0: 'brand', 1: 'item', 2: 'qty'})
)

# The numeric quantity is the first run of digits inside the 'qty' field.
qty_digits = brand_split['qty'].str.extract(r'(\d+)', expand=False)
if qty_digits.isna().any():
    # Fail loudly with the offending values instead of an opaque error later on.
    bad_entries = tdf.loc[qty_digits.isna(), lst_col].unique()[:5].tolist()
    raise ValueError(f"No quantity found in transaction item(s): {bad_entries}")
brand_split['quantity'] = qty_digits.astype(int)

In [39]:
# Put the parsed brand/item/quantity columns next to the exploded transaction
# rows; both frames are combined column-wise and aligned on their index.
new_df = pd.concat(
    [tdf, brand_split],
    axis="columns",
)

### __Total count of items sold per month__

In [40]:
# Order the month columns chronologically. Taking unique() alone would only
# give calendar order if the rows happened to be sorted by date, so derive the
# order from the numeric month instead (groupby sorts its keys ascending).
# Only months present in the data are used as categories.
month_numbers = new_df['transaction_date'].dt.month
sorted_months = new_df['month'].groupby(month_numbers).first().tolist()
new_df['month'] = pd.Categorical(new_df['month'], categories=sorted_months, ordered=True)

# Items as rows, months as columns, summed quantity in each cell.
df1 = new_df.pivot_table(
    index=["item"],
    values=["quantity"],
    columns="month",
    aggfunc={'quantity': 'sum'},
).sort_index()
df1

Unnamed: 0_level_0,quantity,quantity,quantity,quantity,quantity,quantity
month,January,February,March,April,May,June
item,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
Beef Chicharon,9665,10001,9816,9890,10028,9902
Gummy Vitamins,9681,9980,10145,9842,9948,9980
Gummy Worms,9559,9996,9986,10043,9801,9934
Kimchi and Seaweed,9676,9949,9967,9921,9773,10104
Nutrional Milk,9727,9691,9876,9786,9881,9767
Orange Beans,9774,10037,9611,9914,9964,10106
Yummy Vegetables,9959,10256,9896,9861,9735,9722


### __Breakdown of items sold per customer__

In [41]:
# Total quantity of every item (and its brand) bought by each customer,
# where a customer is identified by the name/username pair.
df2 = (
    new_df
    .groupby(by=["name", "username", "item", "brand"])
    .agg({"quantity": "sum"})
)
df2

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,quantity
name,username,item,brand,Unnamed: 4_level_1
Aaron Beasley,hermanjustin,Beef Chicharon,Exotic Extras,6
Aaron Beasley,hermanjustin,Gummy Vitamins,HealthyKid 3+,4
Aaron Beasley,hermanjustin,Gummy Worms,Candy City,4
Aaron Beasley,hermanjustin,Nutrional Milk,HealthyKid 3+,1
Aaron Beasley,hermanjustin,Orange Beans,Candy City,3
...,...,...,...,...
Zachary York,jesusdaniel,Gummy Worms,Candy City,18
Zachary York,jesusdaniel,Kimchi and Seaweed,Exotic Extras,12
Zachary York,jesusdaniel,Nutrional Milk,HealthyKid 3+,10
Zachary York,jesusdaniel,Orange Beans,Candy City,8


### __Total number of items sold per brand and item__

In [53]:
# Total quantity sold for every (brand, item) pair, kept as a one-column frame.
brand_item_totals = new_df.groupby(by=["brand", "item"])["quantity"].sum()
df3 = brand_item_totals.to_frame()
df3

Unnamed: 0_level_0,Unnamed: 1_level_0,quantity
brand,item,Unnamed: 2_level_1
Candy City,Gummy Worms,59319
Candy City,Orange Beans,59406
Exotic Extras,Beef Chicharon,59302
Exotic Extras,Kimchi and Seaweed,59390
HealthyKid 3+,Gummy Vitamins,59576
HealthyKid 3+,Nutrional Milk,58728
HealthyKid 3+,Yummy Vegetables,59429
