# Dashboard

## 01 Setup

In [1]:
#Import Libraries
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
import seaborn as sns
import scipy
from ipywidgets import interact

In [4]:
# Path
# Project root. Set the INSTACART_PATH environment variable to run the notebook
# on another machine; when it is not set, the original location is used.
path = os.environ.get('INSTACART_PATH', r'/Users/peanutcookie/instacart-book/')

In [None]:
# Load orders_products_merged.pkl from the project's _database folder
orders_pickle = os.path.join(path, '_database', 'orders_products_merged.pkl')
df_orders = pd.read_pickle(orders_pickle)

In [None]:
# Show the dtype of every column in the orders dataframe
df_orders.dtypes

In [None]:
# Load customers.pkl from the project's _database folder and show its column dtypes
customers_pickle = os.path.join(path, '_database', 'customers.pkl')
df_customers = pd.read_pickle(customers_pickle)
df_customers.dtypes

In [None]:
# Join the orders and customers dataframes on their shared user_id column
# (inner join, which is the pandas default)
df_instacart_data = pd.merge(
    df_orders,
    df_customers,
    how='inner',
    on='user_id',
)

## 02 Sampling data set

In [None]:
# Seed NumPy's global random generator so the random split below is repeatable
SEED = 4
np.random.seed(SEED)

In [None]:
# Draw one uniform random number in [0, 1) per row and compare it with 0.7;
# the result is a boolean mask that is True where the draw is <= 0.7
row_count = len(df_instacart_data)
dev = np.random.rand(row_count) <= 0.7

In [None]:
# Display the boolean mask (True marks the rows that go into the 70% sample)
dev

In [None]:
# 70% sample: keep the rows where the mask is True
sample70 = df_instacart_data.loc[dev]

In [None]:
# 30% sample: keep the remaining rows, i.e. those where the mask is False
sample30 = df_instacart_data.loc[np.logical_not(dev)]

In [None]:
# Accuracy check: row count of the merged dataframe, to be compared with the
# combined size of the two samples
len(df_instacart_data)

In [None]:
# The two samples must partition the merged dataframe: together they have to
# contain exactly as many rows as the original. Fail loudly if they do not.
combined_rows = len(sample70) + len(sample30)
assert combined_rows == len(df_instacart_data), 'sample70 and sample30 do not add up to the full dataframe'
combined_rows

In [None]:
# Day-of-week and price columns of the 30% sample (input of the Fig. 4 line chart)
df_order_days_prices = sample30.loc[:, ['orders_day_of_week', 'prices']]

In [None]:
# Order-hour and price columns of the 30% sample (input of the Fig. 7 line plot)
fig7sample = sample30.loc[:, ['order_hour_of_day', 'prices']]

In [None]:
# Age and income columns of the 30% sample (input of the Fig. 8 line chart)
fig8sample = sample30.loc[:, ['age', 'income']]

In [None]:
# Income, gender, loyalty flag and age columns of the 30% sample
fig9sample = sample30.loc[:, ['income', 'gender', 'loyalty_flag', 'age']]

## 03 Data visualisation

In [None]:
# Count the rows per day of the week, sort the counts by day index and
# draw them as a bar chart
rows_per_day = df_orders['orders_day_of_week'].value_counts().sort_index()
fig1 = rows_per_day.plot.bar(color="green")

*Fig.1 Bar chart - frequency of orders for each day of the week.*

In [None]:
# Histogram of the continuous prices column, split into 20 bins
fig2 = df_orders['prices'].plot(kind='hist', bins=20, color="green")

*Fig.2 Histogram - Density of prices*

In [None]:
# Scatter the prices column against itself to see how its data points are spread
fig3 = sns.scatterplot(
    data=df_orders,
    x='prices',
    y='prices',
    color="green",
)

*Fig. 3 Scatterplot - Distribution of prices.*

In [None]:
# Line chart of prices across the days of the week, drawn from the 30% sample subset
fig4 = sns.lineplot(
    data=df_order_days_prices,
    x='orders_day_of_week',
    y='prices',
    color="green",
)

*Fig. 4 Line chart - Change in customer spending depending on the day of the week.*

In [None]:
# Histogram of the order hour with 24 bins
order_hours = df_orders['order_hour_of_day']
fig5 = order_hours.plot(kind='hist', bins=24, color="green")

*Fig. 5 Histogram - Customer activity during the day.*

In [None]:
# Summary statistics (as returned by describe()) for the order_hour_of_day column
df_orders['order_hour_of_day'].describe()

**MOST ORDERS** = Between 8 and 18 the number of orders does not drop below 1.6 mln <br/>
**AVERAGE ORDERS** = At 19 the number of orders oscillates below 1.6 mln but stays above 1 mln <br/>
**FEWEST ORDERS** = Orders occurring between 21 and 7 are below 1 mln 

In [None]:
# Count the rows per loyalty_flag value and draw the counts as a bar chart
loyalty_counts = df_orders['loyalty_flag'].value_counts()
fig6 = loyalty_counts.plot.bar(color="green")

*Fig 6. Bar chart - Distribution of customers.*

In [None]:
# Line plot of prices by hour of the day (expenditure differences), from the 30% sample subset
fig7 = sns.lineplot(
    data=fig7sample,
    x='order_hour_of_day',
    y='prices',
    color="green",
)

*Fig. 7 Line plot - Expenditure differences by hour of the day.*

In [None]:
# Line chart of income against age, drawn from the 30% sample subset
fig8 = sns.lineplot(
    data=fig8sample,
    x='age',
    y='income',
    color="green",
)

*Fig. 8 Line chart - Correlation between age and income.*

In [None]:
# Faceted scatter plot of age against income: one panel per gender, points
# coloured by loyalty flag. Plots fig9sample, which holds exactly the four
# columns used here, instead of passing the full 30% sample.
fig9 = sns.relplot(
    data=fig9sample,
    x="income", y="age", hue="loyalty_flag", col="gender",
)

*Fig. 9 Scatterplot - Income distribution by age and gender for different customer types.*

## 04 Export

In [None]:
# Write the merged dataframe to instacart_data.pkl in the project's _database folder
export_target = os.path.join(path, '_database', 'instacart_data.pkl')
df_instacart_data.to_pickle(export_target)

In [None]:
# Save every figure created above as a PNG in the project's static assets folder.
assets_dir = os.path.join(path, '_static/assets')
# savefig does not create missing directories, so make sure the target exists
os.makedirs(assets_dir, exist_ok=True)

plots_to_save = {
    'fig1': fig1,
    'fig2': fig2,
    'fig3': fig3,
    'fig4': fig4,
    'fig5': fig5,
    'fig6': fig6,
    'fig7': fig7,
    'fig8': fig8,
    'fig9': fig9,
}

for plot_name, plot_obj in plots_to_save.items():
    # fig1-fig8 are matplotlib Axes, which expose their Figure as `.figure`.
    # fig9 is a seaborn FacetGrid: newer seaborn releases also provide `.figure`,
    # older ones only `.fig`, so fall back to that attribute when needed.
    figure = getattr(plot_obj, 'figure', None)
    if figure is None:
        figure = plot_obj.fig
    figure.savefig(os.path.join(assets_dir, plot_name + '.png'))