# Demo-9-Supermarket

In [None]:
import numpy as np 
import pandas as pd 
import seaborn as sns
import matplotlib.pyplot as plt

from IPython.display import display, HTML
# Inject a CSS rule that stretches elements with class "container" to 100% width.
full_width_css = "<style>.container { width:100% !important; }</style>"
display(HTML(full_width_css))

In [None]:
# Read the CSV into the `sales` DataFrame, then preview three randomly sampled rows.
sales = pd.read_csv(filepath_or_buffer='supermarket_sales.csv')
sales.sample(n=3)

In [None]:
# Print pandas' concise summary of `sales` (columns, dtypes, non-null counts).
sales.info()

In [None]:
# Per-column count of cells equal to +inf, -inf or NaN.
non_finite_markers = [np.inf, -np.inf, np.nan]
sales.isin(non_finite_markers).sum()

## Convert the object column ['Date'] to a datetime column ['date']

In [None]:
# Parse the raw 'Date' values and keep the result in a separate lowercase 'date' column,
# leaving the original 'Date' column untouched.
raw_dates = sales['Date']
sales['date'] = pd.to_datetime(raw_dates)

In [None]:
# Display the 'date' column to inspect the converted values.
sales['date']

In [None]:
# Show the dtype of the 'date' column to confirm the conversion.
sales['date'].dtype

In [None]:
# Break the parsed date into calendar components, one new column per component
# (created in the order day, month, year).
for part in ('day', 'month', 'year'):
    sales[part] = getattr(sales['date'].dt, part)

In [None]:
# Display the three derived date-component columns side by side.
sales.loc[:, ['day', 'month', 'year']]

## Convert the object column ['Time'] to datetime and extract the hour

In [None]:
# Parse 'Time' using the "%H:%M" pattern and overwrite the column with the result,
# then store the hour component in a new 'Hour' column.
# (A fuller pattern for complete timestamps would look like '%Y-%m-%d %H:%M:%S.%f'.)
parsed_time = pd.to_datetime(sales['Time'], format="%H:%M")
sales['Time'] = parsed_time
sales['Hour'] = parsed_time.dt.hour

In [None]:
# Distinct hours that appear in the data, in ascending order (i.e. the opening hours).
distinct_hours = sales['Hour'].unique()
sorted(distinct_hours)

In [None]:
# Display the descriptive statistics that DataFrame.describe() computes for `sales`.
sales.describe()

## Investigate the categorical columns

In [None]:
# Names of the columns whose dtype compares equal to "object".
[name for name, dtype in sales.dtypes.items() if dtype == "object"]

In [None]:
# For each categorical column, print how many distinct values it has and list them.
# Each pair is (column name in `sales`, label used in the printed message).
categorical_columns = [
    ('Branch', 'Branch'),
    ('City', 'City'),
    ('Customer type', 'Customer Type'),
    ('Gender', 'Gender'),
    ('Product line', 'Product Line'),
    ('Payment', 'Payment'),
]
for column, label in categorical_columns:
    values = sales[column]
    print(f"{values.nunique()} of unique values in {label}: {values.unique().tolist()}")

## Gender 

In [None]:
# Switch seaborn to the "darkgrid" style, then draw one bar per Gender value.
sns.set(style="darkgrid")
gender_axes = sns.countplot(data=sales, x="Gender")
# NOTE: genderCount is bound to whatever set_title() returns, not to the counts themselves.
genderCount = gender_axes.set_title("Gender_Count")

In [None]:
# Number of rows whose Gender is 'Female'.
(sales['Gender'] == 'Female').sum()

In [None]:
# Number of rows whose Gender is 'Male'.
(sales['Gender'] == 'Male').sum()

## Branch ratings

In [None]:
# Box plot of Rating grouped by Branch.
sns.boxplot(
    data=sales,
    x="Branch",
    y="Rating",
).set_title("Ratings by Branch")

## Product Sales per Hour

In [None]:
# Line plot of Quantity against Hour.
# No estimator is passed, so seaborn's default aggregation applies to rows that share
# an Hour (the mean, per the seaborn docs) - the line is an average, not a total.
sns.lineplot(
    data=sales,
    x="Hour",
    y='Quantity',
).set_title("Product Sales per Hour")

## Compare each branch's sales quantity across the 3 months

In [None]:
# Grid of line plots: one row per Branch, one column per month.
# Gender drives both the line colour (hue) and the line style.
sns.relplot(
    data=sales,
    kind="line",
    x="Hour",
    y='Quantity',
    row='Branch',
    col='month',
    hue="Gender",
    style="Gender",
)

## Compare each branch's sales amount across the 3 months

In [None]:
# Grid of line plots of Total against Hour: one row per Branch, one column per month.
# estimator=None disables aggregation, so the individual observations are drawn.
sns.relplot(
    data=sales,
    kind="line",
    x="Hour",
    y='Total',
    row='Branch',
    col='month',
    estimator=None,
)

## Ratings vs Amount by gender

In [None]:
# Joint plot of Rating against Total, coloured by Gender.
sns.jointplot(
    data=sales,
    x="Total",
    y="Rating",
    hue="Gender",
    height=8,
)

In [None]:
# Split violins of Rating per Product line, with one half per Gender.
sns.violinplot(
    data=sales,
    x='Rating',
    y='Product line',
    hue='Gender',
    split=True,
)

In [None]:
# Average Rating over the rows whose Gender is 'Female'.
sales.loc[sales['Gender'] == 'Female', 'Rating'].mean()

In [None]:
# Average Rating over the rows whose Gender is 'Male'.
sales.loc[sales['Gender'] == 'Male', 'Rating'].mean()

## Product type vs Shopping quantity

In [None]:
# Boxen plot of Quantity for each Product line (horizontal orientation).
sns.boxenplot(
    data=sales,
    x='Quantity',
    y='Product line',
)

## Product line

In [None]:
# Count rows per Product line; bars follow the order returned by value_counts().
line_order = sales['Product line'].value_counts().index
sns.countplot(data=sales, y='Product line', order=line_order)

## Product type vs Shopping Amount

In [None]:
# Boxen plot of Total for each Product line (horizontal orientation).
sns.boxenplot(
    data=sales,
    x='Total',
    y='Product line',
)

## Shopping amount vs Product line by Gender

In [None]:
# Split violins of Total per Product line, with one half per Gender.
sns.violinplot(
    data=sales,
    x='Total',
    y='Product line',
    hue='Gender',
    split=True,
)

## Gross income by Product line

In [None]:
# Boxen plot of 'gross income' for each Product line (horizontal orientation).
sns.boxenplot(
    data=sales,
    x='gross income',
    y='Product line',
)

## Product rating

In [None]:
# Box plot of Rating for each Product line (horizontal orientation).
sns.boxplot(
    data=sales,
    x='Rating',
    y='Product line',
)

## Payment channel

In [None]:
# Count of rows per Payment value.
sns.countplot(
    data=sales,
    x="Payment",
).set_title("Payment Channel")

## Payment channel on each branch

In [None]:
# Count of rows per Payment value, with a separate bar colour for each Branch.
sns.countplot(
    data=sales,
    x="Payment",
    hue="Branch",
).set_title("Payment Channel by Branch")

## Customer membership

In [None]:
# Count of rows per 'Customer type', with a separate bar colour for each Branch.
sns.countplot(
    data=sales,
    x="Customer type",
    hue="Branch",
).set_title("Customer Type by Branch")

In [None]:
# One bar per 'Customer type'; estimator=sum makes the bar height the sum of Total
# for that group instead of seaborn's default aggregate.
sns.barplot(
    data=sales,
    x="Customer type",
    y="Total",
    estimator=sum,
)

## Sales amount by membership

In [None]:
# Sum of Total for each 'Customer type', returned as a one-column DataFrame.
sales.groupby(['Customer type'])[['Total']].sum()

## Does the customer type influence the customer rating?

In [None]:
# Split violins of Rating per Product line, with one half per 'Customer type'.
sns.violinplot(
    data=sales,
    x='Rating',
    y='Product line',
    hue='Customer type',
    split=True,
)

In [None]:
# Average Rating over the rows whose 'Customer type' is 'Member'.
sales.loc[sales['Customer type'] == 'Member', 'Rating'].mean()

In [None]:
# Average Rating over the rows whose 'Customer type' is 'Normal' (the non-members).
sales.loc[sales['Customer type'] == 'Normal', 'Rating'].mean()

## Quantity vs Total sales

In [None]:
# Scatter of Quantity against Total with seaborn's fitted regression line.
sns.lmplot(
    data=sales,
    x="Total",
    y="Quantity",
)