## Diwali Sales Analysis Using Python. #

# Import Python libraries #

In [None]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt #visualizing data
%matplotlib inline 
import seaborn as sns

In [None]:
# Read the sales CSV into a DataFrame, decoding it with the 'unicode_escape' codec.
df = pd.read_csv(filepath_or_buffer='Diwali Sales Data.csv', encoding='unicode_escape')

# Data Cleaning


In [None]:
# Preview the first five rows of the loaded data.
df.head(n=5)

In [None]:
# (number of rows, number of columns) of the loaded frame
df.shape

In [None]:
# Print column names, non-null counts and dtypes
df.info()

# Drop unrelated/blank column

In [None]:
# Count the missing values in each column (isna is the canonical name of isnull).
df.isna().sum()

In [None]:
# Drop the unrelated/blank columns.
# errors='ignore' keeps this cell re-runnable: without it, executing the cell a
# second time raises KeyError because the columns have already been removed.
df.drop(columns=['Status', 'unnamed1'], inplace=True, errors='ignore')

In [None]:
# Re-check the number of missing values per column.
df.isna().sum(axis=0)

In [None]:
# Discard every row that contains at least one missing value.
df.dropna(axis=0, how='any', inplace=True)

In [None]:
# Store Amount as an integer column.
df['Amount'] = df['Amount'].astype(dtype='int')

In [None]:
# Confirm the dtype of the Amount column.
df['Amount'].dtype

In [None]:
# List the column labels of the frame
df.columns

# Renaming Column 

In [None]:
# rename() returns a new DataFrame and the result is not assigned here, so this
# cell only displays the renamed frame. df itself keeps the 'Marital_Status'
# column, which the marital-status plots further down still reference.
df.rename(columns = {'Marital_Status': 'Shaadi'})

# `describe()` returns summary statistics of the DataFrame's numeric columns (count, mean, std, min, quartiles, max)

In [None]:
# Summary statistics of the frame (describe() covers the numeric columns by default)
df.describe()

In [None]:
# Summary statistics restricted to Age, Orders and Amount.
df.loc[:, ['Age', 'Orders', 'Amount']].describe()

# Exploratory Data Analysis

## 1. Gender Wise

In [None]:
# Bar chart of the number of customers per gender, with the count printed on each bar.

ax = sns.countplot(data=df, x='Gender')
for container in ax.containers:
    ax.bar_label(container)
ax.set_title('Counting the total customers based on Gender')


In [None]:
# Bar chart of the total purchase amount per gender, smallest total first.

sales_gen = (
    df.groupby(['Gender'], as_index=False)['Amount']
    .sum()
    .sort_values(by='Amount')
)

sns.barplot(data=sales_gen, x='Gender', y='Amount')
                       


## 2. Age Wise


In [None]:
# Number of customers in each age group, split by gender, with counts on the bars.

ax = sns.countplot(x='Age Group', hue='Gender', data=df)
for container in ax.containers:
    ax.bar_label(container)

ax.set_title('Age Wise Bar Graph')

In [None]:
# Total Amount vs Age Group, split by Gender.
#
# Only 'Amount' is aggregated: calling .sum() on the whole grouped frame would
# also aggregate every other column, including the non-numeric ones, which is
# wasted work and is no longer silently skipped by newer pandas versions.
sales_age = (
    df.groupby(['Age Group', 'Gender'], as_index=False)['Amount']
    .sum()
    .sort_values(by='Amount')
)
plt.figure(figsize=(10,4))
sns.barplot(x="Age Group", y="Amount", data=sales_age, hue="Gender")

## Which age group and gender do most of the buyers belong to?

## 1. State Wise


In [None]:
# Total number of orders for the 10 states with the most orders.
#
# Only 'Orders' is summed; summing the whole grouped frame would also aggregate
# every other column, including the non-numeric ones.
sales_state = (
    df.groupby(['State'], as_index=False)['Orders']
    .sum()
    .sort_values(by='Orders', ascending=False)
    .head(10)
)
sns.set(rc={'figure.figsize':(12,5)})
sns.barplot(x="State", y="Orders", data=sales_state)
plt.xticks(rotation = 45)

In [None]:
# Total purchase amount for the 10 states with the highest totals.
#
# Only 'Amount' is summed; summing the whole grouped frame would also aggregate
# every other column, including the non-numeric ones.
sales_state = (
    df.groupby(['State'], as_index=False)['Amount']
    .sum()
    .sort_values(by='Amount', ascending=False)
    .head(10)
)
sns.set(rc={'figure.figsize':(12,5)})
sns.barplot(x="State", y="Amount", data=sales_state)
plt.xticks(rotation = 25)

## 2. Marital Status Wise


In [None]:
# Number of customers per marital status, with the count printed on each bar.
# The figure size must be configured before the plot is drawn; setting it
# afterwards does not resize the figure that already exists.
sns.set(rc={'figure.figsize':(10,5)})

ax = sns.countplot(data=df, x ='Marital_Status')
for bars in ax.containers:
    ax.bar_label(bars)

In [None]:
# Total purchase amount per marital status and gender, largest total first.
sns.set(rc={'figure.figsize':(10,5)})

sales_state = (
    df.groupby(['Marital_Status', 'Gender'], as_index=False)['Amount']
    .sum()
    .sort_values(by='Amount', ascending=False)
)
sns.barplot(x='Marital_Status', y='Amount', hue='Gender', data=sales_state)

In [None]:
# NOTE(review): this cell repeats the aggregation and chart of the cell directly
# above it (amount per marital status and gender); consider removing one of them.
sns.set(rc={'figure.figsize':(10,5)})

amount_by_status_gender = df.groupby(['Marital_Status','Gender'], as_index=False)['Amount'].sum()
sales_state = amount_by_status_gender.sort_values(by='Amount', ascending=False)
del amount_by_status_gender  # keep the notebook namespace unchanged

sns.barplot(x='Marital_Status', y='Amount', hue='Gender', data=sales_state)

## 3. Occupation Wise

In [None]:
# Number of customers per occupation, with the count printed on each bar.
# The wide figure size is applied before plotting; setting it after the plot
# has been drawn does not resize the existing figure.
sns.set(rc={'figure.figsize':(20,5)})

ax = sns.countplot(data=df, x ='Occupation')
for bars in ax.containers:
    ax.bar_label(bars)

plt.xticks(rotation =25)

In [None]:
# Total purchase amount per occupation, largest total first.
sns.set(rc={'figure.figsize':(20,5)})

sales_state = (
    df.groupby(['Occupation'], as_index=False)['Amount']
    .sum()
    .sort_values(by='Amount', ascending=False)
)
sns.barplot(x='Occupation', y='Amount', data=sales_state)
plt.xticks(rotation=45)


## 4. Product Category


In [None]:
# Number of purchases per product category, with the count printed on each bar.
# The wide figure size is applied before plotting; setting it after the plot
# has been drawn does not resize the existing figure.
sns.set(rc={'figure.figsize':(20,5)})

ax = sns.countplot(data=df, x ='Product_Category')
for bars in ax.containers:
    ax.bar_label(bars)

plt.xticks(rotation =25)

In [None]:
# Total purchase amount for the 10 product categories with the highest totals.
sns.set(rc={'figure.figsize':(20,5)})

sales_state = (
    df.groupby(['Product_Category'], as_index=False)['Amount']
    .sum()
    .sort_values(by='Amount', ascending=False)
    .head(10)
)
sns.barplot(x='Product_Category', y='Amount', data=sales_state)
plt.xticks(rotation=45)

Next, we look at which individual products are the most sold across all product categories.

In [None]:
# Total orders for the 10 products with the most orders.
sns.set(rc={'figure.figsize':(20,5)})

sales_state = (
    df.groupby(['Product_ID'], as_index=False)['Orders']
    .sum()
    .sort_values(by='Orders', ascending=False)
    .head(10)
)
sns.barplot(x='Product_ID', y='Orders', data=sales_state)
plt.xticks(rotation=45)

In [None]:
# Top 10 most sold products (same data as the cell above, drawn with pandas plotting)

fig1, ax1 = plt.subplots(figsize=(12,7))
# nlargest() already returns its result in descending order, so no extra sort is
# needed; ax=ax1 draws on the axes created above explicitly instead of relying
# on pyplot's implicit "current axes".
df.groupby('Product_ID')['Orders'].sum().nlargest(10).plot(kind='bar', ax=ax1)
plt.xticks(rotation = 25)

# **CONCLUSION:**

Married women in the 26-35 age group from UP, Maharashtra and Karnataka, working in IT, Healthcare and Aviation, are the most likely to buy products from the Food, Clothing and Electronics categories.