In [None]:
# import libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns

In [None]:
# Load the raw sales data from the CSV file into a DataFrame.
# NOTE(review): "unicode_escape" is presumably needed because the file is not valid UTF-8 - confirm.
df = pd.read_csv(
    "Sales_Data.csv",
    encoding="unicode_escape",
)

In [None]:
# Check the size of the loaded data: shape is a (rows, columns) tuple
df.shape

In [None]:
# Preview the first rows of the imported data (head() shows 5 rows by default)
df.head()

In [None]:
# Column names, non-null counts and data types for every field
df.info()

### __Data Cleaning__

In [None]:
# Remove the blank 'Zipcode' column.
# The result is reassigned (no inplace=True) and errors='ignore' is passed so
# that re-running this cell is harmless: once the column is gone, a second run
# is a no-op instead of raising KeyError.
df = df.drop(columns=['Zipcode'], errors='ignore')

In [None]:
# List the column labels currently present in the DataFrame
df.columns

In [None]:
# Count the missing (null/NaN) values in each column
df.isna().sum()

In [None]:
# Remove only the rows in which every column is null (how='all');
# rows that are missing just some of their fields are kept
df.dropna(axis='index', how='all', inplace=True)

In [None]:
# Re-check the (rows, columns) size after the cleaning steps so far
df.shape

In [None]:
# Expand the Gender codes into readable labels.
# Both codes are mapped so that charts do not end up mixing "Male" with a bare "F";
# any value that is not a key of the mapping is left unchanged.
# NOTE(review): assumes the raw column uses "M"/"F" codes - confirm against the data.
df["Gender"] = df["Gender"].replace({"M": "Male", "F": "Female"})


In [None]:
# Show only the rows whose Gender value equals "Male"
df.loc[df["Gender"].eq("Male")]

### __EDA - Exploratory Data Analysis__

In [None]:
# describe() returns summary statistics of the DataFrame (count, mean, std, etc.)
df.describe()

In [None]:
# Summary statistics restricted to the Amount, Age and Orders columns
df.loc[:, ["Amount", "Age", "Orders"]].describe()

In [None]:
# Number of transactions (rows) per Gender, shown as a bar chart
ax = sns.countplot(x="Gender", data=df)

# Title, axis labels and tick size are set once, outside the data-label loop:
# they do not depend on the bars, and this way they are applied even if the
# axes has no bar containers to iterate over.
ax.set_title("Gender Wise Transactions", fontsize=16)   # Chart title
ax.set_xlabel("Gender")                                 # X-axis label
ax.set_ylabel("Transactions")                           # Y-axis label
ax.tick_params(axis="both", labelsize=14)

# Annotate every bar with its count
for bar in ax.containers:
    ax.bar_label(bar)

plt.show()

In [None]:
# Total sales Amount per Gender, shown as a bar chart

# Aggregate to one row per Gender with the summed Amount, largest total first
Total_Amount = (
    df.groupby('Gender', as_index=False)['Amount']
    .sum()
    .sort_values(by='Amount', ascending=False)
)

# Draw the bars and title the chart
ax = sns.barplot(data=Total_Amount, x='Gender', y='Amount')
ax.set_title('Gender Wise Total Sales Amount', fontsize=16)

# Label each bar with its total, formatted without decimals
for bar in ax.containers:
    ax.bar_label(bar, fmt='%.0f')

plt.show()

In [None]:
# Share of transactions per Gender, shown as a pie chart
gender_counts = df['Gender'].value_counts()

# Wedges are sized by the counts, labelled with the Gender values, annotated
# with whole-number percentages, and drawn starting from the 90-degree position
plt.pie(gender_counts, labels=gender_counts.index, autopct='%.f%%', startangle=90)

plt.title('Gender Distribution', fontsize=16)
plt.show()

Female customers contributed a significantly higher share of total revenue and transaction volume than male customers, indicating stronger purchasing activity within this segment.

### __Age Group wise Total Transactions__

In [None]:
# Number of transactions per Age Group, shown as a bar chart

# Count the rows per Age Group, ordered from most to fewest transactions
age_groups_count = (
    df['Age Group']
    .value_counts()
    .sort_values(ascending=False)
)

# The sorted index gives the left-to-right bar order for countplot
sns_order = age_groups_count.index

ax = sns.countplot(data=df, x='Age Group', order=sns_order)
ax.set_title('Age Group wise Transactions', fontsize=16)

# Show the count on top of each bar
for bars in ax.containers:
    ax.bar_label(bars)

plt.show()

In [None]:
# Number of transactions per Age Group, split by Gender, shown as grouped bars.
# Reuses sns_order (defined in the Age Group count cell) for the bar order.
ax = sns.countplot(
    data=df,
    x='Age Group',
    hue='Gender',
    order=sns_order,
)
ax.set_title('Age Group and Gender wise Transactions', fontsize=16)

# One container per hue level; label every bar with its count
for bars in ax.containers:
    ax.bar_label(bars)

plt.show()

In [None]:
# Total sales Amount per Age Group, shown as a bar chart (largest total first)
Total_Amount = (
    df.groupby(['Age Group'], as_index=False)['Amount']
    .sum()
    .sort_values(by='Amount', ascending=False)
)

ax = sns.barplot(data=Total_Amount, x='Age Group', y='Amount')
ax.set_title('Age Group wise Total Amount', fontsize=16)

# Label each bar with its total, formatted without decimals
for bar in ax.containers:
    ax.bar_label(bar, fmt='%.0f')

plt.show()

The 26–35 age group emerged as the most dominant segment, contributing the highest revenue and transaction count, indicating strong purchasing power and engagement within this demographic.

### __State wise Analysis__

In [None]:
# Top 5 states by total number of Orders, shown as a bar chart

# Sum Orders per State, sort descending and keep the first five rows
order_state = (
    df.groupby('State', as_index=False)['Orders']
    .sum()
    .sort_values(by='Orders', ascending=False)
    .head(5)
)

ax = sns.barplot(data=order_state, x='State', y='Orders')
ax.set_title('Order wise States', fontsize=16)

# Show the order total on top of each bar
for bar in ax.containers:
    ax.bar_label(bar)

plt.show()

In [None]:
# Top 5 states by total sales Amount, shown as a bar chart

# Sum Amount per State, sort descending and keep the first five rows
sales_state = (
    df.groupby('State', as_index=False)['Amount']
    .sum()
    .sort_values(by='Amount', ascending=False)
    .head(5)
)

ax = sns.barplot(data=sales_state, x='State', y='Amount')
ax.set_title('Sales wise States', fontsize=16)

# Label each bar with its total, formatted without decimals
for bar in ax.containers:
    ax.bar_label(bar, fmt='%.0f')

plt.show()

Uttar Pradesh, Maharashtra, Karnataka, Delhi, and Madhya Pradesh emerged as the top-performing states in terms of both revenue generation and order volume, indicating strong market demand and customer engagement in these regions.

### __Product Category Analysis__

In [None]:
# Number of transactions per Product_Category, shown as a bar chart

# Count the rows per category, ordered from most to fewest transactions
product_category = df['Product_Category'].value_counts().sort_values(ascending=False)

# The sorted index gives the left-to-right bar order for countplot
sort_order = product_category.index

ax = sns.countplot(x='Product_Category', data=df, order=sort_order)
# Chart title (spelling of "Category" corrected)
ax.set_title('Product Category wise Transactions', fontsize=16)

# Show the count on top of each bar
for bar in ax.containers:
    ax.bar_label(bar)

plt.show()

In [None]:
# Total sales Amount per Product_Category, shown as a bar chart (largest first)
amount_sales = (
    df.groupby('Product_Category', as_index=False)['Amount']
    .sum()
    .sort_values(by='Amount', ascending=False)
)

ax = sns.barplot(data=amount_sales, x='Product_Category', y='Amount')
ax.set_title('Amount wise Product Category', fontsize=16)

# Label each bar with its total, formatted without decimals
for bar in ax.containers:
    ax.bar_label(bar, fmt='%.0f')

plt.show()

In [None]:
# Total sales Amount per Product_Category, split by Gender, shown as grouped bars
sales_pro = (
    df.groupby(['Product_Category', 'Gender'], as_index=False)['Amount']
    .sum()
    .sort_values(by='Amount', ascending=False)
)

# The figure size is set on this figure before plotting. The earlier
# sns.set(rc={'figure.figsize': ...}) call ran after the axes had been created,
# so it only changed the default for later figures and never resized this chart.
fig, ax = plt.subplots(figsize=(20, 7))
sns.barplot(x='Product_Category', hue='Gender', y='Amount', data=sales_pro, ax=ax)
ax.set_title('Product Category and Gender wise Amount', fontsize=16)

# One container per hue level; label each bar with its total, without decimals
for bar in ax.containers:
    ax.bar_label(bar, fmt='%.0f')

plt.show()

The Beauty category emerged as the top-performing segment in both revenue and transaction count, indicating strong customer demand and consistent purchasing behavior.

### __Profession Wise Analysis__

In [None]:
# Number of transactions per Profession, shown as a bar chart

# Count the rows per Profession (most frequent first); the index is the bar order
trans_sort = (
    df['Profession']
    .value_counts()
    .sort_values(ascending=False)
)
trans_order = trans_sort.index

# Set the default figure size via seaborn before the figure is created
sns.set(rc={'figure.figsize': (20, 5)})

ax = sns.countplot(data=df, x='Profession', order=trans_order)
ax.set_title('Profession wise Transactions', fontsize=16)

# Show the count on top of each bar
for bars in ax.containers:
    ax.bar_label(bars)

plt.show()

In [None]:
# Transaction counts of the five most frequent professions, shown as a pie chart.
# Because only the head() of the counts is plotted, the percentages are shares
# among these five professions, not shares of all transactions.
profession_count = df['Profession'].value_counts().head()

# Wedges are sized by the counts, labelled with the Profession values, annotated
# with two-decimal percentages, and drawn starting from the 90-degree position
plt.pie(profession_count, labels=profession_count.index, autopct='%.2f%%', startangle=90)

plt.title('Profession Distribution', fontsize=16)
plt.show()

From the graphs above, we can see that most of the buyers work in the IT, Healthcare, and Aviation sectors.

In [None]:
# Top 10 products by total number of Orders, shown as a bar chart

# Sum Orders per Product_ID, sort descending and keep the first ten rows.
# The result is stored under its own name so it does not overwrite `sales_state`,
# which holds the state-level sales totals computed earlier in the notebook.
top_products = (
    df.groupby('Product_ID', as_index=False)['Orders']
    .sum()
    .sort_values(by='Orders', ascending=False)
    .head(10)
)

ax = sns.barplot(x='Product_ID', y='Orders', data=top_products)
# Chart title (typo "Tope" corrected)
ax.set_title('Order wise Top 10 Products', fontsize=16)

# Show the order total on top of each bar
for bar in ax.containers:
    ax.bar_label(bar)

plt.show()

Conclusion: The findings suggest that marketing strategies, inventory planning, and promotional campaigns should focus primarily on female customers aged 26–35, particularly in high-performing states and within the Beauty category. Targeting professionals in IT, Healthcare, and Aviation sectors may further enhance revenue growth.