# Importing Libraries

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Load Dataset

In [None]:
# Read the supermarket sales records from the CSV in the working directory.
df = pd.read_csv(filepath_or_buffer='supermarket_sales.csv')
# Preview the first five rows to sanity-check the load.
display(df.head(n=5))

# Data Cleaning

In [None]:
# Show the dtype pandas assigned to every column. The heading and the
# listing are separated by a newline, exactly as two consecutive prints
# would emit them.
print("\nData Types:", df.dtypes, sep="\n")

In [None]:
# Count the null entries in each column (isna is the pandas alias of isnull)
# and print them under a heading.
print("\nChecking for Missing Values:", df.isna().sum(), sep="\n")

In [None]:
# Discard rows that repeat an earlier row in every column, keeping the
# first occurrence of each (these are the pandas defaults, spelled out).
df = df.drop_duplicates(subset=None, keep='first')

# Data Processing 

In [None]:
# Per-row sales amount: 'Unit price' multiplied element-wise by 'Quantity'.
df['Total Sales'] = df['Unit price'].mul(df['Quantity'])
# Bare expression so the notebook renders the updated frame.
df

In [None]:
# Convert the 'Date' column to datetimes, then split out the day-of-month
# and the month number as their own columns ('Day' is added before 'Month').
df['Date'] = pd.to_datetime(df['Date'])
df['Day'], df['Month'] = df['Date'].dt.day, df['Date'].dt.month
# Bare expression so the notebook renders the updated frame.
df

# Data Aggregation

In [None]:
# Sum of the 'Total' column for each month number (index = 'Month').
# NOTE(review): this reads 'Total', whereas other cells in this notebook use
# the derived 'Total Sales' column — confirm which figure is intended.
monthly_sales_trend = df['Total'].groupby(df['Month']).sum()

# Share of rows per gender, expressed as a percentage.
gender_distribution = df['Gender'].value_counts(normalize=True).mul(100)

# Data Visualization

In [None]:
# Bar chart: 'Total Sales' summed per city (estimator=np.sum turns each bar
# into the city's total rather than a per-row aggregate).
plt.figure(figsize=(10, 6))
sns.barplot(
    data=df,
    x='City',
    y='Total Sales',
    estimator=np.sum,
).set_title('Total Sales by City')
plt.show()


In [None]:
# Pie chart: number of rows per payment method, labelled with percentages.
payment_counts = df['Payment'].value_counts()
plt.figure(figsize=(5, 5))
plt.pie(
    payment_counts,
    labels=payment_counts.index,
    autopct='%1.1f%%',  # one decimal place followed by a literal '%'
    startangle=140,
)
plt.gca().axis('equal')  # equal axis scaling keeps the pie circular
plt.gca().set_title('Payment Method Distribution')
plt.show()

In [None]:
# Visualization 3: Monthly Sales Trend (Bar Chart)
# One bar per month present in `monthly_sales_trend` (index = month number,
# values = summed sales for that month).
plt.figure(figsize=(10, 6))
sns.barplot(x=monthly_sales_trend.index, y=monthly_sales_trend.values, palette='viridis')
plt.title('Monthly Sales Trend')
plt.xlabel('Month')
plt.ylabel('Total Sales ($)')
# No manual plt.xticks(range(1, 13)) here: barplot draws a categorical axis
# with bars at positions 0..n-1 and already labels each bar with its month
# number. Forcing ticks at 1..12 moved the ticks off the bars and stretched
# the axis far past the data.
plt.show()

In [None]:
# Pie chart of the customer gender split held in `gender_distribution`,
# using a fixed two-colour scheme and one-decimal percentage labels.
plt.figure(figsize=(5, 5))
plt.pie(
    gender_distribution,
    labels=gender_distribution.index,
    autopct='%1.1f%%',
    startangle=140,
    colors=['#ff9999', '#66b3ff'],
)
plt.gca().axis('equal')  # equal axis scaling keeps the pie circular
plt.gca().set_title('Customer Gender Distribution')
plt.show()

In [None]:
# Line chart of 'Total Sales' against 'Day', one line per branch.
# NOTE(review): 'Day' presumably holds the day-of-month, so rows from
# different months share an x position — confirm that this is the intended
# "daily" view.
plt.figure(figsize=(10, 6))
sns.lineplot(
    data=df,
    x='Day',
    y='Total Sales',
    hue='Branch',
).set_title('Daily Sales Trend')
plt.show()

In [None]:
# Histogram (30 bins) of the 'Total Sales' column with a KDE curve overlaid.
plt.figure(figsize=(10, 6))
sns.histplot(
    df['Total Sales'],
    bins=30,
    kde=True,
).set_title('Sales Distribution')
plt.show()

In [None]:
# Bar chart of 'Total Sales' per product line.
# NOTE(review): no `estimator` is passed here (the city chart passes
# np.sum), so bar heights use seaborn's default aggregate — confirm this
# matches the "Sales by Product Line" title.
plt.figure(figsize=(10, 6))
sns.barplot(
    data=df,
    x='Product line',
    y='Total Sales',
).set_title('Sales by Product Line')
# Tilt the category labels so long product-line names do not overlap.
plt.xticks(rotation=45)
plt.show()


In [None]:
# Histogram (20 bins) of the 'Unit price' column with a KDE curve overlaid.
plt.figure(figsize=(10, 6))
sns.histplot(
    df['Unit price'],
    bins=20,
    kde=True,
).set_title('Distribution of Unit Price')
plt.show()

In [None]:
# Heatmap for Correlation
# Pairwise correlation between the numeric columns, with the coefficient
# annotated in each cell.
plt.figure(figsize=(10, 6))
# Restrict to numeric columns before correlating: DataFrame.corr() on a frame
# that still holds non-numeric columns raises on pandas >= 2.0, where
# numeric_only defaults to False. select_dtypes works on every pandas version.
corr = df.select_dtypes(include='number').corr()
sns.heatmap(corr, annot=True, cmap='coolwarm')
plt.title('Correlation Heatmap')
plt.show()


In [None]:
# Box plot showing the spread of 'Total Sales' within each product line.
plt.figure(figsize=(10, 6))
sns.boxplot(
    data=df,
    x='Product line',
    y='Total Sales',
).set_title('Boxplot of Total Sales by Product Line')
# Tilt the category labels so long product-line names do not overlap.
plt.xticks(rotation=45)
plt.show()