In [None]:
import warnings

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px
import seaborn as sns
warnings.filterwarnings(action="ignore")

In [None]:
# Load the raw sales records from the CSV file; every later cell works off `df`.
SALES_CSV = "Sales Dataset.csv"
df = pd.read_csv(SALES_CSV)

# Preview the first four rows to check that the columns were read as expected.
df.head(n=4)

In [None]:
# Dimensions of the loaded frame as a (rows, columns) tuple.
df.shape

In [None]:
# Total number of missing cells in the whole frame: per-column null counts, summed again.
df.isnull().sum().sum()

In [None]:
# Number of rows that are exact repeats of an earlier row (first occurrences are not counted).
df.duplicated().sum()

Which age group and gender combination contributes most to sales?


In [None]:
# Total revenue for every (Age, Gender) pair, with the group keys flattened
# back into ordinary columns so the result can be fed straight into plots.
grouped_sales = (
    df
    .groupby(['Age', 'Gender'])['Total Amount']
    .sum()
    .reset_index()
)

# Pick out the single row whose summed revenue is the largest.
top_row_label = grouped_sales['Total Amount'].idxmax()
max_sales_row = grouped_sales.loc[top_row_label]

print("Age Group and Gender combination with highest sales: ", "\n")
print(max_sales_row)

In [None]:
# Shared seaborn look for the static chart.
sns.set_style("whitegrid")
sns.set_palette("Paired")

# Grouped bars (one cluster per Age, one bar per Gender) drawn on an explicit
# 12x6 inch figure/axes pair instead of the implicit pyplot "current" axes.
sales_fig, sales_ax = plt.subplots(figsize=(12, 6))
sns.barplot(
    data=grouped_sales,
    x='Age',
    y='Total Amount',
    hue='Gender',
    ax=sales_ax,
)

sales_ax.set_title('Total Sales by Age and Gender', fontsize=16)
sales_ax.set_xlabel('Age', fontsize=14)
sales_ax.set_ylabel('Total Amount', fontsize=14)
# Tilt the x tick labels so neighbouring ages do not overlap.
sales_ax.tick_params(axis='x', labelrotation=45)

sales_fig.tight_layout()
plt.show()


Show the same result over time using Plotly's `animation_frame`

In [None]:
# Animated version of the Age/Gender revenue chart: one frame per calendar day.
#
# Dates are parsed and re-rendered as ISO `YYYY-MM-DD` strings so the frames
# play in chronological order (ISO strings sort chronologically, raw date
# strings in other layouts do not). `df` itself is not modified here.
# NOTE(review): any time-of-day component in 'Date' is dropped by the ISO
# formatting, so rows are pooled per day — confirm that is the intended grain.
daily_totals = (
    df
    .assign(Date=pd.to_datetime(df['Date']).dt.strftime('%Y-%m-%d'))
    .groupby(['Date', 'Age', 'Gender'])['Total Amount']
    .sum()
)

# Give every frame the same set of (Age, Gender) bars by filling combinations
# with no sales on a given day with 0. Plotly animations work best when every
# frame carries the same traces and x positions; with sparse frames, bars for
# groups that are missing from the first frame may not be drawn at all.
full_grid = pd.MultiIndex.from_product(
    daily_totals.index.levels,
    names=daily_totals.index.names,
)
grouped_sales_time = daily_totals.reindex(full_grid, fill_value=0).reset_index()

# Axis ranges of an animated figure are taken from the first frame and are not
# re-scaled per frame, so pin the y axis to the overall peak (plus headroom).
peak_amount = grouped_sales_time['Total Amount'].max()
y_range = [0, peak_amount * 1.1] if pd.notna(peak_amount) and peak_amount > 0 else None

fig = px.bar(
    grouped_sales_time,
    x='Age',
    y='Total Amount',
    color='Gender',
    animation_frame='Date',
    barmode='group',
    range_y=y_range,
    title='Sales by Age and Gender Over Time',
    color_discrete_sequence=px.colors.qualitative.Set2
)

fig.update_layout(
    xaxis_title='Age',
    yaxis_title='Total Amount',
    xaxis_tickangle=-45,
    template='plotly_white'
)

fig.show()


Using a 2D plot

In [None]:
# Interactive Plotly counterpart of the grouped bar chart: one bar cluster per
# Age and one colour per Gender, drawn from the aggregated `grouped_sales`.
bar_options = dict(
    x='Age',
    y='Total Amount',
    color='Gender',
    barmode='group',
    title='Total Sales by Age and Gender',
    color_discrete_sequence=px.colors.qualitative.Set2,
)
fig = px.bar(grouped_sales, **bar_options)

# Axis captions, tilted x tick labels and the light built-in theme.
layout_options = {
    'xaxis_title': 'Age',
    'yaxis_title': 'Total Amount',
    'xaxis_tickangle': -45,
    'template': 'plotly_white',
}
fig.update_layout(**layout_options)

fig.show()


Using a 3D plot

In [None]:

# Re-create the per-(Age, Gender) revenue totals used by the 3D view.
grouped_sales = df.groupby(['Age', 'Gender'], as_index=False)['Total Amount'].sum()

# Human-readable captions for the three plotted columns.
axis_labels = {'Age': 'Age Group', 'Gender': 'Gender', 'Total Amount': 'Total Sales'}

# One marker per (Age, Gender) pair: height (z) and marker size both encode
# the summed revenue, colour encodes the gender.
fig = px.scatter_3d(
    grouped_sales,
    x='Age',
    y='Gender',
    z='Total Amount',
    color='Gender',
    size='Total Amount',
    title='3D Scatter Plot of Sales by Age and Gender',
    labels=axis_labels,
)

# Set the titles of the 3D scene axes explicitly.
scene_titles = dict(
    xaxis_title='Age Group',
    yaxis_title='Gender',
    zaxis_title='Total Sales',
)
fig.update_layout(scene=scene_titles)

fig.show()




What are the top 3 product categories by revenue and quantity?



In [None]:
def _top_categories_bar(summary, metric, title, yaxis_title, text_template, colors, top_n=3):
    """Rank product categories by one metric and draw the leaders as a bar chart.

    Parameters
    ----------
    summary : pandas.DataFrame
        Per-category totals; must contain 'Product Category' and `metric`.
    metric : str
        Column to rank by and to plot on the y axis (also used as bar text).
    title : str
        Figure title.
    yaxis_title : str
        Caption for the y axis.
    text_template : str
        Plotly `texttemplate` used to format the value printed on each bar.
    colors : list of str
        Discrete colour sequence, one colour per category bar.
    top_n : int, default 3
        Number of leading categories to keep.

    Returns
    -------
    (pandas.DataFrame, plotly.graph_objects.Figure)
        The `top_n` rows of `summary` sorted by `metric` (descending) and the
        figure built from them. The figure is returned, not shown.
    """
    top_rows = summary.sort_values(by=metric, ascending=False).head(top_n)
    figure = px.bar(
        top_rows,
        x='Product Category',
        y=metric,
        title=title,
        color='Product Category',
        text=metric,
        color_discrete_sequence=colors,
        width=680,
        height=370,
    )
    figure.update_traces(texttemplate=text_template, textposition='auto')
    # Extra top margin keeps the title clear of the plot area.
    figure.update_layout(
        xaxis_title='Product Category',
        yaxis_title=yaxis_title,
        margin=dict(t=100),
    )
    return top_rows, figure


# Revenue and units sold, totalled per product category.
category_summary = df.groupby('Product Category')[['Total Amount', 'Quantity']].sum().reset_index()

light_colors = ['#AEC6CF', '#FFB347', '#BFD8B8']

# Top 3 by revenue; '.2s' prints the value with an SI prefix at 2 significant digits.
top_revenue, fig1 = _top_categories_bar(
    category_summary,
    metric='Total Amount',
    title='Top 3 Product Categories by Revenue',
    yaxis_title='Total Revenue',
    text_template='%{text:.2s}',
    colors=light_colors,
)

# Top 3 by quantity; the raw unit count is printed on each bar.
top_quantity, fig2 = _top_categories_bar(
    category_summary,
    metric='Quantity',
    title='Top 3 Product Categories by Quantity',
    yaxis_title='Total Quantity',
    text_template='%{text}',
    colors=light_colors,
)

fig1.show()
fig2.show()


In which months do sales peak or drop?

In [None]:
# Parse the 'Date' column into pandas datetimes and store it back on `df`,
# so the `.dt` accessor can be used on it.
# NOTE(review): no explicit `format=` is passed, so parsing relies on pandas'
# format inference — confirm the file's date layout is unambiguous.
df['Date'] = pd.to_datetime(df['Date'])
# Display the converted column as a quick check of the result.
df['Date']

In [None]:

# Add a 'Month' column holding the month name of each transaction date.
# NOTE(review): the year is discarded here, so if the data spans more than one
# year, the same month from different years will be pooled together — confirm
# that is acceptable for the monthly analysis.
df['Month'] = df['Date'].dt.month_name()
# Display the new column as a quick check of the result.
df['Month']

In [None]:

# Calendar order for the x axis; a plain groupby would sort months alphabetically.
month_order = ['January', 'February', 'March', 'April', 'May', 'June',
               'July', 'August', 'September', 'October', 'November', 'December']

# Total revenue per month name, laid out January..December.
# `fill_value=0` makes a month with no transactions show up as 0 sales rather
# than NaN; NaN would be printed as a "NaN" bar label and would silently
# upcast an integer total column to float.
monthly_sales = (
    df
    .groupby('Month', sort=False)['Total Amount']
    .sum()
    .reindex(month_order, fill_value=0)
    .reset_index()
)

# Bars are both sized and coloured by the monthly total, with the value
# printed on each bar.
fig = px.bar(
    monthly_sales,
    x='Month',
    y='Total Amount',
    title='Monthly Sales Trend',
    color='Total Amount',
    color_continuous_scale='Sunset',
    text='Total Amount',
    width=800,
    height=400
)

# Thousands separators, no decimals.
fig.update_traces(
    texttemplate='%{text:,.0f}',
    textposition='auto'
)
fig.update_layout(
    xaxis_title='Month',
    yaxis_title='Total Sales',
    # Pin the category order explicitly so the axis stays in calendar order.
    xaxis=dict(
        categoryorder='array',
        categoryarray=month_order
    ),
    # Hide bar labels that would have to shrink below 8pt to fit.
    uniformtext_minsize=8,
    uniformtext_mode='hide',
    margin=dict(t=100)
)

fig.show()


Do men or women spend more on average?

In [None]:

# Mean transaction value for each gender, as a two-column frame.
avg_spending = (
    df
    .groupby('Gender')['Total Amount']
    .mean()
    .reset_index()
)

pastel_colors = ['#B2DFDB', '#FFD54F']

# One bar per gender; the mean itself is printed on the bar.
gender_bar_options = dict(
    x='Gender',
    y='Total Amount',
    title='Average Spending by Gender',
    color='Gender',
    color_discrete_sequence=pastel_colors,
    text='Total Amount',
    width=600,
    height=400,
)
fig = px.bar(avg_spending, **gender_bar_options)

# Bar labels as dollar amounts with two decimals.
fig.update_traces(texttemplate='$%{text:.2f}', textposition='auto')

# White plot background, dollar-formatted y ticks, and extra room above the
# plot area for the title.
fig.update_layout(
    margin=dict(t=100),
    yaxis=dict(tickformat='$.2f'),
    plot_bgcolor='white',
    yaxis_title='Average Spending',
    xaxis_title='Gender',
)

fig.show()




Which age group buys the most quantity per transaction?


In [None]:

# Mean number of units per transaction for each age.
avg_quantity = df.groupby('Age')['Quantity'].mean().reset_index()
pastel_colors = px.colors.qualitative.Pastel

# Plotly Express maps a *numeric* colour column to a continuous colour scale
# and then ignores `color_discrete_sequence`. Colouring by a string copy of
# 'Age' makes the pastel palette actually apply. If 'Age' already holds
# strings the cast is a no-op. `avg_quantity` itself is left unchanged.
avg_quantity_plot = avg_quantity.assign(age_label=avg_quantity['Age'].astype(str))

fig = px.bar(
    avg_quantity_plot,
    x='Age',
    y='Quantity',
    title='Average Quantity Bought per Transaction by Age Group',
    color='age_label',
    color_discrete_sequence=pastel_colors,
    text='Quantity',
    labels={'age_label': 'Age'},
    # The x value already shows the age in the hover box; hide the duplicate.
    hover_data={'age_label': False},
    width=1400,
    height=400
)

# Print the mean on each bar with two decimals.
fig.update_traces(
    texttemplate='%{text:.2f}',
    textposition='auto'
)

fig.update_layout(
    xaxis_title='Age Group',
    yaxis_title='Average Quantity',
    plot_bgcolor='white',
    # One legend entry per age adds nothing the x axis does not already show.
    showlegend=False,
    margin=dict(t=100)
)

fig.show()




Is there a relationship between age and spending behavior?


In [None]:
# `google.colab` exists only inside Google Colab. Guard the import so this cell
# (and Restart & Run All) also works in a local Jupyter kernel; `files` stays
# available under the same name when running in Colab.
try:
    from google.colab import files
except ImportError:
    files = None

# One marker per transaction, with an ordinary-least-squares trendline of
# spending against age.
# NOTE(review): Plotly's `trendline='ols'` is understood to need the
# `statsmodels` package at runtime — confirm it is installed in the target
# environment.
fig = px.scatter(df, x='Age', y='Total Amount',
                 title='Relationship Between Age and Spending Behavior',
                 trendline='ols',
                 color_discrete_sequence=['#89CFF0'],
                 width=800, height=400)

# Restyle only the line-mode traces (the trendline), leaving the markers alone.
fig.update_traces(
    line=dict(color='blue', width=3),
    selector=dict(mode='lines'),
)

fig.update_layout(xaxis_title='Age', yaxis_title='Total Spending',
                  plot_bgcolor='white')

fig.show()

