In [None]:
import pandas as pd
import numpy as np

# **1. Import Data**

In [None]:
df = pd.read_csv('sales_data.csv')

In [None]:
type(df)

In [None]:
df.head()

In [None]:
df.tail()

In [None]:
# Position-based slice: rows at positions 10-19 and the first three columns
# (iloc end bounds are exclusive).
df.iloc[10:20,0:3]

In [None]:
df.shape

In [None]:
df['Customer_Gender'].unique()

In [None]:
df['Product'].unique()

In [None]:
df['Region'].value_counts()

# **2. Explore Data**

## 2.1  Datatypes

In [None]:
df.info()

In [None]:
# Parse the 'Date' column into pandas datetimes. Later cells use the .dt
# accessor on this column (year, month, day, quarter), which requires a
# datetime dtype.
df['Date'] = pd.to_datetime(df['Date'])

In [None]:
df.info()

In [None]:
df[['Sales','Customer_Age','Customer_Satisfaction']].head()

In [None]:
df[df['Region'] == 'East'].head()

In [None]:
# Build each condition as a named boolean mask, then combine them, so the
# filter reads as "East region AND female customers".
is_east = df['Region'] == 'East'
is_female = df['Customer_Gender'] == 'Female'
df[is_east & is_female].head()

## 2.2 Calculate Statistics

In [None]:
df.describe().T

In [None]:
df['Customer_Age'].mean(), df['Customer_Satisfaction'].median()

In [None]:
df['Customer_Age'].std()

In [None]:
df['Customer_Age'].mode()

In [None]:
df.groupby(by='Customer_Gender')['Customer_Satisfaction'].median()

In [None]:
df.groupby(by='Region')['Customer_Satisfaction'].mean()

In [None]:
# Display the month, year and day components of 'Date' as a tuple of three Series.
df['Date'].dt.month, df['Date'].dt.year, df['Date'].dt.day

## 2.3 Add Columns

In [None]:
import calendar
# Label each row with its year and abbreviated month name, "<year>-<Mon>".
# The vectorised .dt.strftime accessor replaces a row-wise df.apply(axis=1),
# which ran a Python lambda once per row. '%b' yields the same abbreviated
# month names that calendar.month_abbr provides. Missing dates now produce a
# missing label instead of raising inside the lambda.
df['Date_Month'] = df['Date'].dt.strftime('%Y-%b')

In [None]:
df.groupby(by='Date_Month')['Sales'].sum()

In [None]:
df.head()

In [None]:
# Express sales in thousands. Dividing the whole column at once is the
# vectorised equivalent of applying a per-element lambda and gives the same
# values.
df['Sales in Thousands'] = df['Sales'] / 1000

In [None]:
df.head()

# **3. Data Visualizations**

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Average customer satisfaction per region, one row per region.
df1 = df.groupby(by='Region', as_index=False)['Customer_Satisfaction'].mean()

# The bars show mean Customer_Satisfaction, so the title and y-axis label
# describe satisfaction; the previous labels described sales totals, which
# this cell does not compute.
plt.figure(figsize = (6,4))
plt.bar(x=df1['Region'], height = df1['Customer_Satisfaction'])
plt.title("Average Customer Satisfaction by Region")
plt.xlabel("Region")
plt.ylabel("Average Customer Satisfaction")
plt.show()

In [None]:
# Tag every row with the calendar quarter of its date.
df['Quarter'] = df['Date'].dt.quarter

# Total sales per quarter, restricted to rows dated in 2024.
is_2024 = df['Date'].dt.year == 2024
sales_by_quarter_2024 = df[is_2024].groupby('Quarter')['Sales'].sum()

# Draw on an explicit Axes rather than through the pyplot state machine.
fig, ax = plt.subplots(figsize=(6, 3))
ax.plot(
    sales_by_quarter_2024.index,
    sales_by_quarter_2024.values,
    marker='o',
    linestyle='-',
)
ax.set_xlabel('Quarter')
ax.set_ylabel('Total Sales')
ax.set_title('Sales by Quarter 2024')
ax.set_xticks(sales_by_quarter_2024.index)  # one tick per quarter present in the data
ax.grid(True)
plt.show()

In [None]:
# Monthly sales totals for 2024, plotted in calendar order.
sales_2024 = df[df['Date'].dt.year==2024]

# Group on a monthly Period instead of the 'Date_Month' string label: grouping
# on strings such as '2024-Apr' sorts them alphabetically (Apr, Aug, Dec, ...),
# which scrambles the time axis. Periods sort chronologically.
total_sales = (
    sales_2024
    .groupby(sales_2024['Date'].dt.to_period('M'))['Sales']
    .sum()  # sum, not mean: the chart is titled and labelled "Total Sales"
    .rename_axis('Date_Month')
    .reset_index()
)
# Convert the periods back to "<year>-<Mon>" labels for the x axis.
total_sales['Date_Month'] = total_sales['Date_Month'].dt.strftime('%Y-%b')

plt.figure(figsize = (8,4))
# sort=False keeps the chronological row order; the default would re-sort
# the string labels.
sns.lineplot(x=total_sales['Date_Month'], y=total_sales['Sales'], sort=False)
plt.title("Total Sales by Date")
plt.xlabel("Date_Month")
plt.xticks(rotation=90)
plt.ylabel("Total Sales")
plt.show()

In [None]:
# Pairwise Pearson correlations between the three numeric measures.
corr_columns = ['Sales', 'Customer_Age', 'Customer_Satisfaction']
corrs = df[corr_columns].corr(method='pearson')

In [None]:
corrs

In [None]:
import seaborn as sns

In [None]:
# Heatmap of the correlation matrix. 'bwr' is a diverging colormap, so the
# colour scale is pinned to the full correlation range [-1, 1]; that puts
# white at exactly zero, with blue negative and red positive. Without fixed
# limits the scale would follow the data's min/max and the midpoint colour
# would not mean "no correlation".
sns.heatmap(corrs, annot=True, cmap='bwr', vmin=-1, vmax=1)
plt.title("Pearson Correlation Matrix")
plt.show()