In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Read the raw CSV into a DataFrame; the file is decoded as ISO-8859-1 (Latin-1)
data = pd.read_csv(
    'data.csv',
    encoding='ISO-8859-1',
)

# Preview the first rows of the freshly loaded frame
data.head()

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Sanity-check the loaded dataset
#
# `data` was already read from 'data.csv' by the previous cell, so the file is
# not loaded a second time here. Instead, confirm that every column the later
# cells reference is present, so a wrong or truncated file fails right away
# with a clear message rather than with a KeyError in the middle of the analysis.
REQUIRED_COLUMNS = [
    'StockCode', 'Description', 'Quantity', 'InvoiceDate',
    'UnitPrice', 'CustomerID', 'Country',
]
missing_columns = [col for col in REQUIRED_COLUMNS if col not in data.columns]
assert not missing_columns, f"data.csv is missing expected columns: {missing_columns}"

# Show the column dtypes as loaded (InvoiceDate is converted to datetime during cleaning)
data.dtypes

In [None]:
# Data Cleaning and Preprocessing
#
# The cleaning steps are written as one chain that returns a brand-new frame.
# Nothing is modified in place on the result of a boolean-mask filter, which is
# the pattern pandas may flag with SettingWithCopyWarning.
data = (
    # Removing cancelled orders: keep only rows with a positive Quantity
    data[data['Quantity'] > 0]
    # Removing rows where CustomerID is NA
    .dropna(subset=['CustomerID'])
    .assign(
        # Total price of each line item
        TotalPrice=lambda df: df['Quantity'] * df['UnitPrice'],
        # Convert InvoiceDate from object type to datetime type
        InvoiceDate=lambda df: pd.to_datetime(df['InvoiceDate']),
    )
)
data.head()

In [None]:
# Top Products Analysis

# Total quantity sold per product (one row per StockCode/Description pair),
# sorted in descending order and cut down to the ten best sellers
top_products = (
    data.groupby(['StockCode', 'Description'])['Quantity']
    .sum()
    .reset_index()
    .sort_values(by='Quantity', ascending=False)
    .head(10)
)

# Plotting the top selling products on an explicit Axes.
# `palette` is paired with `hue` because seaborn 0.13 deprecated passing a
# palette without a hue variable; dodge=False keeps one full-width bar per product.
fig, ax = plt.subplots(figsize=(15, 10))
sns.barplot(
    data=top_products,
    y='Description',
    x='Quantity',
    hue='Description',
    dodge=False,
    palette='viridis',
    ax=ax,
)
# Some seaborn versions add a legend whenever hue is set; here it would only
# repeat the y-axis labels, so drop it if it was created.
if ax.get_legend() is not None:
    ax.get_legend().remove()
ax.set_title('Top 10 Selling Products')
ax.set_xlabel('Total Quantity Sold')
ax.set_ylabel('Product Description')
plt.show()

In [None]:
# Monthly Sales Trend Analysis

# Extracting month and year from InvoiceDate (stored as a monthly Period)
data['InvoiceMonth'] = data['InvoiceDate'].dt.to_period('M')

# Grouping by InvoiceMonth to get the total sales for each month
monthly_sales = data.groupby('InvoiceMonth')['TotalPrice'].sum().reset_index()

# Period values are not a native matplotlib/seaborn axis type and, depending on
# the installed versions, passing them to lineplot fails with a TypeError.
# Plot from a copy whose months are converted to month-start timestamps;
# `monthly_sales` itself keeps its Period column.
monthly_sales_plot = monthly_sales.assign(
    InvoiceMonth=monthly_sales['InvoiceMonth'].dt.to_timestamp()
)

# Plotting the monthly sales trend
fig, ax = plt.subplots(figsize=(15, 10))
sns.lineplot(
    data=monthly_sales_plot,
    x='InvoiceMonth',
    y='TotalPrice',
    marker='o',
    color='teal',
    ax=ax,
)
ax.set_title('Monthly Sales Trend')
ax.set_xlabel('Month')
ax.set_ylabel('Total Sales')
ax.grid(True, which='both', linestyle='--', linewidth=0.5)
plt.show()

In [None]:
# Sales Distribution by Country Analysis

# Total sales per country, sorted in descending order
sales_by_country = (
    data.groupby('Country')['TotalPrice']
    .sum()
    .reset_index()
    .sort_values(by='TotalPrice', ascending=False)
)

# Plotting the sales distribution by country on an explicit Axes.
# `palette` is paired with `hue` because seaborn 0.13 deprecated passing a
# palette without a hue variable; dodge=False keeps one full-width bar per country.
fig, ax = plt.subplots(figsize=(15, 10))
sns.barplot(
    data=sales_by_country,
    y='Country',
    x='TotalPrice',
    hue='Country',
    dodge=False,
    palette='viridis',
    ax=ax,
)
# Some seaborn versions add a legend whenever hue is set; here it would only
# repeat the y-axis labels, so drop it if it was created.
if ax.get_legend() is not None:
    ax.get_legend().remove()
ax.set_title('Sales Distribution by Country')
ax.set_xlabel('Total Sales')
ax.set_ylabel('Country')
plt.show()

In [None]:
# Sales Distribution by Hour Analysis

# Extracting hour from InvoiceDate
data['Hour'] = data['InvoiceDate'].dt.hour

# Grouping by Hour to get the total sales for each hour
sales_by_hour = data.groupby('Hour')['TotalPrice'].sum().reset_index()

# Plotting the sales distribution by hour on an explicit Axes.
# `palette` is paired with `hue` because seaborn 0.13 deprecated passing a
# palette without a hue variable; dodge=False keeps one full-width bar per hour.
# NOTE(review): Hour is numeric, so newer seaborn may shade bars by hour value
# rather than by bar position - confirm the colouring is acceptable.
fig, ax = plt.subplots(figsize=(15, 10))
sns.barplot(
    data=sales_by_hour,
    x='Hour',
    y='TotalPrice',
    hue='Hour',
    dodge=False,
    palette='viridis',
    ax=ax,
)
# Some seaborn versions add a legend whenever hue is set; here it would only
# repeat the x-axis labels, so drop it if it was created.
if ax.get_legend() is not None:
    ax.get_legend().remove()
ax.set_title('Sales Distribution by Hour')
ax.set_xlabel('Hour')
ax.set_ylabel('Total Sales')
plt.show()

In [None]:
# Sales Distribution by Day of the Week Analysis

# Day labels in calendar order. The list position matches the integer produced
# by `.dt.dayofweek` (0 = Monday ... 6 = Sunday), so this single list drives both
# the number-to-name mapping and the ordering of the bars.
day_order = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']
days = dict(enumerate(day_order))

# Extracting day of the week from InvoiceDate and mapping it to its name
data['DayOfWeek'] = data['InvoiceDate'].dt.dayofweek.map(days)

# Grouping by DayOfWeek to get the total sales for each day of the week.
# fill_value=0 makes a weekday with no rows show up as zero sales instead of NaN.
sales_by_day = (
    data.groupby('DayOfWeek')['TotalPrice']
    .sum()
    .reindex(day_order, fill_value=0)
    .reset_index()
)

# Plotting the sales distribution by day of the week on an explicit Axes.
# `palette` is paired with `hue` because seaborn 0.13 deprecated passing a
# palette without a hue variable; dodge=False keeps one full-width bar per day.
fig, ax = plt.subplots(figsize=(15, 10))
sns.barplot(
    data=sales_by_day,
    x='DayOfWeek',
    y='TotalPrice',
    hue='DayOfWeek',
    dodge=False,
    palette='viridis',
    ax=ax,
)
# Some seaborn versions add a legend whenever hue is set; here it would only
# repeat the x-axis labels, so drop it if it was created.
if ax.get_legend() is not None:
    ax.get_legend().remove()
ax.set_title('Sales Distribution by Day of the Week')
ax.set_xlabel('Day of the Week')
ax.set_ylabel('Total Sales')
plt.show()