<a href="https://colab.research.google.com/github/aminadli/Supply-Chain-Analysis/blob/main/supply_chain.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns


In [4]:
# Load the supply-chain dataset directly from the project's GitHub repository.
DATA_URL = 'https://raw.githubusercontent.com/aminadli/Supply-Chain-Analysis/main/supply_chain_data.csv'
df = pd.read_csv(DATA_URL)


In [5]:
# Preview the first rows of the frame (pandas shows five by default).
df.head()

Unnamed: 0,Product type,SKU,Price,Availability,Number of products sold,Revenue generated,Customer demographics,Stock levels,Lead times,Order quantities,...,Location,Lead time,Production volumes,Manufacturing lead time,Manufacturing costs,Inspection results,Defect rates,Transportation modes,Routes,Costs
0,haircare,SKU0,69.808006,55,802,8661.996792,Non-binary,58,7,96,...,Mumbai,29,215,29,46.279879,Pending,0.22641,Road,Route B,187.752075
1,skincare,SKU1,14.843523,95,736,7460.900065,Female,53,30,37,...,Mumbai,23,517,30,33.616769,Pending,4.854068,Road,Route B,503.065579
2,haircare,SKU2,11.319683,34,8,9577.749626,Unknown,1,10,88,...,Mumbai,12,971,27,30.688019,Pending,4.580593,Air,Route C,141.920282
3,skincare,SKU3,61.163343,68,83,7766.836426,Non-binary,23,13,59,...,Kolkata,24,937,18,35.624741,Fail,4.746649,Rail,Route A,254.776159
4,skincare,SKU4,4.805496,26,871,2686.505152,Non-binary,5,3,56,...,Delhi,5,414,3,92.065161,Fail,3.14558,Air,Route A,923.440632


In [None]:
# Preview the last rows of the frame (pandas shows five by default).
df.tail()

In [None]:
# (number of rows, number of columns) of the loaded frame.
df.shape

In [None]:
# Print each column's dtype and non-null count, plus the frame's memory usage.
df.info()

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
df.describe()

In [None]:
# List the column labels; the plotting cells select columns by these exact names.
df.columns

In [None]:
# Count missing values per column.
df.isnull().sum()

In [None]:
# Bar chart: number of rows (products) in each product type.
fig, ax = plt.subplots(figsize=(8, 6))
sns.countplot(data=df, x='Product type', ax=ax)
ax.set_xlabel('Product Type')
ax.set_ylabel('Count')
ax.set_title('Number of Products by Type')
plt.show()


In [None]:
# Scatter plot: unit price on the x-axis against revenue generated on the y-axis.
fig, ax = plt.subplots(figsize=(8, 6))
sns.scatterplot(data=df, x='Price', y='Revenue generated', ax=ax)
ax.set(
    xlabel='Price',
    ylabel='Revenue Generated',
    title='Price vs Revenue Generated',
)
plt.show()


In [None]:
# Histogram (20 bins) of the number of products sold.
fig, ax = plt.subplots(figsize=(8, 6))
sns.histplot(data=df, x='Number of products sold', bins=20, ax=ax)
ax.set_xlabel('Number of Products Sold')
ax.set_ylabel('Count')
ax.set_title('Distribution of Number of Products Sold')
plt.show()


In [None]:
# Heatmap of row counts for every (product type, customer demographic) pair.
demographic_counts = pd.crosstab(df['Product type'], df['Customer demographics'])
fig, ax = plt.subplots(figsize=(10, 8))
sns.heatmap(demographic_counts, cmap='Blues', ax=ax)
ax.set(
    xlabel='Customer Demographics',
    ylabel='Product Type',
    title='Product Type by Customer Demographics',
)
plt.show()


In [None]:
# Box plot: distribution of the 'Lead times' column within each product type.
fig, ax = plt.subplots(figsize=(8, 6))
sns.boxplot(data=df, x='Product type', y='Lead times', ax=ax)
ax.set_xlabel('Product Type')
ax.set_ylabel('Lead Times')
ax.set_title('Lead Times by Product Type')
plt.show()


In [None]:
# Stacked bar chart: for each product type, count rows per
# (shipping carrier, transportation mode) combination.
carrier_mode_counts = pd.crosstab(
    df['Product type'],
    [df['Shipping carriers'], df['Transportation modes']],
)

# Create the axes first and hand them to pandas: DataFrame.plot opens its own
# figure when no `ax` is given, which would leave a blank (10, 8) figure behind
# and draw the chart at the default size instead.
fig, ax = plt.subplots(figsize=(10, 8))
carrier_mode_counts.plot(kind='bar', stacked=True, ax=ax)
ax.set_xlabel('Product Type')
ax.set_ylabel('Count')
ax.set_title('Shipping Carriers and Transportation Modes by Product Type')
plt.show()


In [None]:
# Scatter plot: production volumes (x) against manufacturing costs (y).
fig, ax = plt.subplots(figsize=(8, 6))
sns.scatterplot(data=df, x='Production volumes', y='Manufacturing costs', ax=ax)
ax.set(
    xlabel='Production Volumes',
    ylabel='Manufacturing Costs',
    title='Production Volumes vs Manufacturing Costs',
)
plt.show()


In [None]:
# Horizontal bar chart of the suppliers with the highest total revenue (at most 10).
supplier_revenue = df.groupby('Supplier name')['Revenue generated'].sum()
top_suppliers = supplier_revenue.nlargest(10)

fig, ax = plt.subplots(figsize=(8, 6))
# NOTE(review): seaborn 0.13+ emits a FutureWarning for `palette` without `hue` -- confirm target version.
sns.barplot(x=top_suppliers.values, y=top_suppliers.index, palette='Blues_d', ax=ax)
ax.set(
    xlabel='Revenue generated',
    ylabel='Supplier name',
    title='Top 10 suppliers by revenue',
)
plt.show()


In [None]:
# Histogram (30 bins) of product prices with a kernel density estimate overlaid.
fig, ax = plt.subplots(figsize=(8, 6))
sns.histplot(data=df, x='Price', kde=True, bins=30, ax=ax)
ax.set(
    xlabel='Price',
    ylabel='Frequency',
    title='Histogram of product prices',
)
plt.show()


In [None]:
# Box plot: price distribution within each product type.
fig, ax = plt.subplots(figsize=(8, 6))
sns.boxplot(data=df, x='Product type', y='Price', ax=ax)
ax.set(
    xlabel='Product type',
    ylabel='Price',
    title='Box plot of product prices by product type',
)
plt.show()


In [None]:
# Scatter plot: number of products sold (x) against revenue generated (y).
fig, ax = plt.subplots(figsize=(8, 6))
sns.scatterplot(data=df, x='Number of products sold', y='Revenue generated', ax=ax)
ax.set(
    xlabel='Number of products sold',
    ylabel='Revenue generated',
    title='Scatter plot of revenue generated vs. number of products sold',
)
plt.show()


In [None]:
# Violin plot: distribution of the 'Lead times' column for each value of 'Location'.
fig, ax = plt.subplots(figsize=(8, 6))
sns.violinplot(data=df, x='Location', y='Lead times', ax=ax)
ax.set(
    xlabel='Supplier location',
    ylabel='Lead times',
    title='Violin plot of lead times by supplier location',
)
plt.show()


In [None]:
# Annotated heatmap of the pairwise correlations between the numeric columns.
# Restrict to numeric dtypes explicitly: since pandas 2.0, DataFrame.corr() no
# longer drops text columns (such as 'Product type') on its own and raises
# instead. Selecting the numeric columns first works on old and new pandas alike.
fig, ax = plt.subplots(figsize=(12, 6))
corr = df.select_dtypes(include='number').corr()
sns.heatmap(corr, annot=True, cmap='Blues', ax=ax)
ax.set_title('Heatmap of the correlation between variables')
plt.show()


In [None]:
# Total revenue generated per product type, largest first.
product_revenue = (
    df.groupby('Product type')['Revenue generated']
    .sum()
    .sort_values(ascending=False)
)

# Horizontal bar chart of the highest-revenue product types (at most 10 bars).
top_product_revenue = product_revenue.head(10)
fig, ax = plt.subplots(figsize=(8, 6))
# Pass plain values for x so seaborn pairs bars and labels purely by position.
sns.barplot(
    x=top_product_revenue.values,
    y=top_product_revenue.index,
    palette='Blues_r',
    ax=ax,
)
ax.set_xlabel('Revenue Generated')
ax.set_ylabel('Product Type')
# The bars are product types, not individual products, so the title says so.
ax.set_title('Top Product Types by Revenue')
# Render explicitly so the cell does not echo the Text(...) repr of the title.
plt.show()


In [None]:
# Total shipping cost per transportation mode.
transportation_costs = df.groupby('Transportation modes')['Shipping costs'].sum()

# Pie chart of each mode's share of the total shipping cost, labelled with
# percentages to one decimal place.
fig, ax = plt.subplots(figsize=(8, 6))
ax.pie(transportation_costs, labels=transportation_costs.index, autopct='%1.1f%%')
ax.set_title('Shipping Costs by Transportation Mode')
# Render explicitly so the cell does not echo the Text(...) repr of the title.
plt.show()


In [None]:
# Scatter plot: manufacturing lead time (x) against manufacturing costs (y).
fig, ax = plt.subplots(figsize=(8, 6))
sns.scatterplot(data=df, x='Manufacturing lead time', y='Manufacturing costs', ax=ax)
ax.set_xlabel('Manufacturing Lead Time')
ax.set_ylabel('Manufacturing Costs')
ax.set_title('Manufacturing Costs vs Lead Time')
# Render explicitly so the cell does not echo the Text(...) repr of the title.
plt.show()


*Based on the analysis of this dataset, it is evident that a wealth of information can be extracted to gain insight into supply chain operations. Using a range of visualizations, we examined the most popular products, revenue generation, transportation modes, shipping costs, lead times, and manufacturing costs, among other metrics.*

*This analysis can help inform strategic decision-making and identify areas for improvement in supply chain operations. For example, it may be possible to optimize inventory management, reduce waste and inefficiencies, improve product quality, reduce carbon emissions, and strengthen supplier relationships.*

*Overall, the insights gained from analyzing this dataset can help drive positive change in the supply chain, leading to improved efficiency, sustainability, and profitability.*