In [15]:
import pandas as pd
import plotly.express as px

In [16]:


# Load dataset
df = pd.read_csv('simple_vegetable_sales_data.csv')

# Overview - shape, info
print(df.shape)
print(df.info())

(360, 3)
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 360 entries, 0 to 359
Data columns (total 3 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Month          360 non-null    int64  
 1   Sales(kg)      360 non-null    float64
 2   Supplier_Name  360 non-null    object 
dtypes: float64(1), int64(1), object(1)
memory usage: 8.6+ KB
None


In [17]:


# Quantitative analysis

# Total sales over 3 years
print(f"Total sales: \nMin: {df['Sales(kg)'].min()} \nMax: {df['Sales(kg)'].max()}")

# Distribution
fig = px.histogram(df, x="Sales(kg)")
fig.show()

# Monthly trends
fig = px.line(df, x="Month", y="Sales(kg)", color='Supplier_Name')
fig.show()


Total sales: 
Min: -245.39 
Max: 9798.1


In [18]:

# Categorical analysis

# Suppliers count
print(df['Supplier_Name'].value_counts())

# Total sales per supplier
supplier_sales = df.groupby('Supplier_Name')['Sales(kg)'].sum()
fig = px.bar(supplier_sales, x=supplier_sales.index, y='Sales(kg)')
fig.show()

Walmart      36
Carrefour    36
Tesco        36
Metro        36
Aldi         36
Kroger       36
Costco       36
Schwarz      36
Lidl         36
Target       36
Name: Supplier_Name, dtype: int64


# Seasonality

In [19]:
monthly_means = df.pivot_table(values='Sales(kg)', index='Month', aggfunc='mean')

fig = px.line(monthly_means, x=monthly_means.index, y='Sales(kg)')
fig.update_xaxes(dtick=2)
fig.show()

# Supplier Differences

In [20]:
supplier_means = df.pivot_table(values='Sales(kg)', index='Month',columns='Supplier_Name', aggfunc='mean')

fig = px.line(supplier_means, x=supplier_means.index, y=supplier_means.columns)
fig.show()

# Shipment Variability

In [21]:
fig = px.box(df, x="Month", y="Sales(kg)")
fig.show()