# Exploring E-Commerce Sales

In [None]:
%run cleaning_ecommerce_sales.ipynb
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

import functions

# Load the three working DataFrames used throughout this notebook.
# NOTE(review): import_df_ecommerce is not imported in this cell; presumably it
# is defined by the %run of cleaning_ecommerce_sales.ipynb above — confirm.
df_cat, df_all, df_euro_all = import_df_ecommerce() # type: ignore


## Check overall Euro Area values

In [None]:
# Reshape df_euro_all to wide form: one row per country, one column per year,
# with 'sales' as the cell values. reset_index() turns 'country' back into a
# regular column for display.
df_euro_years = (
    df_euro_all
    .pivot(index=["country"], columns="year", values="sales")
    .reset_index()
)
df_euro_years

In [27]:
# Prepare the Euro Area table for the line chart.

# Use 'country' as the row labels so that only year columns remain.
df_euro_years = df_euro_years.set_index("country")
# Flip the table: years become the rows, countries become the columns.
df_transposed = df_euro_years.transpose()
# Cast the year labels to integers so the x-axis is treated as numeric.
df_transposed.index = df_transposed.index.astype(int)

In [None]:
# Line chart of the Euro Area e-commerce sales share over the years.
# The axis label and title describe the Euro Area series held in df_transposed
# at this point; this table is not broken down by enterprise size group.
# NOTE(review): positional arguments look like (data, x-axis label,
# y-axis label, title, legend title) — confirm against functions.plot_line_chart.
functions.plot_line_chart(
    df_transposed,
    'Year',
    '% of E-Commerce Sales',
    '% of Sales that are from E-Commerce in the Euro Area (2010-2024)',
    'Region',
    )

In [None]:
# Pie chart: share of 2024 sales that came from e-commerce vs. everything else.
euro_2024 = df_euro_years[2024]

# Take the first entry of the 2024 column once and reuse it below.
# NOTE(review): assumes the first row is the Euro Area aggregate — confirm.
ecommerce_share = euro_2024.iloc[0]

labels = 'Sales from E-Commerce', 'Others'
# The values are percentages, so the remainder up to 100 is the non-e-commerce part.
sizes = [ecommerce_share, 100 - ecommerce_share]

fig, ax = plt.subplots()
ax.pie(sizes, labels=labels, autopct='%1.1f%%')
ax.set_title('Share of Sales from E-Commerce (2024)')

## Hypothesis
- Smaller firms grew more than bigger firms in recent years, as technology became more affordable.

### All-time historical ranking by country

In [None]:
# Rank every country/year observation by sales, highest first, and show only
# the columns relevant to the ranking.
ranking_columns = ["country", "year", "sales"]
df_all.sort_values(by="sales", ascending=False).loc[:, ranking_columns]

In [None]:
# Wide table of sales: one row per (size_emp, region, country) combination,
# one column per year.
size_region_country = ['size_emp', 'region', "country"]
df_pivot_size = (
    df_cat
    .pivot(index=size_region_country, columns='year', values='sales')
    .reset_index()
)
df_pivot_size

In [None]:
# Average the yearly values across countries within each (region, size_emp)
# pair, rounded to two decimals. numeric_only=True skips the text columns.
region_size_groups = df_pivot_size.groupby(["region", "size_emp"])
df_region_group = region_size_groups.mean(numeric_only=True).round(2)
df_region_group

In [None]:
# Wide table built from df_all: one row per country, one column per year.
df_pivot_all = (
    df_all
    .pivot(index=['country'], columns='year', values='sales')
    .reset_index()
)
df_pivot_all

In [34]:
# Prepare the per-country table for the line chart.

# Use 'country' as the row labels so that only year columns remain.
df_pivot_all = df_pivot_all.set_index('country')
# Flip the table: years become the rows, countries become the columns.
df_transposed = df_pivot_all.transpose()
# Cast the year labels to integers so the x-axis is treated as numeric.
df_transposed.index = df_transposed.index.astype(int)

In [None]:
# Line chart of the e-commerce sales share per country over the years.
# NOTE(review): positional arguments look like (data, x-axis label,
# y-axis label, title, legend title) — confirm against functions.plot_line_chart.
functions.plot_line_chart(
    df_transposed,
    'Year',
    '% of E-Commerce Sales',
    '% of Sales that are from E-Commerce by Country (2010-2024)',
    'Country',
    )

# This chart is too cluttered to draw conclusions from, which is why the
# data is aggregated further in the following sections.

### % of Sales that are from E-Commerce by European Region (2010-2024)

In [None]:
# Wide table with one row per (country, region) pair and one column per year.
# 'country' stays in the pivot index so every country keeps its own row
# before the regional averaging below.
df_regions_years = (
    df_all
    .pivot(index=["country", "region"], columns="year", values="sales")
    .reset_index()
)

# Average the yearly values of all countries in each region, rounded to two
# decimals. numeric_only=True leaves the 'country' text column out of the mean.
regional_means = df_regions_years.groupby("region").mean(numeric_only=True)
df_region_group = regional_means.round(2)

df_region_group

In [37]:
# Prepare the regional averages for the line chart.

# No set_index is needed here: grouping by 'region' already leaves the
# region names as the index of df_region_group.
# Flip the table: years become the rows, regions become the columns.
df_transposed = df_region_group.transpose()
# Cast the year labels to integers so the x-axis is treated as numeric.
df_transposed.index = df_transposed.index.astype(int)

In [None]:
# Plot the regional averages with the shared line-chart helper.
# NOTE(review): the text arguments look like (x-axis label, y-axis label,
# title, legend title) — confirm against functions.plot_line_chart.
region_chart_text = (
    'Year',
    '% of E-Commerce Sales',
    '% of Sales that are from E-Commerce by European Region (2010-2024)',
    'Region',
)
functions.plot_line_chart(df_transposed, *region_chart_text)

In [None]:
# Wide table of sales: one row per (size_emp, country) pair, one column per
# year. This rebinds df_pivot_size without the 'region' level.
size_country = ['size_emp', 'country']
df_pivot_size = (
    df_cat
    .pivot(index=size_country, columns='year', values='sales')
    .reset_index()
)
df_pivot_size


In [None]:
# Overall summary statistics of 'sales' per enterprise size group.
# The statistics are passed as a list: a set has no defined order, so the
# resulting columns could come out in a different order from run to run.
# Only 'sales' is aggregated, so no column pre-selection is needed.
sales_statistics = ["mean", "min", "max", "std"]
df_cat.groupby("size_emp").agg({"sales": sales_statistics}).round(2)

In [None]:
# Year-over-year mean of 'sales' for each enterprise size group:
# one row per size group, one column per year, rounded to two decimals.
mean_sales_table = df_cat.pivot_table(
    index='size_emp',
    columns='year',
    values='sales',
    aggfunc='mean',
)
df_mean_by_year = mean_sales_table.round(2).sort_index()

# Show the table with 'size_emp' as a regular column (df_mean_by_year itself
# keeps 'size_emp' as its index).
df_mean_by_year.reset_index()

In [42]:
# Prepare the size-group means for the line chart.

# No set_index is needed here: pivot_table already leaves 'size_emp' as the
# index of df_mean_by_year.
# Flip the table: years become the rows, size groups become the columns.
df_transposed = df_mean_by_year.transpose()
# Cast the year labels to integers so the x-axis is treated as numeric.
df_transposed.index = df_transposed.index.astype(int)

In [None]:
# Line chart of the mean e-commerce sales share per enterprise size group.
# The last argument names the plotted series; they are size groups here, matching
# how the country and region charts pass 'Country' and 'Region'.
# NOTE(review): positional arguments look like (data, x-axis label,
# y-axis label, title, legend title) — confirm against functions.plot_line_chart.
functions.plot_line_chart(
    df_transposed,
    'Year',
    '% of E-Commerce Sales by Size Group',
    '% of Sales that are from E-Commerce by Size Group (2010-2024)',
    'Size Group',
    )

## Ranking of top digitalized countries per year.

In [None]:
# Top country by sales for every (enterprise size, year) combination.
df_flat = df_cat.reset_index()

# Find the winning rows through the 'sales' column only instead of
# DataFrameGroupBy.apply: pandas >= 2.2 deprecates apply operating on the
# grouping columns, and once those are excluded the 'size_emp' / 'year'
# columns required by the pivot below would be missing.
# The last index level of the result holds the original row labels.
top_row_labels = (
    df_flat
    .groupby(['size_emp', 'year'])['sales']
    .nlargest(1)
    .index
    .get_level_values(-1)
)
df_top = df_flat.loc[top_row_labels].reset_index(drop=True)

# Pivot with sizes as columns.
# Sales are rounded BEFORE pivoting: pivoting text ('country') and numeric
# ('sales') values together produces object-dtype columns, on which a later
# DataFrame.round() has no effect. assign() leaves df_top itself unrounded.
df_pivot_sizes = (
    df_top
    .assign(sales=df_top['sales'].round(2))
    .pivot(
        index='year',
        columns='size_emp',
        values=['country', 'sales'],
    )
    .reset_index()
)

# Reorder columns so each size group's country and sales sit side by side,
# with the year first.
sizes = sorted(df_top['size_emp'].unique())
column_order = [('year', '')] + [
    (field, size) for size in sizes for field in ('country', 'sales')
]

df_ranking_year = df_pivot_sizes[column_order].sort_values('year')

df_ranking_year