# Exploring E-Commerce Sales

In [None]:
%run cleaning_ecommerce_sales.ipynb
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

import functions

# Unpack the three objects returned by `import_df_ecommerce`; later cells use all of them
# as pandas DataFrames (pivot / sort_values / groupby).
# NOTE(review): `import_df_ecommerce` is not imported in this cell — presumably it is defined
# by the `%run cleaning_ecommerce_sales.ipynb` line above, hence the `# type: ignore` (confirm).
df_cat, df_all, df_euro_all = import_df_ecommerce() # type: ignore


## Check overall Euro Area values

In [None]:
# Reshape to wide format: one row per country, one column per year, cells hold `sales`.
df_euro_years = (
    df_euro_all
    .pivot(index=["country"], columns="year", values="sales")
    .reset_index()
)
df_euro_years

In [81]:
# Preprocess for plotting

# Build the plotting frame WITHOUT mutating `df_euro_years`: an in-place `set_index` makes
# this cell raise a KeyError when it is re-run, because 'country' is no longer a column.
df_transposed = df_euro_years.set_index('country').T # years as rows, one column per country
df_transposed.index = df_transposed.index.astype(int) # Convert the index (years) to integers for proper plotting /!\

In [None]:
# Line chart of the Euro Area values prepared in the previous cell.
# The arguments after the frame are presumably x-label, y-label, title and legend title
# (inferred from the other calls in this notebook — confirm against functions.plot_line_chart).
# The y-label and title used to say "by Size Group", but this frame has no size breakdown:
# its columns are the `country` values of `df_euro_all`.
functions.plot_line_chart(
    df_transposed,
    'Year',
    '% of E-Commerce Sales',
    '% of Sales that are from E-Commerce in the Euro Area (2010-2024)',
    'Region',
    )

In [None]:
# Pie chart: share of sales coming from e-commerce in 2024 versus everything else.
euro_2024 = df_euro_years[2024]

# Only the first row of the 2024 column is plotted — presumably the single Euro Area
# aggregate (TODO confirm that `df_euro_years` holds exactly one row).
ecommerce_share = euro_2024.values[0]

labels = 'Sales from E-Commerce', 'Others'
sizes = [ecommerce_share, 100 - ecommerce_share]  # the value is treated as a percentage of 100

fig, ax = plt.subplots()
ax.pie(sizes, labels=labels, autopct='%1.1f%%')
ax.set_title('Euro Area: Share of Sales from E-Commerce (2024)')
plt.show()  # also suppresses the repr of the last expression

## Hypothesis
- Smaller firms grew in recent years as technology became more affordable

### All-time historical ranking by country

In [None]:
# Every row of `df_all`, ordered from the highest to the lowest `sales` value,
# showing only the country, year and sales columns.
(
    df_all
    .sort_values(by="sales", ascending=False)
    .loc[:, ["country", "year", "sales"]]
)

In [None]:
# Wide table: one row per (size group, region, country), one column per year.
df_pivot_size = (
    df_cat
    .pivot(index=["size_emp", "region", "country"], columns="year", values="sales")
    .reset_index()
)
df_pivot_size

In [None]:
# Average of every numeric (year) column per (region, size group), rounded to two decimals.
df_region_group = (
    df_pivot_size
    .groupby(["region", "size_emp"])
    .mean(numeric_only=True)
    .round(2)
)
df_region_group

In [None]:
# Wide table: one row per country, one column per year, cells hold `sales`.
df_pivot_all = (
    df_all
    .pivot(index=["country"], columns="year", values="sales")
    .reset_index()
)
df_pivot_all

In [88]:
# Preprocess for plotting
# Build the plotting frame WITHOUT mutating `df_pivot_all`: an in-place `set_index` makes
# this cell raise a KeyError when it is re-run, because 'country' is no longer a column.
df_transposed = df_pivot_all.set_index('country').T # years as rows, one column per country
df_transposed.index = df_transposed.index.astype(int) # Convert the index (years) to integers for proper plotting /!\

In [None]:
# Plot the data: one line per country.
# The arguments after the frame are presumably x-label, y-label, title and legend title
# (confirm against functions.plot_line_chart). The title previously read "by by Country".
functions.plot_line_chart(
    df_transposed,
    'Year',
    '% of E-Commerce Sales',
    '% of Sales that are from E-Commerce by Country (2010-2024)',
    'Country',
    )

# With one line per country this chart is too messy to draw conclusions from.

### % of Sales that are from E-Commerce by European Region (2010-2024)

In [None]:
# Step 1: wide table with one row per (country, region) pair and one column per year.
df_regions_years = (
    df_all
    .pivot(index=["country", "region"], columns="year", values="sales")
    .reset_index()
)

# Step 2: average every numeric (year) column per region, rounded to two decimals.
df_region_group = (
    df_regions_years
    .groupby("region")
    .mean(numeric_only=True)
    .round(2)
)

df_region_group

# NOTE(review): an earlier attempt pivoted `df_all` on index=["region"] directly and was
# abandoned — presumably because several countries share a region, which `pivot` rejects
# as duplicate index entries (confirm).

In [91]:
# Preprocess for plotting

# `df_region_group` is already indexed by region, so no set_index is needed here.
# Flip it so that years become the rows and each region becomes a column.
df_transposed = df_region_group.transpose()
# Convert the index (years) to integers for proper plotting /!\
df_transposed.index = df_transposed.index.astype(int)

In [None]:
# Plot the data: one line per European region.
# The four strings are passed positionally after the frame, in this exact order.
region_chart_labels = (
    'Year',
    '% of E-Commerce Sales',
    '% of Sales that are from E-Commerce by European Region (2010-2024)',
    'Region',
)
functions.plot_line_chart(df_transposed, *region_chart_labels)

In [None]:
# Wide table: one row per (size group, country), one column per year.
# NOTE(review): this rebinds `df_pivot_size`, which an earlier cell built with an extra
# 'region' index level.
df_pivot_size = (
    df_cat
    .pivot(index=["size_emp", "country"], columns="year", values="sales")
    .reset_index()
)
df_pivot_size


In [None]:
# Statistical values overall by enterprise size groups
# The aggregations are passed as a list (not a set) so the output columns always appear in
# this order. The previous column pre-selection is dropped: it listed the grouping key
# itself and a 'country' column that was never aggregated.
df_cat.groupby("size_emp").agg({"sales": ["mean", "min", "max", "std"]}).round(2)

In [None]:
# Mean values for the % of sales for each enterprise size group year over year
df_mean_by_year = df_cat.pivot_table(
    index='size_emp',
    columns='year',
    values='sales',
    aggfunc='mean',
)
# Round to two decimals and order the rows by size group.
df_mean_by_year = df_mean_by_year.round(2).sort_index()

# Displayed with the size group as a regular column; `df_mean_by_year` itself keeps its index.
df_mean_by_year.reset_index()

In [96]:
# Preprocess for plotting

# `df_mean_by_year` is already indexed by size group, so no set_index is needed here.
# Flip it so that years become the rows and each size group becomes a column.
df_transposed = df_mean_by_year.transpose()
# Convert the index (years) to integers for proper plotting /!\
df_transposed.index = df_transposed.index.astype(int)

In [None]:
# Line chart with one line per enterprise size group.
# The last argument is presumably the legend title (the by-country chart passes 'Country'
# and the by-region chart passes 'Region' — confirm against functions.plot_line_chart).
# It used to say 'Region' here although the lines are size groups.
functions.plot_line_chart(
    df_transposed,
    'Year',
    '% of E-Commerce Sales by Size Group',
    '% of Sales that are from E-Commerce by Size Group (2010-2024)',
    'Size Group',
    )

## Ranking of the most digitalized countries per year

In [None]:
# Top country by sales for each (size group, year) combination.
# A stable descending sort followed by drop_duplicates keeps the first highest-sales row of
# every group — the same pick as nlargest(1) with its default keep='first', with NaN sales
# sorted last. This replaces groupby().apply(), whose use of the grouping columns is
# deprecated since pandas 2.2 and stops working once those columns are excluded from the
# applied frame (the later pivot needs 'size_emp' and 'year').
df_top = (df_cat
    .reset_index()
    .sort_values('sales', ascending=False, kind='stable')
    .drop_duplicates(subset=['size_emp', 'year'], keep='first')
    .sort_values(['size_emp', 'year'])  # same row order the groupby produced
    .reset_index(drop=True)
)

# Pivot with sizes as columns: one row per year, and for every size group a
# ('country', size) and a ('sales', size) column.
df_pivot_sizes = (df_top
    .pivot(
        index='year',
        columns='size_emp',
        values=['country', 'sales']
    )
    .reset_index()
)

# Reorder columns so each size group's country and sales sit next to each other.
# After reset_index the 'year' column has the two-level label ('year', '').
sizes = sorted(df_top['size_emp'].unique())
column_order = [('year', '')]
for size in sizes:
    column_order.extend([('country', size), ('sales', size)])

# Reorder and format
df_ranking_year = df_pivot_sizes[column_order].round(2).sort_values('year')

df_ranking_year

In [None]:
# Grouped bar chart: for every year, the leading country of each company-size group.
# Bar colour encodes the country and bar hatch encodes the size group.

# Create the DataFrame
# NOTE(review): these values look like a hand-copied snapshot of the `df_ranking_year`
# table (top country and its sales value per size group and year). They will not follow
# changes in the underlying data — confirm, and consider deriving them instead.
data = {
    'year': range(2010, 2025),
    'B(10-49)_country': ['Norway', 'Norway', 'Norway', 'Denmark', 'Czechia', 'Ireland', 'Ireland', 
                        'Norway', 'Denmark', 'Ireland', 'Denmark', 'Denmark', 'Ireland', 'Denmark', 'Denmark'],
    'B(10-49)_sales': [32.41, 29.46, 32.58, 24.83, 25.13, 27.91, 26.31, 27.01, 28.87, 
                       32.38, 36.33, 35.64, 34.1, 33.88, 35.58],
    'C(50-249)_country': ['Norway', 'Norway', 'Iceland', 'Ireland', 'Ireland', 'Ireland', 'Ireland',
                         'Ireland', 'Ireland', 'Ireland', 'Ireland', 'Sweden', 'Sweden', 'Sweden', 'Sweden'],
    'C(50-249)_sales': [42.02, 36.29, 50.21, 42.08, 43.1, 50.63, 47.0, 44.75, 48.2, 
                        50.1, 50.22, 48.1, 49.37, 46.47, 48.48],
    'D(>=250)_country': ['Sweden', 'Sweden', 'Iceland', 'Denmark', 'Denmark', 'Denmark', 'Belgium',
                        'Belgium', 'Belgium', 'Sweden', 'Sweden', 'Sweden', 'Sweden', 'Sweden', 'Sweden'],
    'D(>=250)_sales': [52.18, 51.38, 55.18, 52.7, 50.97, 53.48, 54.86, 57.22, 58.51,
                       63.55, 63.21, 65.56, 64.12, 62.38, 63.76]
}

df = pd.DataFrame(data)

# One entry per size category: (legend label, country column, sales column, hatch pattern).
# Bars are coloured by country, so colour cannot identify the size group; the hatch does.
size_groups = [
    ('Small (10-49)', 'B(10-49)_country', 'B(10-49)_sales', ''),
    ('Medium (50-249)', 'C(50-249)_country', 'C(50-249)_sales', '///'),
    ('Large (>=250)', 'D(>=250)_country', 'D(>=250)_sales', '...'),
]

# Get unique countries across the three country columns
all_countries = set()
for _, country_col, _, _ in size_groups:
    all_countries.update(df[country_col].unique())
all_countries = sorted(all_countries)

# Create a color map for countries
colors = {
    'Norway': '#1f77b4',    # blue
    'Sweden': '#2ca02c',    # green
    'Denmark': '#ff7f0e',   # orange
    'Ireland': '#d62728',   # red
    'Iceland': '#9467bd',   # purple
    'Belgium': '#8c564b',   # brown
    'Czechia': '#e377c2',   # pink
}

# Fail early with a readable message instead of a bare KeyError in the plotting loop.
missing_colors = [country for country in all_countries if country not in colors]
if missing_colors:
    raise KeyError(f"No color defined for: {missing_colors}")

# Set up the plot
fig, ax = plt.subplots(figsize=(20, 10))

# Width of each bar and positions
bar_width = 0.2
years = df['year'].unique()
x = np.arange(len(years))

# Plot bars for each size category: left, centre and right of every year tick
for group_idx, (size, country_col, sales_col, hatch) in enumerate(size_groups):
    pos = (group_idx - 1) * bar_width  # -bar_width, 0, +bar_width

    # Create bars; the black edge keeps the hatch visible on every fill colour
    bars = ax.bar(x + pos, df[sales_col], bar_width,
                  hatch=hatch, edgecolor='black')

    for bar, x_center, country, sales in zip(bars, x + pos, df[country_col], df[sales_col]):
        # Color bars by country
        bar.set_facecolor(colors[country])
        # Add country label on top of each bar
        ax.text(x_center, sales, country,
                ha='center', va='bottom', rotation=90)

# Customize the plot
ax.set_xlabel('Year')
ax.set_ylabel('% of Sales from E-Commerce')
ax.set_title('Top Country by % of E-Commerce Sales per Company Size and Year')
ax.set_xticks(x)
ax.set_xticklabels(years, rotation=45)

# Add legend for sizes. A second ax.legend() call replaces the Axes' current legend,
# so this one is re-attached with add_artist to keep both visible.
size_handles = [
    plt.Rectangle((0, 0), 1, 1, facecolor='white', edgecolor='black', hatch=hatch, label=size)
    for size, _, _, hatch in size_groups
]
size_legend = ax.legend(handles=size_handles, title='Company Size', loc='upper left')
ax.add_artist(size_legend)

# Add legend for countries (only the countries that actually appear in the data)
country_patches = [plt.Rectangle((0, 0), 1, 1, fc=colors[country]) for country in all_countries]
ax.legend(country_patches, all_countries, title='Countries',
          loc='upper right', bbox_to_anchor=(1.15, 1))

# Add grid for better readability
ax.grid(True, axis='y', linestyle='--', alpha=0.7)

# Adjust layout to prevent label cutoff
plt.tight_layout()

# Show plot
plt.show()

In [None]:
# Wide table: one row per (country, size group), one column per year.
# The full pivot is kept in the variable and only the displayed output is truncated, so
# any later use of `df_pivot_country` is not silently limited to its first 50 rows.
df_pivot_country = (
    df_cat
    .pivot(index=['country', 'size_emp'], columns='year', values='sales')
    .reset_index()
)
df_pivot_country.head(50)

In [101]:
# # Map countries into regions

# df_cat_copy = df_cat.copy()
# df_regions = functions.region_mapping(df_cat_copy)
# df_regions

In [None]:
# 
# Wide table with one column per region: rows are (year, size group, country) combinations.
# NOTE(review): this rebinds `df_regions_years`, which an earlier cell built from `df_all`
# with one column per year.
df_regions_years = (
    df_cat
    .pivot(index=["year", "size_emp", "country"], columns="region", values="sales")
    .reset_index()
)

df_regions_years