# Merge exploration between Percentage of E-Commerce Sales and Purchases by Individuals

In [None]:
# Execute the two cleaning notebooks so that the names they define are available below.
# NOTE(review): import_df_ecommerce() and import_df(), called at the end of this cell,
# are not imported anywhere in this cell, so they presumably come from these runs — confirm.
%run cleaning_ecommerce_sales.ipynb
%run cleaning_purchases_individuals.ipynb

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

import functions
import dictionaries

# E-commerce sales frames. NOTE(review): import_df_ecommerce is neither defined nor imported
# in this cell; presumably it is provided by the %run of cleaning_ecommerce_sales.ipynb — confirm.
df_cat, df_all, df_euro_all = import_df_ecommerce() # type: ignore
# Purchases-by-individuals frames (import_df presumably comes from the
# %run of cleaning_purchases_individuals.ipynb — confirm).
df_purchases_cleaned_v3, df_per_region = import_df() # type: ignore


## Online Purchases Processing

In [None]:
# Show the distinct values of "last_online_purchase"; the filter below matches one of them.
# Wrapped in display() because a bare mid-cell expression is not rendered by default.
display(df_per_region["last_online_purchase"].unique())

# Keep only the rows where grouped_individuals is "All individuals" and
# last_online_purchase is "in the 12 months", then remove the two filter columns.
# The drop is chained (not inplace) so it never mutates a filtered slice of
# df_per_region, which is what triggers pandas' SettingWithCopyWarning.
df_sales_all = df_per_region.loc[
    (df_per_region["grouped_individuals"] == "All individuals")
    & (df_per_region["last_online_purchase"] == "in the 12 months")
].drop(columns=["last_online_purchase", "grouped_individuals"])

# Per-region mean of the numeric columns, rounded to 2 decimals.
# NOTE: the name df_all_preliminary is reassigned by the e-commerce sales cell further down.
df_all_preliminary = df_sales_all.groupby(["region"]).mean(numeric_only=True).round(2)
display(df_all_preliminary)

df_sales_all

## E-Commerce Sales Processing

In [None]:
# Reshape the long sales table to wide form: one row per (region, country)
# and one column per year holding the "sales" value.
df_all_years = (
    df_all
    .pivot(index=["region", "country"], columns="year", values="sales")
    .reset_index()
)

# Regional means of the numeric columns, rounded to two decimals.
df_all_preliminary = (
    df_all_years
    .groupby(["region"])
    .mean(numeric_only=True)
    .round(2)
)
display(df_all_preliminary)

df_all_years

## Merge of Sales and Purchases dataframes

In [None]:
# Right join on (region, country): every row of df_sales_all is kept.
# Non-key columns present in both frames receive pandas' default suffixes:
#   <label>_x -> E-Commerce Sales (left frame, df_all_years)
#   <label>_y -> Purchases        (right frame, df_sales_all)
df_merge = pd.merge(
    df_all_years,
    df_sales_all,
    how="right",
    on=["region", "country"],
)
df_merge

# Per-region mean of the merged numeric columns, rounded to two decimals.
df_merge_regions_preliminary = (
    df_merge
    .groupby(["region"])
    .mean(numeric_only=True)
    .round(2)
)
display(df_merge_regions_preliminary)

In [None]:
# Value columns (everything except the merge keys) of each source frame.
sales_columns = [label for label in df_all_years.columns if label not in ("region", "country")]
purchases_columns = [label for label in df_sales_all.columns if label not in ("region", "country")]

# Every non-key column currently in the merged frame; these are dropped
# once the ratio columns have been computed.
temporary_columns = df_merge.columns.drop(["region", "country"]).to_list()
print(temporary_columns)

def ratio_func(x, y):
    """Return ``x / y`` rounded to two decimals, or NaN when it is undefined.

    The ratio is undefined (NaN is returned) when either operand is
    missing according to ``pd.isna`` or when the denominator ``y`` is zero.
    """
    has_missing_operand = pd.isna(x) or pd.isna(y)
    if has_missing_operand or y == 0:
        return np.nan
    quotient = x / y
    return round(quotient, 2)

# Compute the sales / purchases ratio for every year present in BOTH source frames.
# Years are paired by label rather than by position, so a year missing from one
# frame (or a different column order) can no longer pair mismatched years.
common_years = [year for year in sales_columns if year in purchases_columns]

for year in common_years:
    # The merge suffixed the overlapping columns: "_x" = e-commerce sales, "_y" = purchases.
    sales_col, purchases_col = f"{year}_x", f"{year}_y"
    df_merge[year] = df_merge.apply(
        lambda row: ratio_func(row[sales_col], row[purchases_col]), axis=1
    )

# Remove the raw merged columns so only the keys and the ratio columns remain.
# NOTE: df_merge is overwritten here; to re-run this cell, re-run the merge cell first,
# because the suffixed input columns no longer exist afterwards.
df_merge = df_merge.drop(columns=temporary_columns)

df_merge


In [None]:
# Aggregate by region: mean of the numeric columns, rounded to two decimals.
df_merge_regions = (
    df_merge
    .groupby(["region"])
    .mean(numeric_only=True)
    .round(2)
)
df_merge_regions

In [None]:
# Reshape for plotting: after transposing, each row is a year and each column a region.
df_transposed = df_merge_regions.transpose()
# Cast the year labels to integers so they plot properly on the axis.
df_transposed.index = df_transposed.index.astype(int)

# Draw the line chart.
# NOTE(review): the positional arguments look like x-label, y-label, title and legend title —
# confirm against functions.plot_line_chart.
functions.plot_line_chart(
    df_transposed,
    "Year",
    "Ratio Sales / Purchases",
    "Ratio Sales / Purchases",
    "Region",
)