In [103]:
# %load_ext pretty_jupyter

In [104]:
import numpy as np
import pandas as pd
import plotly
import plotly.offline as pyo
import plotly.express as px
# Enable plotly's offline notebook rendering so figures display inline.
# connected=True makes the notebook load plotly.js from the CDN rather than
# embedding the library in the notebook file (see the plotly.offline docs).
pyo.init_notebook_mode(connected=True)

# Datasets

First, we read in the data and try to understand it.

In [105]:
# Both CSV files are read with the same options: the "1250" encoding alias
# (Windows code page 1250) and low_memory=False so pandas parses each file
# in a single pass instead of in chunks.
csv_options = {"encoding": "1250", "low_memory": False}

services = pd.read_csv("services_annual_dataset.csv", **csv_options)
merchandise = pd.read_csv("merchandise_values_annual_dataset.csv", **csv_options)

## Overview

Both datasets are lists of trade records (imports or exports) between parties for a given service or merchandise product.

## Preprocess

First of all, we dropped most of the columns, keeping only the few that we thought would be the most interesting for the analysis.

Second, we simplified the Indicator column so that it contains only the word "Import" or "Export".

Lastly, from both data frames we removed the "Total" product rows, the rows whose Value is 0, and the rows that have "World" as the Reporter.

In [106]:
# The same set of columns is removed from both data frames: numeric/ISO code
# duplicates, period/frequency/unit metadata, value flags and classification
# fields. None of them is referenced by the cells that follow.
unused_columns = [
    "IndicatorCategory",
    "IndicatorCode",
    "ReporterCode",
    "PartnerCode",
    "ProductClassificationCode",
    "ProductCode",
    "PeriodCode",
    "Period",
    "FrequencyCode",
    "Frequency",
    "UnitCode",
    "ValueFlagCode",
    "ValueFlag",
    "Unit",
    "ProductClassification",
]

merchandise = merchandise.drop(columns=unused_columns)

services = services.drop(columns=unused_columns)

In [107]:
# Collapse the verbose indicator labels into the two trade directions.
# The labels must match the raw data exactly (note the double space in the
# 1980-2013 services export label).
service_import_labels = [
    'Commercial services imports by sector - annual (1980-2013)',
    'Commercial services imports by sector and partner - annual',
]
service_export_labels = [
    'Commercial services exports by sector - annual  (1980-2013)',
    'Commercial services exports by sector and partner - annual',
]
services.loc[services["Indicator"].isin(service_import_labels), "Indicator"] = "Import"
services.loc[services["Indicator"].isin(service_export_labels), "Indicator"] = "Export"

# Merchandise has a single label per direction.
merchandise_labels = {
    'Merchandise exports by product group - annual': "Export",
    'Merchandise imports by product group - annual': "Import",
}
for raw_label, direction in merchandise_labels.items():
    merchandise.loc[merchandise["Indicator"] == raw_label, "Indicator"] = direction

In [108]:
# Keep only rows that are neither the aggregate "total" product nor reported
# by the "World" pseudo-reporter.
keep_services = (services["Product"] != "Memo item: Total services") & (services["Reporter"] != "World")
services = services[keep_services]

keep_merchandise = (merchandise["Product"] != "Total merchandise") & (merchandise["Reporter"] != "World")
merchandise = merchandise[keep_merchandise]

In [109]:
# Discard records whose reported Value is exactly 0.
services = services.loc[services["Value"].ne(0)]

merchandise = merchandise.loc[merchandise["Value"].ne(0)]

## Final data frame
[//]: # (-.- .tabset .tabset-pills)


Here is a random sample of 5 rows from each of the data frames we will be working with

### Services

In [110]:
# Preview five randomly chosen rows. The fixed random_state keeps the preview
# identical across "Restart & Run All" executions.
services.sample(n=5, random_state=42).reset_index(drop=True)

Unnamed: 0,Indicator,ReporterISO3A,Reporter,PartnerISO3A,Partner,Product,Year,Value
0,Import,DEU,Germany,,World,Royalties and license fees,1989,3027
1,Export,,Non-EU western Europe,,World,Sea transport,2020,24412
2,Export,,"African, Caribbean and Pacific States (ACP)",,World,Commercial services (Services excl. government...,1982,9968
3,Import,SWE,Sweden,AUT,Austria,Freight (Air),2021,2
4,Import,FRA,France,,Euro Area (20),Legal services,2021,477


### Merchandise

In [111]:
# Preview five randomly chosen rows. The fixed random_state keeps the preview
# identical across "Restart & Run All" executions.
merchandise.sample(n=5, random_state=42).reset_index(drop=True)

Unnamed: 0,Indicator,ReporterISO3A,Reporter,PartnerISO3A,Partner,Product,Year,Value
0,Export,EST,Estonia,,World,Agricultural products,2000,514
1,Export,TTO,Trinidad and Tobago,,World,Agricultural products,2023,503
2,Import,BGR,Bulgaria,,World,Automotive products,2002,612
3,Import,MAC,"Macao, China",,World,Pharmaceuticals,2003,28
4,Export,GEO,Georgia,,World,Machinery and transport equipment,1996,11


# Visualizations
[//]: # (-.- .tabset .tabset-pills)

## Total trade value

In [112]:
# Sum of all merchandise record values per year, drawn as a single line.
merchandise_total_by_year = merchandise.groupby('Year', as_index=False)['Value'].sum()
px.line(
    merchandise_total_by_year,
    x='Year',
    y='Value',
    title='Total Trade Value Over Time (Merchandise)',
    template="plotly_white",
)

In [113]:
# Sum of all services record values per year, drawn as a single line.
services_total_by_year = services.groupby('Year', as_index=False)['Value'].sum()
px.line(
    services_total_by_year,
    x='Year',
    y='Value',
    title='Total Trade Value Over Time (Services)',
    template="plotly_white",
)

## Import vs Export vs Global Average

In [114]:
# Compare Poland with all reporters: for every (Year, Indicator) pair take the
# mean of the record values, once for Poland's rows only and once for every
# row in the services frame ("Global Average").
group_keys = ["Year", "Indicator"]

poland_services = services.loc[services["Reporter"].eq("Poland")]

mean_poland_services = (
    poland_services.groupby(group_keys, as_index=False)["Value"].mean()
    .assign(Reporter="Poland")
)
mean_global_services = (
    services.groupby(group_keys, as_index=False)["Value"].mean()
    .assign(Reporter="Global Average")
)

# Stack both series so plotly can split them by colour (Reporter) and by
# dash style (Import / Export).
df = pd.concat([mean_poland_services, mean_global_services], ignore_index=True)

line_options = dict(
    x="Year",
    y="Value",
    color="Reporter",
    line_dash="Indicator",
    markers=True,
    title="Poland: Mean Services Imports vs Exports vs Global Average",
    template='simple_white',
)
fig = px.line(df, **line_options)
fig.show()


In [115]:
# Compare Poland with all reporters: for every (Year, Indicator) pair take the
# mean of the record values, once for Poland's rows only and once for every
# row in the merchandise frame ("Global Average").
group_keys = ["Year", "Indicator"]

poland_merchandise = merchandise.loc[merchandise["Reporter"].eq("Poland")]

mean_poland_merchandise = (
    poland_merchandise.groupby(group_keys, as_index=False)["Value"].mean()
    .assign(Reporter="Poland")
)
mean_global_merchandise = (
    merchandise.groupby(group_keys, as_index=False)["Value"].mean()
    .assign(Reporter="Global Average")
)

# Stack both series so plotly can split them by colour (Reporter) and by
# dash style (Import / Export).
df = pd.concat([mean_poland_merchandise, mean_global_merchandise], ignore_index=True)

line_options = dict(
    x="Year",
    y="Value",
    color="Reporter",
    line_dash="Indicator",
    markers=True,
    title="Poland: Mean Merchandise Imports vs Exports vs Global Average",
    template='simple_white',
)
fig = px.line(df, **line_options)
fig.show()


## Treemap

In [None]:
# Merchandise export records for 2023.
# The filter previously selected "Import" rows even though the variable names
# and the chart title describe exports, so the treemap showed import values
# under an export heading.
exports = merchandise[(merchandise['Indicator'] == "Export") & (merchandise['Year'] == 2023)]

# Total export value per (reporter, product) pair; these become the leaves of
# the treemap, nested under their reporter.
exports_grouped = exports.groupby(['Reporter', 'Product'], as_index=False)['Value'].sum()

fig = px.treemap(exports_grouped,
                    path=['Reporter', 'Product'],
                    values='Value',
                    title='Export Value by Country and Product (2023)',
                    template='simple_white')
# Show both the name and the numeric value inside every tile.
fig.update_traces(textinfo='label+value')
fig.show()



## Leaderboard

In [117]:
# Total services export value per reporter for every year.
exports = services.loc[services["Indicator"].eq("Export")]
grouped = exports.groupby(["Year", "Reporter"], as_index=False)["Value"].sum()

# Rank reporters within each year: rank 1 is the largest total, and tied
# totals share the smallest rank of their group (method="min").
grouped = grouped.assign(
    Rank=grouped.groupby("Year")["Value"].rank(ascending=False, method="min")
)

# The ten reporters with the largest export total summed over all years.
# Their yearly rank is still computed against every reporter, so ranks
# greater than 10 can appear on the chart.
total_by_reporter = grouped.groupby("Reporter")["Value"].sum()
top_exporters = total_by_reporter.nlargest(10).index
top_data = grouped[grouped["Reporter"].isin(top_exporters)]

fig = px.line(
    top_data,
    x="Year",
    y="Rank",
    color="Reporter",
    markers=True,
    title="Leaderboard of Top 10 Exporters Over Time (Services)",
)

# Horizontal legend centred underneath the plot area.
legend_below_plot = {
    "orientation": "h",
    "yanchor": "top",
    "y": -0.2,
    "xanchor": "center",
    "x": 0.5,
}
fig.update_layout(
    xaxis_title="Year",
    legend_title="Country",
    template="plotly_white",
    legend=legend_below_plot,
)

# Reverse the y axis so rank 1 is at the top, with one tick per rank.
fig.update_yaxes(autorange="reversed", title="Export Rank", tickmode="linear", dtick=1)

fig.show()


In [118]:
# Total merchandise export value per reporter for every year.
exports = merchandise.loc[merchandise["Indicator"].eq("Export")]
grouped = exports.groupby(["Year", "Reporter"], as_index=False)["Value"].sum()

# Rank reporters within each year: rank 1 is the largest total, and tied
# totals share the smallest rank of their group (method="min").
grouped = grouped.assign(
    Rank=grouped.groupby("Year")["Value"].rank(ascending=False, method="min")
)

# The ten reporters with the largest export total summed over all years.
# Their yearly rank is still computed against every reporter, so ranks
# greater than 10 can appear on the chart.
total_by_reporter = grouped.groupby("Reporter")["Value"].sum()
top_exporters = total_by_reporter.nlargest(10).index
top_data = grouped[grouped["Reporter"].isin(top_exporters)]

fig = px.line(
    top_data,
    x="Year",
    y="Rank",
    color="Reporter",
    markers=True,
    title="Leaderboard of Top 10 Exporters Over Time (Merchandise)",
)

# Horizontal legend centred underneath the plot area.
legend_below_plot = {
    "orientation": "h",
    "yanchor": "top",
    "y": -0.2,
    "xanchor": "center",
    "x": 0.5,
}
fig.update_layout(
    xaxis_title="Year",
    legend_title="Country",
    template="plotly_white",
    legend=legend_below_plot,
)

# Reverse the y axis so rank 1 is at the top, with one tick per rank.
fig.update_yaxes(autorange="reversed", title="Export Rank", tickmode="linear", dtick=1)

fig.show()


## Bubble Map

In [119]:
# Total merchandise export value per reporter over all years. Grouping on the
# ISO3 code as well leaves out reporters without a code, because groupby
# drops NaN keys by default.
export_rows = merchandise["Indicator"] == "Export"
df_bubble = (
    merchandise[export_rows]
    .groupby(['Reporter', 'ReporterISO3A'], as_index=False)['Value']
    .sum()
)

# Linearly map each total onto the (min_bubble_size, max_bubble_size] range,
# with the largest exporter landing exactly on max_bubble_size.
min_bubble_size, max_bubble_size = 1, 50
value_share = df_bubble['Value'] / df_bubble['Value'].max()
size_scaled = value_share * (max_bubble_size - min_bubble_size) + min_bubble_size

geo_options = dict(
    locations="ReporterISO3A",
    color="Value",
    size=size_scaled,
    hover_name="Reporter",
    projection="natural earth",
    title="Sum of exports over the whole timeframe",
    template="plotly_white",
)
fig_bubble = px.scatter_geo(df_bubble, **geo_options)

fig_bubble.show()


## Sunburst

In [120]:
# Two-level sunburst: the inner ring splits merchandise value by direction
# (Import / Export), the outer ring splits each direction by product.
# Values are summed over every year and reporter present in the frame.
# The chart title previously contained the typo "Productes".
es = px.sunburst(
  merchandise, path=["Indicator", "Product"], values="Value",
  color="Indicator",
  title="The amount of Products Imported and Exported"
)
es.show()


In [121]:
# command we used for generating html:
# jupyter nbconvert --to html --template jp-extended main.ipynb