# Sankey diagram of connections in trading between continents

This can be used in the future to show trade between continents and its consequences for emissions.

In [27]:
#Use python 3.11
import pandas as pd
import plotly.graph_objects as go
import seaborn as sns

In [2]:
# Load data. The relative path uses forward slashes so it resolves on Windows,
# macOS and Linux alike (a backslash path only works on Windows).
df = pd.read_csv('./data/continent_trade_matrix.csv')

In [3]:
# Latest and earliest value found in the 'year' column
print(df['year'].max(), df['year'].min())

2022 1986


In [4]:
# Keep only the flows that neither start nor end in Antarctica
df = df[(df['source'] != 'antarctica') & (df['target'] != 'antarctica')]

In [24]:
# Alphabetical list of every continent that occurs as a source or as a target
unique_continents = sorted({*df['source'].unique(), *df['target'].unique()})

In [54]:
# One colour per continent, taken from seaborn's "colorblind" palette
palette = sns.color_palette(palette="colorblind", n_colors=len(unique_continents))

# Hex strings ('#rrggbb') of the same colours, used later for the Sankey nodes
hex_colors = palette.as_hex()

# Show the colour swatches as the cell output
palette

In [62]:
# Select the 2022 trade flows from the full frame `df`
df_2022 = df[df['year'] == 2022]

# Node labels: every continent that appears as a source or a target in 2022
unique_continents = sorted(set(df_2022['source'].unique()) | set(df_2022['target'].unique()))

# Map each continent to its node index (0, 1, 2, ...)
continent_to_index = {continent: i for i, continent in enumerate(unique_continents)}

# Prepare data for the Sankey diagram
nodes = dict(label=unique_continents, color=hex_colors)  # Customize colors
links = dict(
    source=df_2022['source'].map(continent_to_index),
    target=df_2022['target'].map(continent_to_index),
    value=df_2022['value'],
)

# Colour every link like its source node, but semi-transparent.
# The node colours are '#rrggbb' hex strings, which cannot carry an alpha value,
# so each one is converted to an explicit 'rgba(r, g, b, opacity)' string.
opacity = 0.4
node_rgba = [
    f"rgba({int(color[1:3], 16)}, {int(color[3:5], 16)}, {int(color[5:7], 16)}, {opacity})"
    for color in nodes['color']
]
link_colors = [node_rgba[src] for src in links['source']]

# Create the Sankey diagram
fig = go.Figure(data=[go.Sankey(
    node=nodes,
    link=dict(
        source=links['source'],
        target=links['target'],
        value=links['value'],
        color=link_colors  # Semi-transparent source-node colours
    )
)])

fig.update_layout(
    title_text="2022 Trade Flow by Continent",
    font_size=16,
    )

fig.show()


In [63]:
def _to_rgba(color, opacity):
    """Return `color` as an ``rgba(r, g, b, opacity)`` string.

    ``#rrggbb`` / ``#rgb`` hex strings and ``rgb(r, g, b)`` strings are converted;
    any other value (e.g. a named colour) is returned unchanged.
    """
    if color.startswith('#'):
        digits = color[1:]
        if len(digits) == 3:  # expand shorthand '#abc' to 'aabbcc'
            digits = ''.join(ch * 2 for ch in digits)
        red, green, blue = (int(digits[i:i + 2], 16) for i in (0, 2, 4))
        return f"rgba({red}, {green}, {blue}, {opacity})"
    if color.startswith('rgb('):
        return color.replace('rgb(', 'rgba(', 1).replace(')', f", {opacity})")
    return color


def plot_sankey_for_years(df, years, node_colors=None, opacity=0.4):
    """Write one Sankey diagram of the trade flows per requested year.

    For every year in `years` the rows of `df` with that year are drawn as a
    Sankey diagram and saved to ``sankey_diagram_{year}.html`` in the current
    working directory.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the columns 'year', 'source', 'target' and 'value'.
    years : iterable
        Values of the 'year' column to plot, one diagram each.
    node_colors : sequence of str, optional
        Colours assigned to the continents in alphabetical order (cycled when
        there are more continents than colours). Defaults to the module-level
        `hex_colors`.
    opacity : float, optional
        Alpha applied to the link colours (default 0.4).

    Raises
    ------
    ValueError
        If the colour sequence is empty.
    """
    palette = list(hex_colors if node_colors is None else node_colors)
    if not palette:
        raise ValueError("node_colors must contain at least one colour")

    # Fix the colour of each continent once, from the whole frame, so that a
    # continent keeps its colour in every year even if another one is absent.
    all_continents = sorted(set(df['source'].dropna()) | set(df['target'].dropna()))
    color_of = {
        continent: palette[i % len(palette)]
        for i, continent in enumerate(all_continents)
    }

    for year in years:
        df_year = df[df['year'] == year]

        # Node labels: the continents that actually trade in this year
        unique_continents = sorted(set(df_year['source'].unique()) | set(df_year['target'].unique()))

        # Map each continent to its node index
        continent_to_index = {continent: i for i, continent in enumerate(unique_continents)}

        # Nodes and links are built from this year's rows only
        year_colors = [color_of[continent] for continent in unique_continents]
        nodes = dict(label=unique_continents, color=year_colors)
        sources = df_year['source'].map(continent_to_index).tolist()
        targets = df_year['target'].map(continent_to_index).tolist()
        values = df_year['value'].tolist()

        # Each link takes the colour of its source node, made semi-transparent
        node_rgba = [_to_rgba(color, opacity) for color in year_colors]
        link_colors = [node_rgba[src] for src in sources]

        # Create the Sankey diagram
        fig = go.Figure(data=[go.Sankey(
            node=nodes,
            link=dict(
                source=sources,
                target=targets,
                value=values,
                color=link_colors
            )
        )])

        fig.update_layout(
            title_text=f"{year} Trade Flow by Continent",
            font_size=12
        )

        # Save as HTML
        fig.write_html(f"sankey_diagram_{year}.html")

# Try the helper on a single year first
years_to_plot = [2022]  # years for which a diagram is written
plot_sankey_for_years(df, years=years_to_plot)


In [64]:
# Every fifth year, counting down from 2022 and stopping before 1986
years_to_plot = list(range(2022, 1986, -5))
years_to_plot

[2022, 2017, 2012, 2007, 2002, 1997, 1992, 1987]

In [65]:
# Number of rows available for each selected year
for year in years_to_plot:
    rows_in_year = df['year'].eq(year)
    print(rows_in_year.sum())

36
36
36
36
36
36
36
36


In [66]:
# Write one diagram for each of the selected five-year steps
plot_sankey_for_years(df, years=years_to_plot)