# instructions
1. Go to the "View" tab at the top of the screen and enable "Enable Full Width Notebook"
2. Set the country variable, my_country, to the three-letter code of the country whose RCAs you want to see in the graph below (e.g. 'BOL')
3. Go to the "Run" tab at the top of the screen and click "Run All Cells"

Note that in this visualization, RCA values are limited to the range 0 to 1. A value of 0.5 or above means that the share of the country's exports in that product exceeds the world average.

In [14]:
# Country to plot. The value is compared against the `location_code` column of
# interactive_viz_trade_5y.csv, so it must use the same three-letter codes
# that appear there (e.g. 'BOL', 'AGO').
my_country = 'BOL'

In [15]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
import matplotlib as mpl 
from plotly.express.colors import qualitative
import matplotlib.patheffects as path_effects

In [16]:
# Load the product-space layout: one row per `sitc` key with its x/y position,
# cluster id and product name.
# `sitc` is read explicitly as text: later cells use the `.str` accessor on it
# (length check, 'macro' substring match) and merge it against zero-padded
# string codes, so it must never be inferred as a numeric column and any
# leading zeros in the file must be kept.
stack_umap = pd.read_csv('sitc_product_space_positions.csv', dtype={'sitc': str})
stack_umap.head()

Unnamed: 0,sitc,x,y,cluster,sitc_name,n_prod,cluster_cl
0,0,15.890849,-4.213998,,,,
1,11,15.46609,-4.065872,0.0,Animals of the bovine species (including buffa...,2.0,0.0
2,12,15.602483,-4.309349,0.0,"Sheep and goats, live",2.0,0.0
3,13,16.243904,-3.147542,2.0,"Swine, live",6.0,0.0
4,14,17.00905,-3.723602,5.0,"Poultry, live",3.0,0.0


In [17]:
# Load the per-country RCA table and normalise `sitc` to a zero-padded,
# four-character string so it can be used as a text merge key.
trade_5y = pd.read_csv('interactive_viz_trade_5y.csv').assign(
    sitc=lambda frame: frame['sitc'].astype(str).str.zfill(4)
)
trade_5y.head()

Unnamed: 0,location_code,year,sitc,median_rca_norm
0,AGO,2018,11,0.000643
1,AGO,2018,12,0.013092
2,AGO,2018,13,0.0
3,AGO,2018,14,0.0
4,AGO,2018,15,0.000832


In [18]:
# RCA values of the selected country for the most recent year in the table.
latest_year = trade_5y['year'].max()
is_selected = (trade_5y['location_code'] == my_country) & (trade_5y['year'] == latest_year)
this_rca = trade_5y.loc[is_selected, ['sitc', 'median_rca_norm']].copy()

# Fail loudly instead of silently plotting a chart with no RCA data, which is
# what an unknown or mistyped country code would otherwise lead to.
if this_rca.empty:
    raise ValueError(
        f"No rows found for location_code={my_country!r} in year {latest_year}; "
        "check the value of my_country."
    )
this_rca.head()

Unnamed: 0,sitc,median_rca_norm
11760,11,0.0
11761,12,0.0
11762,13,0.0
11763,14,0.0
11764,15,0.0


In [19]:
# Load the arrows drawn on top of the scatter plot. The plotting cell reads
# `x_o`/`y_o` as each arrow's tail and adds `u`/`v` to obtain its head.
macro_arrows = pd.read_csv('sitc_product_space_macro_arrows.csv')
macro_arrows.head()

Unnamed: 0,sitc_in,sitc_out,dist,source,x_o,y_o,target,x_d,y_d,min_dist,u,v
0,482,2117,3.941229,0.0,15.48288,-2.847898,5.0,14.176732,0.870605,3.941229,-1.306148,3.718503
1,545,2929,7.423964,3.0,11.43904,2.4074,4.0,18.311752,5.214724,7.423964,6.872712,2.807324
2,582,6665,1.13595,2.0,9.756202,5.288166,17.0,9.956572,6.406305,1.13595,0.20037,1.118139
3,585,6746,4.060609,3.0,10.026635,0.637069,14.0,8.854506,-3.250689,4.060609,-1.172129,-3.887758
4,615,2876,1.060765,4.0,18.621372,6.372491,9.0,17.823038,7.07098,1.060765,-0.798334,0.698488


In [20]:
# Load the cluster id -> display name table and expand one label.
sitc_cluster_names = pd.read_csv('sitc_cluster_names.csv')

# Plain substring replacement (no regex needed: the label has no metacharacters).
old_label, new_label = 'Food & Manufactures', 'Food & Material Manufactures'
sitc_cluster_names['name'] = sitc_cluster_names['name'].str.replace(old_label, new_label, regex=False)
sitc_cluster_names.head()

Unnamed: 0,cluster,name
0,0,Animal & Crop Products
1,1,Meat & Seafood
2,2,Grains & Oilseeds
3,3,Produce
4,4,Cash Crops


In [21]:
# --- Data prep ---
# Keep the rows whose `sitc` key is exactly four characters long and attach the
# selected country's RCA. The left merge keeps products that have no RCA row
# for this country; their `median_rca_norm` is NaN.
scatter_data = stack_umap.loc[stack_umap['sitc'].str.len() == 4].copy().merge(this_rca, on='sitc', how='left')
# String labels so that plotly express colours clusters as discrete groups.
scatter_data['cluster'] = scatter_data['cluster'].astype(str)
# Marker size. The 0.05 offset keeps zero-RCA products visible. Missing RCA is
# sized like zero because plotly rejects NaN marker sizes; `median_rca_norm`
# itself is left untouched so the hover text still reflects the raw value.
scatter_data['median_rca_norm_adj'] = scatter_data['median_rca_norm'].fillna(0) + 0.05

# Label positions: rows whose `sitc` key contains 'macro', excluding keys that
# contain '_a' or '_b'. na=False treats a missing key as "no match" so the
# boolean mask never contains NaN.
temp = stack_umap.loc[stack_umap['sitc'].str.contains('macro', na=False), ['sitc', 'x', 'y']]
temp = temp.loc[~temp['sitc'].str.contains('_a|_b')].copy()
# Drop the first six characters (the length of 'macro_'); what remains is cast
# to an integer cluster id below. Presumably keys look like 'macro_<id>' --
# TODO confirm against the CSV.
temp['sitc'] = temp['sitc'].str[6:]
temp = temp.rename(columns={'sitc': 'cluster'})
temp['cluster'] = temp['cluster'].astype(int)
temp = temp.merge(sitc_cluster_names, on='cluster')


# NOTE: qualitative.D3 holds 10 colours (not 20). Plotly express cycles through
# the sequence when there are more clusters than colours, so some clusters
# share a colour. Longer options: qualitative.Dark24, qualitative.Light24.
# The variable name is kept in case other cells refer to it.
tab20_like = qualitative.D3

fig = px.scatter(
    scatter_data,
    x='x',
    y='y',
    color='cluster',
    size='median_rca_norm_adj',
    size_max=20,  # Adjust as needed
    color_discrete_sequence=tab20_like,
    hover_data={'x': False, 'y': False, 'cluster': False, 'sitc_name': False, 'median_rca_norm': True}
)

# Replace the hover content trace by trace. Each trace is named after the
# cluster label it shows; the rows are selected with the same label so the
# customdata rows line up with the trace's points (this relies on plotly
# express keeping dataframe order within a trace).
for trace in fig.data:
    in_trace = scatter_data['cluster'] == trace.name
    trace.customdata = scatter_data.loc[in_trace, ['sitc_name', 'median_rca_norm']].to_numpy()
    trace.hovertemplate = '%{customdata[0]}<br>RCA: %{customdata[1]:.2f}'


# Arrows: tail at (x_o, y_o), head at (x_o + u, y_o + v), both in data coordinates.
arrow_color = '#1f77b4'
for arrow in macro_arrows.itertuples(index=False):
    tail_x, tail_y = arrow.x_o, arrow.y_o
    fig.add_annotation(
        x=tail_x + arrow.u, y=tail_y + arrow.v, xref='x', yref='y',
        ax=tail_x, ay=tail_y, axref='x', ayref='y',
        showarrow=True,
        arrowhead=3, arrowsize=1, arrowwidth=1.5, arrowcolor=arrow_color,
        opacity=1
    )

# Macro labels: one text box per cluster on a semi-transparent white background.
for label in temp[['x', 'y', 'name']].itertuples(index=False):
    fig.add_annotation(
        x=label.x, y=label.y,
        text=label.name,
        showarrow=False,
        font=dict(size=12, color='black'),
        bgcolor='rgba(255,255,255,0.7)',
        borderpad=2
    )

# Layout tweaks: hide grid, zero lines and tick labels; tie the y scale to x.
fig.update_xaxes(showgrid=False, zeroline=False, showticklabels=False)
fig.update_yaxes(showgrid=False, zeroline=False, showticklabels=False, scaleanchor="x", scaleratio=0.7)

fig.update_layout(
    xaxis_title='',
    yaxis_title='',
    showlegend=False,
    width=1700,
    height=850,
    margin=dict(l=0, r=0, t=0, b=0)
)

fig.show()