In [1]:
# Config
# Tables
import pandas as pd
import pyreadstat
# Charting
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
# Math
import numpy as np
# Tools
import os

import plotly.io as pio
pio.renderers.default = "notebook_connected"

In [2]:
# Load the raw Trademap spreadsheet; the path is relative to the notebook's working directory.
df_original = pd.read_excel('./data/trademap/Trademap data.xlsx')
df_original

Unnamed: 0,Pais,Region,Fecha,FOB,Kg,Precio
0,Philippines,Asia oriental y el Pacifico,2021-05-01,3032000,561236,5.402362
1,Viet Nam,Asia oriental y el Pacifico,2021-11-01,2807000,507178,5.534546
2,Viet Nam,Asia oriental y el Pacifico,2021-09-01,2716000,494250,5.495195
3,Philippines,Asia oriental y el Pacifico,2021-08-01,3506000,491297,7.136213
4,Philippines,Asia oriental y el Pacifico,2021-09-01,3133000,446619,7.014928
...,...,...,...,...,...,...
1051,Algeria,Oriente Medio y Norte de Africa,2021-06-01,0,0,
1052,Algeria,Oriente Medio y Norte de Africa,2021-07-01,0,0,
1053,Algeria,Oriente Medio y Norte de Africa,2021-08-01,0,0,
1054,Algeria,Oriente Medio y Norte de Africa,2021-10-01,0,0,


In [3]:
def clean(df_original):
    """Return a cleaned copy of the raw trade table.

    The input frame is left untouched. In the returned frame:
      * 'Fecha' is parsed to datetime,
      * every row containing a missing value in any column is removed,
      * rows are ordered by 'Kg' and then 'Pais', both descending.
    """
    # assign() works on a copy, so the caller's frame keeps its original 'Fecha'.
    with_dates = df_original.assign(Fecha=pd.to_datetime(df_original['Fecha']))

    complete_rows = with_dates.dropna()

    return complete_rows.sort_values(by=['Kg', 'Pais'], ascending=False)

# Cleaned frame (dates parsed, incomplete rows dropped, sorted by Kg) used by the cells below.
df = clean(df_original)
df

Unnamed: 0,Pais,Region,Fecha,FOB,Kg,Precio
0,Philippines,Asia oriental y el Pacifico,2021-05-01,3032000,561236,5.402362
1,Viet Nam,Asia oriental y el Pacifico,2021-11-01,2807000,507178,5.534546
2,Viet Nam,Asia oriental y el Pacifico,2021-09-01,2716000,494250,5.495195
3,Philippines,Asia oriental y el Pacifico,2021-08-01,3506000,491297,7.136213
4,Philippines,Asia oriental y el Pacifico,2021-09-01,3133000,446619,7.014928
...,...,...,...,...,...,...
356,Belgium,Europa y Asia central,2020-12-01,1000,13,76.923077
357,Belgium,Europa y Asia central,2021-02-01,0,7,0.000000
358,United Kingdom,Europa y Asia central,2021-01-01,0,2,0.000000
359,Belgium,Europa y Asia central,2021-09-01,0,2,0.000000


In [4]:
def average(df):
    """Compute the Kg-weighted average 'Precio' for each distinct 'Fecha'.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns 'Fecha', 'Kg' and 'Precio'.

    Returns
    -------
    pandas.DataFrame
        One row per distinct 'Fecha' (used as the index, ascending) and a
        single float column 'Precio' holding sum(Precio * Kg) / sum(Kg).

    Notes
    -----
    Rows with a missing 'Fecha', 'Kg' or 'Precio' are ignored. A date whose
    total 'Kg' is zero yields NaN instead of raising ZeroDivisionError.
    """
    valid = df.dropna(subset=['Fecha', 'Kg', 'Precio'])

    # Aggregate numerator and denominator in one pass rather than filtering
    # the frame and appending a row once per date.
    totals = (
        valid.assign(_weighted=valid['Precio'] * valid['Kg'])
        .groupby('Fecha', sort=True)[['_weighted', 'Kg']]
        .sum()
    )

    avg_df = (totals['_weighted'] / totals['Kg']).to_frame('Precio')

    # Keep the index unnamed so the result looks like a plain date-indexed table.
    return avg_df.rename_axis(None)

# Kg-weighted average Precio for each distinct Fecha, indexed by date.
avg_df = average(df)
avg_df

Unnamed: 0,Precio
2020-12-01,2.485659
2021-01-01,1.121749
2021-02-01,1.993563
2021-03-01,2.121645
2021-04-01,1.971259
2021-05-01,3.143178
2021-06-01,2.981528
2021-07-01,2.72531
2021-08-01,3.556559
2021-09-01,3.763834


In [6]:
def sum_Kg_df(df, n_countries=10):
    """Aggregate the trade table per country and rank countries by total Kg.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain 'Pais' and 'Kg'; other numeric columns are summed too.
    n_countries : int, default 10
        Number of leading countries reported individually.

    Returns
    -------
    top_kg : pandas.Series
        Total Kg of the `n_countries` largest countries, followed by one
        extra entry labelled 'other' holding the combined Kg of the rest
        (0 when there are no remaining countries).
    full_sum : pandas.DataFrame
        Per-country sums of the numeric columns, sorted by 'Kg' descending.
    normalized_sum : pandas.Series
        Min-max scaled per-country Kg totals, (Kg - min) / (max - min).
        All values are NaN when every country has the same total.
    """
    # numeric_only keeps text/date columns such as 'Region' and 'Fecha' out
    # of the sum, where they are meaningless or raise.
    full_sum = (
        df.groupby('Pais')
        .sum(numeric_only=True)
        .sort_values(by='Kg', ascending=False)
    )
    kg = full_sum['Kg']

    # Build the 'other' bucket as its own Series and concatenate, instead of
    # assigning a row into a slice of full_sum (SettingWithCopyWarning).
    other = pd.Series(
        [kg.iloc[n_countries:].sum()],
        index=pd.Index(['other'], name=kg.index.name),
        name=kg.name,
    )
    top_kg = pd.concat([kg.iloc[:n_countries], other])

    kg_min = kg.min()
    normalized_sum = (kg - kg_min) / (kg.max() - kg_min)

    return top_kg, full_sum, normalized_sum

# Kg totals for the 10 largest countries (plus an 'other' row), the per-country
# totals frame, and the min-max scaled Kg totals.
top_countries, full_sum, normalized_sum = sum_Kg_df(df, n_countries=10)
normalized_sum



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



Pais
Viet Nam                            1.000000
Mexico                              0.622932
Philippines                         0.603283
Chile                               0.373700
Saudi Arabia                        0.311300
                                      ...   
Madagascar                          0.000429
Lao People's Democratic Republic    0.000403
Bulgaria                            0.000246
Vanuatu                             0.000152
Dominican Republic                  0.000000
Name: Kg, Length: 88, dtype: float64

In [7]:
def get_df_for_top_countries(df, top_countries):
    """Return the rows of `df` whose 'Pais' is one of the labels in `top_countries.index`.

    Rows containing any missing value are dropped from the result. The input
    frame is not modified.
    """
    wanted = top_countries.index.values
    is_top = df['Pais'].isin(wanted)

    return df.loc[is_top].dropna()

# Rows of df whose Pais appears in the index of top_countries.
df_top = get_df_for_top_countries(df, top_countries)
df_top

Unnamed: 0,Pais,Region,Fecha,FOB,Kg,Precio
0,Philippines,Asia oriental y el Pacifico,2021-05-01,3032000,561236,5.402362
1,Viet Nam,Asia oriental y el Pacifico,2021-11-01,2807000,507178,5.534546
2,Viet Nam,Asia oriental y el Pacifico,2021-09-01,2716000,494250,5.495195
3,Philippines,Asia oriental y el Pacifico,2021-08-01,3506000,491297,7.136213
4,Philippines,Asia oriental y el Pacifico,2021-09-01,3133000,446619,7.014928
...,...,...,...,...,...,...
279,Philippines,Asia oriental y el Pacifico,2021-02-01,64000,4698,13.622818
280,Bangladesh,Asia meridional,2020-12-01,10000,4694,2.130379
296,Thailand,Asia oriental y el Pacifico,2021-04-01,7000,2981,2.348205
307,Thailand,Asia oriental y el Pacifico,2021-02-01,5000,2006,2.492522


In [8]:
# Treemap nested Region -> Pais: tile area is Kg, tile colour is Precio.
fig = px.treemap(
    data_frame=df,
    title='Treemap Region-Pais de los Kilogramos exportados por China y su Precio',
    path=['Region', 'Pais'],
    names='Pais',
    values='Kg',
    color='Precio',
    color_continuous_scale=['green', 'yellow', 'red'],
    # Fixed colour range instead of auto-scaling to the data.
    range_color=[0, 5],
)
fig.update_layout(margin={'t': 50, 'l': 25, 'r': 25, 'b': 25})
fig.show()

In [9]:
# Kg per date, one facet per region, restricted to the two regions listed below;
# bars are coloured by Precio on a fixed 0-5 scale.
fig = px.bar(
    data_frame=df.loc[df['Region'].isin(['Asia oriental y el Pacifico', 'America Latina'])],
    title='Volumen Exportado de China por Region y precio (color)',
    x='Fecha',
    y='Kg',
    facet_col='Region',
    color='Precio',
    color_continuous_scale=['green', 'grey', 'red'],
    range_color=[0, 5],
    hover_data=['Pais'],
)
fig.update_layout(height=800)
fig.show()

In [112]:
# Dual-axis chart: exported Kg per country as bars (left axis) and the
# Kg-weighted average price from avg_df as a line (right axis).
fig = make_subplots(specs=[[{"secondary_y": True}]])

# One bar trace per country, each ordered by date.
for pais in df['Pais'].unique():
    pais_df = df[df.Pais == pais].sort_values(by=['Fecha'])
    fig.add_trace(
        go.Bar(
            x=pais_df.Fecha,
            y=pais_df.Kg,
            name=pais,
        ),
        secondary_y=False,
    )

fig.add_trace(
    go.Scatter(x=avg_df.index, y=avg_df['Precio'], name="Precio Promedio Global"),
    secondary_y=True,
)

# Descriptive titles so the figure stands alone when the notebook is skimmed.
fig.update_layout(
    title_text="Kilogramos exportados por Pais y Precio Promedio Global"
)
fig.update_xaxes(title_text="Fecha")
fig.update_yaxes(title_text="<b>Kg</b> exportados", secondary_y=False)
fig.update_yaxes(title_text="<b>Precio</b> promedio", secondary_y=True)

fig.show()

In [61]:
# Min-max scale each country's total Kg into [0, 1].
# full_sum is the per-country totals frame returned by sum_Kg_df.
normalized_full = full_sum['Kg']
normalized_full = (normalized_full - normalized_full.min()) / (normalized_full.max() - normalized_full.min())
normalized_full

Pais
Viet Nam                            1.000000
Mexico                              0.622932
Philippines                         0.603283
Chile                               0.373700
Saudi Arabia                        0.311300
                                      ...   
Madagascar                          0.000429
Lao People's Democratic Republic    0.000403
Bulgaria                            0.000246
Vanuatu                             0.000152
Dominican Republic                  0.000000
Name: Kg, Length: 88, dtype: float64

In [62]:
# px.line joins points in row order and df is sorted by Kg, so order the rows
# by date first; otherwise each country's line doubles back on itself.
fig = px.line(
    df.sort_values(by='Fecha'),
    x='Fecha',
    y='Precio',
    color='Pais',
)

fig.show()

In [76]:
# Inspect the distinct Fecha values present in the cleaned data.
df.Fecha.unique()

array(['2021-05-01T00:00:00.000000000', '2021-11-01T00:00:00.000000000',
       '2021-09-01T00:00:00.000000000', '2021-08-01T00:00:00.000000000',
       '2021-07-01T00:00:00.000000000', '2021-03-01T00:00:00.000000000',
       '2021-10-01T00:00:00.000000000', '2021-01-01T00:00:00.000000000',
       '2021-06-01T00:00:00.000000000', '2021-04-01T00:00:00.000000000',
       '2021-02-01T00:00:00.000000000', '2020-12-01T00:00:00.000000000'],
      dtype='datetime64[ns]')

In [85]:
# Recompute the Kg-weighted average price per date with the shared helper
# rather than repeating its loop inline.
avg_df = average(df)

avg_df

Unnamed: 0,Precio
2020-12-01,2.485628
2021-01-01,1.121008
2021-02-01,1.992638
2021-03-01,2.121146
2021-04-01,1.972524
2021-05-01,3.142066
2021-06-01,2.982081
2021-07-01,2.727451
2021-08-01,3.556833
2021-09-01,3.764178


In [None]:
# Marker-styling demo on synthetic points (not the trade data).
# NOTE(review): x and y were not defined in any visible cell; the seeded
# sample below is a stand-in so the cell runs on a fresh kernel. Confirm
# the intended data.
rng = np.random.default_rng(1)
x = rng.uniform(low=3, high=6, size=500)
y = rng.uniform(low=3, high=6, size=500)

fig = go.Figure()

# Add scatter trace with medium sized markers
fig.add_trace(
    go.Scatter(
        mode='markers',
        x=x,
        y=y,
        marker=dict(
            color='LightSkyBlue',
            size=20,
            line=dict(
                color='MediumPurple',
                width=2
            )
        ),
        showlegend=False
    )
)

# Add trace with large marker
fig.add_trace(
    go.Scatter(
        mode='markers',
        x=[2],
        y=[4.5],
        marker=dict(
            color='LightSkyBlue',
            size=120,
            line=dict(
                color='MediumPurple',
                width=12
            )
        ),
        showlegend=False
    )
)

fig.show()