In [332]:
import data_loader as dl
import plotly.express as px
import plotly.graph_objects as go
import pandas as pd
import numpy as np

geo_edf = dl.get_raw_data()

In [333]:
df_for_sankey = geo_edf[
    ['veh_type_1',  'sp1', 'next_veh_type_1']
]
df_for_sankey

sankey_conn_curr_next = df_for_sankey.groupby('veh_type_1').agg({'next_veh_type_1': 'value_counts'}).rename(columns={'next_veh_type_1': 'delta_curr_to_next'}).reset_index()
sankey_conn_curr_next['curr_veh_type_source'] = sankey_conn_curr_next['veh_type_1'].copy().apply(lambda x: float(x)-1)
sankey_conn_curr_next['next_veh_type_source'] = sankey_conn_curr_next['next_veh_type_1'].copy().apply(lambda x: 3+float(x)-1)



fig = go.Figure(data=[go.Sankey(
    node=dict(
        pad=15,
        thickness=20,
        line=dict(color="black", width=0.5),
        label=['curr_veh_count_1', 'curr_veh_count_2', 'curr_veh_count_3', 'next_veh_count1', 'next_veh_count2', 'next_veh_count3', 'next_veh_count4'],
        # color="blue",
        x=[0, 0, 0, 1, 1, 1, 1]  # Adjust x-axis position for each node
    ),
    link=dict(
        source=sankey_conn_curr_next['curr_veh_type_source'].to_list(),
        target=sankey_conn_curr_next['next_veh_type_source'].to_list(),
        value=sankey_conn_curr_next['delta_curr_to_next'].to_list()
    )
)])

fig.update_layout(title_text="Basic Sankey Diagram", font_size=10)
fig.show()

In [334]:
# newer

In [335]:
df_for_sankey = geo_edf[['veh_pt_1', 'veh_type_1', 'sp1', 'comb_weight']].copy()
df_for_sankey['sp1'] = df_for_sankey['sp1'].map({1: "Small ICE", 2: "Small BEV", 3: "Large ICE", 4: "Large BEV", 5:"Pickup truck ICE", 6: "Pickup truck BEV"})
df_for_sankey['veh_pt_1'] = df_for_sankey['veh_pt_1'].map({1: "Small Car", 2: "Large Car ", 3: "Pickup Truck"})
df_for_sankey['veh_type_1'] = df_for_sankey['veh_type_1'].map({1: "ICE", 2: "HEV", 3: "PHEV", 4: "BEV"})
df_for_sankey

Unnamed: 0,veh_pt_1,veh_type_1,sp1,comb_weight
0,Small Car,ICE,Large ICE,0.357282
1,Small Car,HEV,Large BEV,5.000008
2,Small Car,HEV,Small ICE,1.896234
3,Small Car,HEV,Small ICE,0.173927
4,Small Car,HEV,Large ICE,0.038582
...,...,...,...,...
1949,Small Car,HEV,Large ICE,4.749426
1950,Small Car,PHEV,Pickup truck BEV,0.713148
1951,Small Car,HEV,Small ICE,0.137644
1952,Small Car,HEV,Small ICE,0.155176


In [336]:

# filter anything that is not nan
df_for_sankey.fillna(-1, inplace=True)
df_for_sankey = df_for_sankey[(df_for_sankey['sp1'] != -1) & (df_for_sankey['veh_type_1'] != -1) & (df_for_sankey['veh_pt_1'] != -1)].copy()


#convert them to int
# df_for_sankey[['veh_type_1', 'veh_pt_1', 'sp1']] = df_for_sankey[['veh_type_1', 'veh_pt_1', 'sp1']].astype(int)

# create a new column that is a combination of veh_pt_1 and veh_type_1
df_for_sankey['curr'] = df_for_sankey['veh_pt_1'].astype(str) + '_' + df_for_sankey['veh_type_1'].astype(str)
df_for_sankey.sort_values(by=['curr', 'sp1'], inplace=True)
df_for_sankey

Unnamed: 0,veh_pt_1,veh_type_1,sp1,comb_weight,curr
19,Large Car,HEV,Large BEV,0.201524,Large Car _HEV
43,Large Car,HEV,Large BEV,0.184200,Large Car _HEV
208,Large Car,HEV,Large BEV,0.144763,Large Car _HEV
233,Large Car,HEV,Large BEV,0.408959,Large Car _HEV
540,Large Car,HEV,Large BEV,0.642585,Large Car _HEV
...,...,...,...,...,...
1745,Small Car,PHEV,Small ICE,4.344947,Small Car_PHEV
1806,Small Car,PHEV,Small ICE,0.171731,Small Car_PHEV
1817,Small Car,PHEV,Small ICE,0.353291,Small Car_PHEV
1855,Small Car,PHEV,Small ICE,4.720936,Small Car_PHEV


In [337]:
df_for_sankey = df_for_sankey.groupby(['curr', 'sp1']).sum('comb_weight').reset_index()
# df_for_sankey

In [338]:
df_for_sankey = df_for_sankey.groupby(['curr', 'sp1']).sum('comb_weight').reset_index()
df_for_sankey.sort_values(['curr', 'sp1'], inplace=True)
# df_for_sankey


In [339]:
# Get unique values from 'veh_pt_1' and 'veh_type_1' columns
veh_pt_values = np.sort(np.array(geo_edf['veh_pt_1'].unique()))
veh_type_values = np.sort(np.array(geo_edf['veh_type_1'].unique()))

# Create the mapping dictionary using dictionary comprehension
mapping_dict = {f'{i+1}_{j+1}': num for num, (i, j) in enumerate(np.ndindex(len(veh_pt_values), len(veh_type_values)), start=1)}

df_for_sankey['curr_mapping'] = df_for_sankey['curr'].map(mapping_dict)

df_for_sankey


Unnamed: 0,curr,sp1,comb_weight,curr_mapping
0,Large Car _HEV,Large BEV,4.562911,
1,Large Car _HEV,Large ICE,1.887041,
2,Large Car _HEV,Small BEV,6.964996,
3,Large Car _HEV,Small ICE,2.390057,
4,Large Car _ICE,Large BEV,14.68757,
5,Large Car _ICE,Large ICE,1.899622,
6,Large Car _ICE,Pickup truck BEV,8.919592,
7,Large Car _ICE,Pickup truck ICE,8.769296,
8,Large Car _ICE,Small BEV,10.51695,
9,Large Car _ICE,Small ICE,8.241609,


In [340]:
def prepare_df_sankey(df, source, target):
    '''
        be sure to `sort` the df by source and then by target
    '''
    label_list = df[source].unique().tolist() + df[target].unique().tolist()

    df['source_index'] = df[source].copy().apply(lambda x: label_list.index(x))
    df['target_index'] = df[target].copy().apply(lambda x: label_list.index(x))

    return df, label_list

df, label_list = prepare_df_sankey(df_for_sankey, 'curr', 'sp1')


In [341]:
source = df_for_sankey['curr'].to_list()
destinations = df_for_sankey['sp1'].to_list()
values = df_for_sankey['comb_weight'].to_list()

label_list = df_for_sankey['curr'].unique().tolist() + df_for_sankey['sp1'].unique().tolist()


fig = go.Figure(data=[go.Sankey(
    node=dict(
        pad=15,
        thickness=20,
        line=dict(color="black", width=0.5),
        label=label_list,
        # color="blue",
        x=[0, 0, 0, 1, 1, 1, 1]  # Adjust x-axis position for each node
    ),
    link=dict(
        source=df['source_index'].to_list(),
        target=df['target_index'].to_list(),
        value=df['comb_weight'].to_list()
    )
)])

fig.update_layout(title_text="Basic Sankey Diagram", font_size=10)
fig.show()
