In [3]:
import requests as rq
import bs4
import pandas as pd

In [4]:
# Wikipedia page that the GDP table is scraped from in the following cell.
url = 'https://en.wikipedia.org/wiki/List_of_countries_by_GDP_(nominal)'
# Bound the wait so a stalled connection cannot hang the notebook, and fail
# here on an HTTP error instead of parsing an error page and hitting an
# unrelated IndexError when the table lookup comes back empty.
page = rq.get(url, timeout=30)
page.raise_for_status()

In [5]:
from io import StringIO

# Parse the downloaded HTML and collect every <table> whose class attribute
# is exactly the string below.
bs4page = bs4.BeautifulSoup(page.text, 'html.parser')
tables = bs4page.find_all(
    'table',
    class_="wikitable sortable sticky-header static-row-numbers",
)

# Only the first matching table is of interest. read_html wants a file-like
# object, so the tag's markup is wrapped in StringIO; rows containing any
# missing value are dropped.
first_table_html = StringIO(str(tables[0]))
chart = pd.read_html(first_table_html)[0].dropna()
chart.head()

Unnamed: 0_level_0,Country/Territory,UN region,IMF[1][13],IMF[1][13],World Bank[14],World Bank[14],United Nations[15],United Nations[15]
Unnamed: 0_level_1,Country/Territory,UN region,Forecast,Year,Estimate,Year,Estimate,Year
0,World,—,104476432,2023,100562011,2022,96698005,2021
1,United States,Americas,26949643,2023,25462700,2022,23315081,2021
2,China,Asia,17700899,[n 1]2023,17963171,[n 3]2022,17734131,[n 1]2021
3,Germany,Europe,4429838,2023,4072192,2022,4259935,2021
4,Japan,Asia,4230862,2023,4231141,2022,4940878,2021


In [6]:
# Replace the column labels with flat, readable names. The list must stay in
# the same order as the table's columns, one label per column.
chart.columns = [
    'Country/Territory',
    'UN Region',
    'IMF Forecast',
    'Year',
    'World Bank Estimate',
    'WB Year',
    'UN Estimate',
    'UN Year',
]
chart

Unnamed: 0,Country/Territory,UN Region,IMF Forecast,Year,World Bank Estimate,WB Year,UN Estimate,UN Year
0,World,—,104476432,2023,100562011,2022,96698005,2021
1,United States,Americas,26949643,2023,25462700,2022,23315081,2021
2,China,Asia,17700899,[n 1]2023,17963171,[n 3]2022,17734131,[n 1]2021
3,Germany,Europe,4429838,2023,4072192,2022,4259935,2021
4,Japan,Asia,4230862,2023,4231141,2022,4940878,2021
...,...,...,...,...,...,...,...,...
209,Palau,Oceania,267,2023,—,—,218,2021
210,Kiribati,Oceania,246,2023,223,2022,227,2021
211,Nauru,Oceania,150,2023,151,2022,155,2021
212,Montserrat,Americas,—,—,—,—,72,2021


In [9]:
import plotly.express as px
import pandas as pd

# Stacked bar chart of the IMF forecast: one bar per UN region, one coloured
# segment per country.

# Entries that are not plain numbers (e.g. the '—' placeholder) become NaN.
chart['IMF Forecast'] = pd.to_numeric(chart['IMF Forecast'], errors='coerce')

# The 'World' aggregate row is not a country and would dwarf every real bar.
chart_filtered = chart[chart['Country/Territory'] != 'World']

fig = px.bar(chart_filtered,
             x='UN Region',
             y='IMF Forecast',
             color='Country/Territory',
             # Carry the country name on every point so the hover template
             # below can reference it explicitly.
             custom_data=['Country/Territory'],
             title='IMF GDP Forecast by Country within UN Regions',
             labels={'IMF Forecast': 'IMF GDP Forecast ($ Million)'},
             height=600,
             barmode='stack')

# On bar traces %{label} resolves to the category-axis value (the UN region
# here), so the country has to come from customdata instead.
fig.update_traces(
    hovertemplate='Country/Territory: %{customdata[0]}<br>IMF Forecast: %{y}'
)
fig.show()



In [42]:
# Save the stacked bar figure as an HTML file in the current working directory.
fig.write_html("stacked_bar.html")



In [16]:
## Load the ROI hierarchy lookup table
import numpy as np

url = "https://raw.githubusercontent.com/bcaffo/MRIcloudT1volumetrics/master/inst/extdata/multilevel_lookup_table.txt"

# Tab-separated file: discard the 'Level5' column, give the 'modify*' columns
# descriptive names, and keep them ordered from 'roi' to 'level1'.
multilevel_lookup = (
    pd.read_csv(url, sep="\t")
    .drop(columns=['Level5'])
    .rename(columns={
        "modify": "roi",
        "modify.1": "level4",
        "modify.2": "level3",
        "modify.3": "level2",
        "modify.4": "level1",
    })
    .loc[:, ['roi', 'level4', 'level3', 'level2', 'level1']]
)
multilevel_lookup.head()


Unnamed: 0,roi,level4,level3,level2,level1
0,SFG_L,SFG_L,Frontal_L,CerebralCortex_L,Telencephalon_L
1,SFG_R,SFG_R,Frontal_R,CerebralCortex_R,Telencephalon_R
2,SFG_PFC_L,SFG_L,Frontal_L,CerebralCortex_L,Telencephalon_L
3,SFG_PFC_R,SFG_R,Frontal_R,CerebralCortex_R,Telencephalon_R
4,SFG_pole_L,SFG_L,Frontal_L,CerebralCortex_L,Telencephalon_L


In [21]:
# Subject whose volumes are extracted below.
id = 127

# Pipeline: read the full table -> keep this subject's type-1, level-5 rows
# (only the ROI name and its volume) -> attach the hierarchy columns by ROI ->
# add a constant 'icv' column and each ROI's share of the summed volume.
subjectData = (
    pd.read_csv("https://raw.githubusercontent.com/smart-stats/ds4bio_book/main/book/assetts/kirby21AllLevels.csv")
    .loc[
        lambda d: (d.type == 1) & (d.level == 5) & (d.id == id),
        ['roi', 'volume'],
    ]
    .merge(multilevel_lookup, on="roi")
    .assign(icv="ICV", comp=lambda d: d.volume / np.sum(d.volume))
)
subjectData.head()

Unnamed: 0,roi,volume,level4,level3,level2,level1,icv,comp
0,SFG_L,12926,SFG_L,Frontal_L,CerebralCortex_L,Telencephalon_L,ICV,0.00935
1,SFG_R,10050,SFG_R,Frontal_R,CerebralCortex_R,Telencephalon_R,ICV,0.00727
2,SFG_PFC_L,12783,SFG_L,Frontal_L,CerebralCortex_L,Telencephalon_L,ICV,0.009247
3,SFG_PFC_R,11507,SFG_R,Frontal_R,CerebralCortex_R,Telencephalon_R,ICV,0.008324
4,SFG_pole_L,3078,SFG_L,Frontal_L,CerebralCortex_L,Telencephalon_L,ICV,0.002227


In [40]:
import plotly.graph_objects as go

# Build a Sankey diagram of one subject's composition flowing through the
# hierarchy icv -> level1 -> level2 -> level3.
#
# subjectData is expected to carry the columns 'icv', 'level1', 'level2',
# 'level3' and 'comp' (each ROI's share of the total volume).

# Hierarchy columns, ordered from the root to the finest level drawn.
levels = ['icv', 'level1', 'level2', 'level3']
nodes = {level: list(subjectData[level].unique()) for level in levels}

# One node per distinct label, in first-seen order across the levels.
# NOTE(review): a label that appears at more than one level collapses into a
# single node; confirm the lookup table never reuses names across levels.
labels = list(dict.fromkeys(
    label for level in levels for label in nodes[level]
))

# Plotly addresses nodes by their position in the label list.
label_to_index = {label: i for i, label in enumerate(labels)}

# One link per distinct (parent, child) pair between adjacent levels, weighted
# by the summed 'comp' of the ROIs below it. Aggregating here, instead of
# emitting a link for every ROI row, avoids stacks of duplicate ribbons and
# makes the hover value the total flow between the two nodes.
links = pd.concat(
    [
        subjectData
        .groupby([parent, child], as_index=False, sort=False)['comp']
        .sum()
        .rename(columns={parent: 'source', child: 'target', 'comp': 'value'})
        for parent, child in zip(levels[:-1], levels[1:])
    ],
    ignore_index=True,
)

sources = links['source'].map(label_to_index).tolist()
targets = links['target'].map(label_to_index).tolist()
values = links['value'].tolist()

# Create the Sankey diagram
fig1 = go.Figure(data=[go.Sankey(
    node = dict(
        pad = 15,
        thickness = 20,
        line = dict(color = "black", width = 0.5),
        label = labels,
        color = "blue"
    ),
    link = dict(
        source = sources,
        target = targets,
        value = values
    )
)])

fig1.update_layout(
    title_text="Sankey Diagram",
    font_size=10,
    width=800,
    height=800,
    # Leave room at the top for the title; a zero top margin gives it no space.
    margin=dict(l=0, r=0, t=40, b=0)
)

fig1.show()



In [41]:
# Save the Sankey figure as an HTML file in the current working directory.
fig1.write_html("sankey.html")
