In [1]:
import pandas as pd
import plotly.express as px
import requests
import io
import os

 Step 1: Download the GDP data (IMF CSV export hosted on GitHub)

In [2]:
# Location of the IMF GDP export (CSV) hosted on GitHub.
csv_url = "https://raw.githubusercontent.com/bitcon1/my-first-repo/master/HWK5/imf-dm-export-20250224.csv"

# Always pass a timeout: without one, requests waits indefinitely when the
# host does not answer, which would hang the whole notebook run.
response = requests.get(csv_url, timeout=30)
if response.status_code != 200:
    # Anything other than 200 OK means the body is not the CSV we expect.
    raise Exception("Error: Unable to fetch GDP data from the source.")

Read the downloaded CSV into a pandas DataFrame

In [3]:
# Wrap the downloaded text in an in-memory buffer so pandas can parse it
# the same way it would parse a file.
csv_buffer = io.StringIO(response.text)
gdp_data = pd.read_csv(csv_buffer)

# Print a heading followed by the parsed column index.
print("✅ Column Names in Dataset:", gdp_data.columns, sep="\n")

✅ Column Names in Dataset:
Index(['Country-Region', 'MF Numeric Code', 'IMF Alphabetic Code', '1980',
       '1981', '1982', '1983', '1984', '1985', '1986', '1987', '1988', '1989',
       '1990', '1991', '1992', '1993', '1994', '1995', '1996', '1997', '1998',
       '1999', '2000', '2001', '2002', '2003', '2004', '2005', '2006', '2007',
       '2008', '2009', '2010', '2011', '2012', '2013', '2014', '2015', '2016',
       '2017', '2018', '2019', '2020', '2021', '2022', '2023', '2024', '2025',
       '2026', '2027', '2028', '2029'],
      dtype='object')


Select only the columns we need: the country name, the IMF numeric code, and the most recent year column

In [4]:
# Year columns are the headers made up solely of decimal digits (e.g. "1980").
year_columns = [col for col in gdp_data.columns if col.isdecimal()]
if not year_columns:
    # Fail with a clear message instead of max()'s "empty sequence" error.
    raise ValueError("No year columns found in the GDP dataset.")

# Compare the headers as integers: max() on plain strings is lexicographic,
# which only matches numeric order while every label has the same width.
# The result stays a string so it can still be used as a column label.
# NOTE(review): the printed column list runs up to 2029, so this "latest"
# column may hold projections rather than observed values — confirm that
# this is the year the analysis should use.
latest_year = max(year_columns, key=int)

In [5]:
# Keep the name, numeric-code and latest-year columns, discard rows with a
# missing value in any of them, and give the columns friendlier labels.
gdp_data = (
    gdp_data.loc[:, ["Country-Region", "MF Numeric Code", latest_year]]
    .dropna()
    .rename(
        columns={
            "Country-Region": "Country",
            "MF Numeric Code": "IMF Numeric Code",
            latest_year: f"GDP ({latest_year})",
        }
    )
)

In [6]:
# Name of the GDP column built from the selected year.
gdp_column = f"GDP ({latest_year})"

# Convert the column to numbers; errors="coerce" turns entries that cannot
# be parsed into NaN instead of raising.
gdp_data[gdp_column] = pd.to_numeric(gdp_data[gdp_column], errors="coerce")

Define IMF regions for Mapping

In [7]:
# Lookup table from numeric code to region / group name, written as
# (code, name) pairs in ascending code order.
# NOTE(review): the preview printed later in this notebook labels Albania
# (code 914) as "South Asia" and Armenia (code 911) as "North America", so
# the codes in the dataset look like per-country codes rather than region
# codes — verify this table against the official IMF code list.
imf_regions = dict(
    [
        (900, "Africa"),
        (901, "Asia and Pacific"),
        (902, "Australia and New Zealand"),
        (903, "Caribbean"),
        (904, "Central America"),
        (905, "Central Asia and the Caucasus"),
        (906, "East Asia"),
        (907, "Eastern Europe"),
        (908, "Europe"),
        (909, "Middle East"),
        (910, "North Africa"),
        (911, "North America"),
        (912, "Pacific Islands"),
        (913, "South America"),
        (914, "South Asia"),
        (915, "Southeast Asia"),
        (916, "Sub-Saharan Africa"),
        (917, "Western Europe"),
        (918, "Western Hemisphere"),
        (919, "ASEAN-5"),
        (920, "Advanced Economies"),
        (921, "Emerging and Developing Asia"),
        (922, "Emerging and Developing Europe"),
        (923, "Emerging Markets"),
        (924, "Euro Area"),
        (925, "European Union"),
        (926, "Latin America and the Caribbean"),
        (927, "G7 Countries"),
        (928, "Middle East & Central Asia"),
        (929, "Other Advanced Economies"),
        (930, "Sub-Saharan Africa"),
        (999, "World"),
    ]
)

Map each IMF numeric code to its region name

In [8]:
# Look up every row's numeric code in the region table; codes that are not
# keys of `imf_regions` come back as NaN.
region_by_row = gdp_data["IMF Numeric Code"].map(imf_regions)
gdp_data["Region"] = region_by_row

In [9]:
# Remove rows with any missing value (for example a code with no region
# match, or a GDP entry that could not be parsed as a number).
gdp_data = gdp_data.dropna()

# Order by region name (A-Z) and, within each region, by GDP descending.
# NOTE(review): the bar chart further down plots `gdp_data`, not this sorted
# frame — confirm which one is meant to be charted.
gdp_data_sorted = gdp_data.sort_values(
    by=["Region", f"GDP ({latest_year})"],
    ascending=[True, False],
)

# The frame was originally bound to the misspelled name `dp_data_sorted`;
# keep that name as an alias so any code relying on it still works.
dp_data_sorted = gdp_data_sorted

In [10]:
# Quick sanity check: column dtypes first, then the first few rows.
print(gdp_data.dtypes, gdp_data.head(), sep="\n")

Country              object
IMF Numeric Code    float64
GDP (2029)          float64
Region               object
dtype: object
       Country  IMF Numeric Code  GDP (2029)              Region
2      Albania             914.0      36.137          South Asia
8      Armenia             911.0      33.203       North America
12  Azerbaijan             912.0      93.196     Pacific Islands
17     Belarus             913.0      91.426       South America
27    Bulgaria             918.0     140.758  Western Hemisphere


Create an interactive bar chart

In [11]:
import plotly.express as px
import plotly.io as pio

# Set "colab" as Plotly's default renderer for the figure `.show()` calls
# that follow.
# NOTE(review): this value is specific to Google Colab — confirm the figures
# still display when the notebook is run in a different front end.
pio.renderers.default = "colab"

In [12]:
# Column holding the GDP values; used for the y-axis and for its label.
gdp_column = f"GDP ({latest_year})"

# One bar per country, each country in its own colour, with the value
# printed on the bar.
# NOTE(review): the axis label says "Million USD", yet the preview above
# shows values such as 36.137 for Albania — confirm the unit against the
# data source before publishing.
fig_bar = px.bar(
    data_frame=gdp_data,
    x="Country",
    y=gdp_column,
    color="Country",
    title=f"Global GDP Distribution - {latest_year}",
    labels={gdp_column: "GDP (in Million USD)"},
    text_auto=True,
)

# Render the chart in the notebook.
fig_bar.show()

In [13]:
# File that receives the standalone, interactive copy of the bar chart.
output_html = "gdp_stacked_bar_plot.html"

# Export the figure, then confirm where it was written.
fig_bar.write_html(file=output_html)
print(f"✅ Interactive plot saved as: {output_html}")

✅ Interactive plot saved as: gdp_stacked_bar_plot.html


Save plot to HTML file

## Part 2 of Hwk

In [14]:
import plotly.graph_objects as go
# CSV from the ds4bio book repository; later cells read its `type`, `level`,
# `id`, `roi` and `volume` columns.
# NOTE(review): the same file is downloaded again in a later cell before
# `subjectData` is first used, so this read looks redundant.
kirby_csv_url = "https://raw.githubusercontent.com/smart-stats/ds4bio_book/main/book/assetts/kirby21AllLevels.csv"
subjectData = pd.read_csv(kirby_csv_url)

In [15]:
import pandas as pd
import numpy as np
import plotly.graph_objects as go

In [16]:
# Column-oriented lookup table with eight rows: an ROI key plus its label at
# three hierarchy levels. Every row shares the same level-1 label, level-2
# groups the rows in consecutive pairs, and level-3 is unique per row.
# NOTE(review): the ROI keys are the letters "A".."H" — confirm they match
# the `roi` values in the subject data, otherwise a join on `roi` finds no
# matches.
multilevel_lookup_data = {
    "roi": list("ABCDEFGH"),
    "level1": ["Intracranial Volume" for _ in range(8)],
    "level2": [f"Region{row // 2 + 1}" for row in range(8)],
    "level3": [f"Subregion{row + 1}" for row in range(8)],
}

In [17]:
# Turn the column-oriented dict into a DataFrame (one column per key) so it
# can be joined to the subject data on `roi`.
multilevel_lookup = pd.DataFrame(data=multilevel_lookup_data)

Load the subject data

In [18]:
# Subject to extract from the data set.
# NOTE: this name shadows Python's builtin `id()` for the rest of the
# notebook; it is kept because the filtering cell below reads `id`.
id = 127

# Load the full table fresh so the filtering below always starts from the
# complete, unfiltered data.
kirby_csv_url = "https://raw.githubusercontent.com/smart-stats/ds4bio_book/main/book/assetts/kirby21AllLevels.csv"
subjectData = pd.read_csv(kirby_csv_url)

In [19]:
# Build the row filter one condition at a time: records of type 1, at
# level 5, belonging to the chosen subject.
is_type_one = subjectData["type"] == 1
is_level_five = subjectData["level"] == 5
is_target_subject = subjectData["id"] == id
selected_rows = is_type_one & is_level_five & is_target_subject

# Only the ROI name and its volume are used from here on.
subjectData = subjectData.loc[selected_rows, ["roi", "volume"]]

In [20]:
# Left join: keep every subject row and attach the level1/level2/level3
# labels for its ROI; rows whose `roi` is absent from the lookup get NaN in
# those columns.
subjectData = subjectData.merge(multilevel_lookup, how="left", on="roi")

Normalize the data: express each ROI volume as a fraction of the total volume

In [21]:
# Sum of all ROI volumes for this subject; the denominator of `comp`.
total_volume = subjectData["volume"].sum()

# Add a constant root label (`icv`) and each ROI's share of the total volume.
subjectData = subjectData.assign(
    icv="Intracranial Volume",
    comp=subjectData["volume"] / total_volume,
)

Prepare the data for the Sankey plot

In [22]:
# Parallel lists describing the Sankey links; they are filled by the loop in
# the next cell.
source_nodes = []
target_nodes = []
values = []

# Gather every label from the four hierarchy columns, in column order.
level_columns = ["icv", "level1", "level2", "level3"]
all_labels = [
    label
    for column in level_columns
    for label in subjectData[column].tolist()
]

# De-duplicate while keeping first-seen order. A set would also remove
# duplicates, but its iteration order for strings can differ from one
# interpreter run to the next, which makes the node numbering (and therefore
# the saved figure) non-reproducible.
# NOTE(review): missing labels (NaN) are not filtered out here, exactly as
# before — confirm whether NaN should ever appear as a node.
unique_labels = list(dict.fromkeys(all_labels))

# Node index for each label, as required by the Sankey source/target lists.
label_to_index = {label: position for position, label in enumerate(unique_labels)}

In [23]:
# Each consecutive pair of hierarchy columns contributes one link per row.
level_pairs = [("icv", "level1"), ("level1", "level2"), ("level2", "level3")]

for _, row in subjectData.iterrows():
    for parent_column, child_column in level_pairs:
        parent_label = row[parent_column]
        child_label = row[child_column]

        # A plain truthiness test does not catch missing labels: NaN is
        # truthy, so rows without a lookup match would otherwise produce
        # links to a NaN node (or fail the index lookup).
        if pd.isna(parent_label) or pd.isna(child_label):
            continue
        # Preserve the original guard against empty labels.
        if not parent_label or not child_label:
            continue
        # Identical labels share one node index, so such a pair would be a
        # link from a node to itself; skip it.
        if parent_label == child_label:
            continue

        source_nodes.append(label_to_index[parent_label])
        target_nodes.append(label_to_index[child_label])
        values.append(row["comp"])

Draw the Sankey plot

In [24]:
# Node appearance and labels; a node's position in `label` is the index the
# links refer to.
sankey_nodes = {
    "pad": 15,
    "thickness": 20,
    "line": {"color": "black", "width": 0.5},
    "label": unique_labels,
}

# Links as three parallel lists: source index, target index, flow size.
sankey_links = {
    "source": source_nodes,
    "target": target_nodes,
    "value": values,
}

mri_sankey_trace = go.Sankey(node=sankey_nodes, link=sankey_links)
fig = go.Figure(data=[mri_sankey_trace])

fig.update_layout(title_text="MRICloud Data as a Sankey Diagram", font_size=10)
fig.show()

In [26]:
# Hand-written five-node example; it does not use the subject data.
# NOTE(review): this example figure — not the MRICloud figure `fig` — is the
# one written to sankey.html in the next cell; confirm that is intended.
example_nodes = {
    "pad": 15,
    "thickness": 20,
    "line": {"color": "black", "width": 0.5},
    "label": ["Intracranial Volume", "Gray Matter", "White Matter", "CSF", "Brainstem"],
    "color": ["blue", "green", "red", "orange", "purple"],
}

# Indices refer to positions in the node `label` list above.
example_links = {
    "source": [0, 0, 0, 1, 2],  # link start nodes
    "target": [1, 2, 3, 4, 4],  # link end nodes
    "value": [8, 6, 4, 2, 3],   # link sizes
}

example_trace = go.Sankey(node=example_nodes, link=example_links)
sankey_data = go.Figure(data=example_trace)

# Left as the cell's final expression so the notebook displays the figure.
sankey_data.update_layout(title_text="Sankey Diagram Example", font={"size": 12})


In [27]:
# File that receives the standalone, interactive copy of the Sankey figure.
output_html = "sankey.html"

# Export the figure, then confirm where it was written.
sankey_data.write_html(file=output_html)
print(f"✅ Interactive Sankey diagram saved as: {output_html}")

✅ Interactive Sankey diagram saved as: sankey.html


## Part 3: Posting sankey.html to my public repository

https://bitcon1.github.io/sankey-visualization/sankey.html