In [15]:
import plotly.graph_objects as go
import plotly.io as pio

# Define the base template
# Font stack shared by every text element of the template.
_BASE_FONT_FAMILY = 'Open Sans, sans-serif'


def _base_axis(anchor):
    """Return the styling shared by both axes, anchored to the axis named `anchor`."""
    return dict(
        anchor=anchor,
        showgrid=True,
        tickfont=dict(size=14, family=_BASE_FONT_FAMILY),
        # The axis-level `titlefont` shortcut is deprecated in favour of
        # `title.font` (and no longer accepted by newer Plotly releases), so the
        # axis title font is nested under `title`.
        title=dict(font=dict(size=16, family=_BASE_FONT_FAMILY)),
    )


base = go.layout.Template(
    layout=go.Layout(
        paper_bgcolor='#FFF5CC',
        plot_bgcolor='#FFF5CC',
        # Figure size: width is the height scaled by 1.618.
        height=600,
        width=600 * 1.618,
        xaxis=_base_axis('y'),
        yaxis=_base_axis('x'),
        # Default font for any text that does not set its own.
        font=dict(
            color='#333333',
            size=18,
            family=_BASE_FONT_FAMILY
        ),
        # Default trace colours, used in this order.
        colorway=["#348273", "#280F3C", "#CC5500", "#333333", "#50C2A1", "#7A4171", "#FF8C42", "#808080"],
        title=go.layout.Title(
            text='',
            font=dict(
                size=24,
                color='#333333',
                family=_BASE_FONT_FAMILY
            ),
            x=0.05,
        ),
    ),
)

# Register the base template and make it the default for every new figure
pio.templates['base'] = base
pio.templates.default = 'base'


In [5]:
import requests
import pandas as pd

# URL for World Bank API to get country metadata.
# Uses HTTPS so the request is not sent in clear text; `per_page=300` is
# presumably chosen to fit every entry on a single page (verify against the
# `pages` field of the response).
url_metadata = "https://api.worldbank.org/v2/country?format=json&per_page=300"

# Function to fetch country metadata from the World Bank API and return a DataFrame
def fetch_country_metadata(url, timeout=30):
    """
    Download country metadata and return it as a DataFrame.

    Parameters:
    url (str): World Bank API country endpoint returning JSON.
    timeout (float): Seconds to wait for each HTTP response before giving up.

    Returns:
    pd.DataFrame: One row per entry with the columns "Country Code",
        "Country Name", "Region", "Income Level" and "Lending Type".
        Empty (but with those columns) when the API returns no entries.

    Raises:
    requests.HTTPError: If the server answers with an error status.
    ValueError: If the JSON body is not the expected `[paging_info, entries]` pair
        (for example an API error message).
    """
    entries = []
    page = total_pages = 1
    while page <= total_pages:
        # The first request is sent exactly as given; later pages only add `page`.
        params = {"page": page} if page > 1 else None
        response = requests.get(url, params=params, timeout=timeout)
        response.raise_for_status()
        payload = response.json()

        if not (isinstance(payload, list) and len(payload) >= 2 and isinstance(payload[0], dict)):
            raise ValueError(f"Unexpected World Bank API response for {url}: {payload!r}")

        # Follow the paging info so results beyond the first page are not dropped.
        total_pages = int(payload[0].get("pages") or 1)
        # The entry list can be null when nothing matched.
        entries.extend(payload[1] or [])
        page += 1

    # Extract relevant data
    records = [
        [
            entry['id'],
            entry['name'],
            entry['region']['value'],
            entry['incomeLevel']['value'],
            entry['lendingType']['value'],
        ]
        for entry in entries
    ]

    return pd.DataFrame(
        records,
        columns=["Country Code", "Country Name", "Region", "Income Level", "Lending Type"],
    )

# Download the country metadata from the endpoint configured above
df_country_metadata = fetch_country_metadata(url=url_metadata)

# Preview the first five rows
print(df_country_metadata.head(5))

# Persist the metadata as CSV, leaving out the positional index
df_country_metadata.to_csv(path_or_buf="country_metadata.csv", index=False)


  Country Code                 Country Name                      Region  \
0          ABW                        Aruba  Latin America & Caribbean    
1          AFE  Africa Eastern and Southern                  Aggregates   
2          AFG                  Afghanistan                  South Asia   
3          AFR                       Africa                  Aggregates   
4          AFW   Africa Western and Central                  Aggregates   

  Income Level    Lending Type  
0  High income  Not classified  
1   Aggregates      Aggregates  
2   Low income             IDA  
3   Aggregates      Aggregates  
4   Aggregates      Aggregates  


In [6]:
import requests
import pandas as pd

# URLs for World Bank API to get the data.
# Every indicator uses the same endpoint shape, so the URLs are built from the
# indicator codes; HTTPS avoids sending the requests in clear text.
_INDICATOR_URL_TEMPLATE = "https://api.worldbank.org/v2/country/all/indicator/{code}?format=json&per_page=20000"

# Label ("<indicator> - <gender>") -> World Bank indicator code.
_INDICATOR_CODES = {
    "Life Expectancy - Male": "SP.DYN.LE00.MA.IN",
    "Life Expectancy - Female": "SP.DYN.LE00.FE.IN",
    "Life Expectancy - Total": "SP.DYN.LE00.IN",
    "Under-five Mortality - Male": "SH.DYN.MORT.MA",
    "Under-five Mortality - Female": "SH.DYN.MORT.FE",
    "Under-five Mortality - Total": "SH.DYN.MORT",
    "Adult Mortality - Male": "SP.DYN.AMRT.MA",
    "Adult Mortality - Female": "SP.DYN.AMRT.FE",
    # Disabled in the original notebook (reason not recorded):
    #"Adult Mortality - Total": "SP.DYN.AMRT",
}

urls = {
    label: _INDICATOR_URL_TEMPLATE.format(code=code)
    for label, code in _INDICATOR_CODES.items()
}

# Function to fetch data from the World Bank API and return a DataFrame
def fetch_data(url, indicator_name, gender=None, timeout=30):
    """
    Download one indicator series and return it in long format.

    Parameters:
    url (str): World Bank API indicator endpoint returning JSON.
    indicator_name (str): Label stored in the "Indicator" column of every row.
    gender (str or None): Label stored in the "Gender" column of every row.
    timeout (float): Seconds to wait for each HTTP response before giving up.

    Returns:
    pd.DataFrame: One row per entry with the columns "Country", "Year",
        "Value", "Indicator" and "Gender". "Year" holds the API's `date`
        field unchanged. Empty (but with those columns) when the API returns
        no entries.

    Raises:
    requests.HTTPError: If the server answers with an error status.
    ValueError: If the JSON body is not the expected `[paging_info, entries]` pair
        (for example an API error message for an unknown indicator).
    """
    entries = []
    page = total_pages = 1
    while page <= total_pages:
        # The first request is sent exactly as given; later pages only add `page`.
        params = {"page": page} if page > 1 else None
        response = requests.get(url, params=params, timeout=timeout)
        response.raise_for_status()
        payload = response.json()

        if not (isinstance(payload, list) and len(payload) >= 2 and isinstance(payload[0], dict)):
            raise ValueError(f"Unexpected World Bank API response for {url}: {payload!r}")

        # Follow the paging info so rows beyond `per_page` are not silently lost.
        total_pages = int(payload[0].get("pages") or 1)
        # The entry list can be null when nothing matched.
        entries.extend(payload[1] or [])
        page += 1

    # Extract relevant data
    records = [
        [entry['country']['value'], entry['date'], entry['value'], indicator_name, gender]
        for entry in entries
    ]

    return pd.DataFrame(records, columns=["Country", "Year", "Value", "Indicator", "Gender"])

# Download each indicator; the label before " - " names the indicator and the
# first of Male/Female/Total found in the label (checked in that order) is the gender
dfs = []
for indicator, url in urls.items():
    gender = next(
        (label for label in ("Male", "Female", "Total") if label in indicator),
        None,
    )
    indicator_name = indicator.split(" - ")[0]
    dfs.append(fetch_data(url, indicator_name, gender))

# Stack the per-indicator frames and turn the Year column into numbers
df_combined = pd.concat(dfs).assign(Year=lambda frame: pd.to_numeric(frame['Year']))

# Reshape to one row per (Country, Year) with one column per (Indicator, Gender),
# keeping the first value found for each cell
pivoted = df_combined.pivot_table(
    values='Value',
    index=['Country', 'Year'],
    columns=['Indicator', 'Gender'],
    aggfunc='first',
)
df_death_timing = pivoted.reset_index()

# Collapse the two-level column labels into single strings, skipping empty parts
df_death_timing.columns = [
    ' '.join(part for part in col if part).strip()
    for col in df_death_timing.columns.values
]

# Preview the first five rows
print(df_death_timing.head(5))

# Persist the reshaped data as CSV, leaving out the positional index
df_death_timing.to_csv(path_or_buf="death_timing.csv", index=False)


       Country  Year  Adult Mortality Female  Adult Mortality Male  \
0  Afghanistan  1960                 550.189               601.887   
1  Afghanistan  1961                 543.600               594.812   
2  Afghanistan  1962                 537.703               588.870   
3  Afghanistan  1963                 531.856               583.144   
4  Afghanistan  1964                 526.179               577.178   

   Life Expectancy Female  Life Expectancy Male  Life Expectancy Total  \
0                  33.285                31.870                 32.535   
1                  33.813                32.409                 33.068   
2                  34.297                32.883                 33.547   
3                  34.773                33.346                 34.016   
4                  35.246                33.828                 34.494   

   Under-five Mortality Female  Under-five Mortality Male  \
0                          NaN                        NaN   
1           

In [7]:
# Requires df_death_timing and df_country_metadata from the earlier cells

# Left-join the metadata onto the indicator data by country name
# ('Country' on the left, 'Country Name' on the right); every indicator row is kept
rbt_death_timing = df_death_timing.merge(
    df_country_metadata,
    how="left",
    left_on="Country",
    right_on="Country Name",
)

# Preview the first five rows as one unwrapped text table
print(rbt_death_timing.head(5).to_string())

# Persist the joined data as CSV, leaving out the positional index
rbt_death_timing.to_csv(path_or_buf="rbt_death_timing.csv", index=False)


       Country  Year  Adult Mortality Female  Adult Mortality Male  Life Expectancy Female  Life Expectancy Male  Life Expectancy Total  Under-five Mortality Female  Under-five Mortality Male  Under-five Mortality Total Country Code Country Name      Region Income Level Lending Type
0  Afghanistan  1960                 550.189               601.887                  33.285                31.870                 32.535                          NaN                        NaN                       354.6          AFG  Afghanistan  South Asia   Low income          IDA
1  Afghanistan  1961                 543.600               594.812                  33.813                32.409                 33.068                          NaN                        NaN                       348.9          AFG  Afghanistan  South Asia   Low income          IDA
2  Afghanistan  1962                 537.703               588.870                  34.297                32.883                 33.547             

In [30]:
import pandas as pd
import plotly.express as px

# Assuming rbt_death_timing is already created and available

def plot_metric(df, metric_column, group_by_column, agg_func='mean'):
    """
    Draw a line chart of one metric over time, with one line per group.

    The data is aggregated per ('Year', group) pair before plotting, and the
    figure is shown immediately using the 'base' template.

    Parameters:
    df (pd.DataFrame): Data containing a 'Year' column plus the two columns named below.
    metric_column (str): Column holding the values to plot on the y-axis.
    group_by_column (str): Column whose values define the individual lines.
    agg_func (str or function): Aggregation applied to the metric within each
        ('Year', group) pair; defaults to 'mean'.

    Returns:
    None
    """
    # Aggregate the metric for every (year, group) pair and flatten the index
    group_keys = ['Year', group_by_column]
    aggregated = df.groupby(group_keys)[metric_column].agg(agg_func)
    plot_frame = aggregated.reset_index()

    # Plot settings: year on x, metric on y, one colour per group
    line_options = dict(
        x='Year',
        y=metric_column,
        color=group_by_column,
        title=f'{metric_column} by {group_by_column}',
        template='base',
    )

    # Build the figure and display it
    px.line(plot_frame, **line_options).show()

# Example: under-five mortality over time, one line per country (default mean aggregation)
plot_metric(
    rbt_death_timing,
    metric_column='Under-five Mortality Total',
    group_by_column='Country',
)
