# Import Libraries & Setup

In [None]:
from google.colab import drive
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

In [None]:
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


# Malaria Deaths

In [None]:
# Import Dataset 
file_path = '/content/drive/MyDrive/global-malaria-deaths-by-world-region.csv'
df = pd.read_csv(file_path)

In [None]:
df

Unnamed: 0,Entity,Code,Year,malaria_deaths
0,Africa,,2000,840000
1,Africa,,2001,838000
2,Africa,,2002,797000
3,Africa,,2003,774000
4,Africa,,2004,750000
...,...,...,...,...
121,Western Pacific,,2016,2900
122,Western Pacific,,2017,3000
123,Western Pacific,,2018,3000
124,Western Pacific,,2019,2600


In [None]:
# Generate Bar Plot - Death Count Per World Region
fig = px.bar(df, x='Entity', y='malaria_deaths', animation_frame='Year', range_y=[0, max(df['malaria_deaths'])], color='Entity',
             title='Malaria Deaths per World Region',
             labels={'malaria_deaths': 'Death Count', 'Year': 'Year', 'Entity': 'World Region'})

fig.update_layout(xaxis={'categoryorder': 'total descending'})

# Adjust animation settings
fig.update_layout(updatemenus=[
    dict(type='buttons', showactive=False, buttons=[
        dict(label='Play', method='animate', args=[None, {
            'frame': {'duration': 1000, 'redraw': True},
            'fromcurrent': True,
            'transition': {'duration': 500, 'easing': 'linear'}
        }]),
        dict(label='Stop', method='animate', args=[[None], {
            'frame': {'duration': 0, 'redraw': False},
            'mode': 'immediate',
            'transition': {'duration': 0}
        }])
    ])
])

# Change animation effect to pop
fig.update_traces(marker=dict(line=dict(color='black', width=1)))
fig.update_layout(transition=dict(duration=500, easing='linear'))

# Add count annotations
fig.update_traces(texttemplate='%{y}', textposition='outside')

fig.show()


In [None]:
# Import Dataset 
file_path = '/content/drive/MyDrive/malaria-deaths-by-age.csv'
df2 = pd.read_csv(file_path)

In [None]:
df2

Unnamed: 0,Entity,Code,Year,Deaths - Malaria - Sex: Both - Age: 70+ years (Number),Deaths - Malaria - Sex: Both - Age: 50-69 years (Number),Deaths - Malaria - Sex: Both - Age: 15-49 years (Number),Deaths - Malaria - Sex: Both - Age: 5-14 years (Number),Deaths - Malaria - Sex: Both - Age: Under 5 (Number)
0,Afghanistan,AFG,1990,5,19,46,3,21
1,Afghanistan,AFG,1991,11,38,94,6,41
2,Afghanistan,AFG,1992,12,44,125,8,51
3,Afghanistan,AFG,1993,5,17,58,3,24
4,Afghanistan,AFG,1994,9,33,110,7,52
...,...,...,...,...,...,...,...,...
6835,Zimbabwe,ZWE,2015,28,147,329,539,1475
6836,Zimbabwe,ZWE,2016,26,114,250,441,1219
6837,Zimbabwe,ZWE,2017,28,118,261,460,1249
6838,Zimbabwe,ZWE,2018,28,118,261,468,1213


In [None]:
# Group by age group and sum the death counts
data_grouped = df2.groupby('Year').sum().reset_index()

# Convert the data to long format
data_long = data_grouped.melt(id_vars='Year', value_vars=['Deaths - Malaria - Sex: Both - Age: 70+ years (Number)',
                                                         'Deaths - Malaria - Sex: Both - Age: 50-69 years (Number)',
                                                         'Deaths - Malaria - Sex: Both - Age: 15-49 years (Number)',
                                                         'Deaths - Malaria - Sex: Both - Age: 5-14 years (Number)',
                                                         'Deaths - Malaria - Sex: Both - Age: Under 5 (Number)'],
                              var_name='Age Group', value_name='Death Count')

# Create the pie chart
fig = px.pie(data_long, values='Death Count', names='Age Group',
             title='Malaria Deaths by Age Group',
             labels={'Death Count': 'Death Count', 'Age Group': 'Age Group'})

# Show the figure
fig.show()


The default value of numeric_only in DataFrameGroupBy.sum is deprecated. In a future version, numeric_only will default to False. Either specify numeric_only or select only columns which should be valid for the function.



In [None]:
# Melt the dataset to create a column for age group and a column for the number of deaths
melted_df = pd.melt(df2, id_vars=["Entity", "Year"], value_vars=["Deaths - Malaria - Sex: Both - Age: 70+ years (Number)",
                                                                 "Deaths - Malaria - Sex: Both - Age: 50-69 years (Number)",
                                                                 "Deaths - Malaria - Sex: Both - Age: 15-49 years (Number)",
                                                                 "Deaths - Malaria - Sex: Both - Age: 5-14 years (Number)",
                                                                 "Deaths - Malaria - Sex: Both - Age: Under 5 (Number)"],
                    var_name="Age Group", value_name="Number of Deaths")

# Create a new column for the sum of deaths for each year
melted_df["Sum of Deaths"] = melted_df.groupby(["Entity", "Year"])["Number of Deaths"].transform("sum")

# Create the stacked bar chart using Plotly Express
fig = px.bar(melted_df, x="Year", y="Number of Deaths", color="Age Group", barmode="stack",
             title="Malaria Deaths by Age Group",
             labels={"Number of Deaths": "Number of Deaths", "Age Group": "Age Group"})

# Update the layout with a dropdown menu for entity/country selection
fig.update_layout(
    updatemenus=[
        dict(
            active=0,
            buttons=list([
                dict(
                    label=entity,
                    method="update",
                    args=[{"y": [melted_df[(melted_df["Entity"] == entity) & (melted_df["Age Group"] == age_group)]["Number of Deaths"] / melted_df[(melted_df["Entity"] == entity) & (melted_df["Age Group"] == age_group)]["Sum of Deaths"] * 100 for age_group in melted_df["Age Group"].unique()]}],
                )
                for entity in melted_df["Entity"].unique()
            ]),
        )
    ]
)

# Show the figure
fig.show()

In [None]:
# Import Dataset 
file_path_3 = '/content/drive/MyDrive/malaria-deaths-by-region.csv'
df3 = pd.read_csv(file_path_3)

In [None]:
df3

Unnamed: 0,Entity,Code,Year,Deaths - Malaria - Sex: Both - Age: All Ages (Number)
0,Afghanistan,AFG,1990,93
1,Afghanistan,AFG,1991,189
2,Afghanistan,AFG,1992,239
3,Afghanistan,AFG,1993,108
4,Afghanistan,AFG,1994,211
...,...,...,...,...
6835,Zimbabwe,ZWE,2015,2518
6836,Zimbabwe,ZWE,2016,2050
6837,Zimbabwe,ZWE,2017,2116
6838,Zimbabwe,ZWE,2018,2088


In [None]:
# Define the entities to exclude from the plot
exclude_entities = ['Eastern Mediterranean Region (WHO)', 'East Asia & Pacific (WB)', 'African Region (WHO)', 'Europe & Central Asia (WB)', 'European Region (WHO)', 'G20', 'Latin America & Caribbean (WB)', 'Middle East & North Africa (WB)', 'North America (WB)', 'OECD Countries', 'Region of the Americas (WHO)', 'South-East Asia Region (WHO)', 'Sub-Saharan Africa (WB)','Western Pacific Region (WHO)', 'World', 'World Bank High Income', 'World Bank High Income', 'World Bank Lower Middle Income', 'World Bank Upper Middle Income', 'South Asia (WB)', 'World Bank Low Income']

# Filter the dataframe to exclude the specified entities
filtered_df = df3[~df3['Entity'].isin(exclude_entities)]

# Group the filtered data by Entity and calculate the total death count for each entity
df_grouped = filtered_df.groupby('Entity')['Deaths - Malaria - Sex: Both - Age: All Ages (Number)'].sum().reset_index()

# Sort the data by the total death count in descending order
df_sorted = df_grouped.sort_values(by='Deaths - Malaria - Sex: Both - Age: All Ages (Number)', ascending=False)

# Select the top ten entities and sort in descending order
top_ten = df_sorted.head(10).sort_values(by='Deaths - Malaria - Sex: Both - Age: All Ages (Number)', ascending=True)

# Create the horizontal bar chart
fig = px.bar(top_ten, y='Entity', x='Deaths - Malaria - Sex: Both - Age: All Ages (Number)', orientation='h')

# Update the layout
fig.update_layout(
    title='Top 10 Countries With Highest Death by Malaria (2000-2019)',
    yaxis=dict(title='Country'),
    xaxis=dict(title='Death Count')
)

# Show the chart
fig.show()

In [None]:
# Calculate the total deaths by country
df_agg = df3.groupby(['Entity', 'Code'])['Deaths - Malaria - Sex: Both - Age: All Ages (Number)'].sum().reset_index()

# Create the map visualization with Plotly
fig = px.choropleth(df_agg, locations='Entity', locationmode='country names', color='Deaths - Malaria - Sex: Both - Age: All Ages (Number)',
                    hover_name='Entity', projection='natural earth',
                    color_continuous_scale='YlOrRd', range_color=(0, 7000000))

fig.update_layout(title='Malaria Deaths by Country',
                  coloraxis_colorbar=dict(title='Deaths'))

fig.show()

# Malaria Death Rates 

> Death counts can be misleading. Death Rates reveal more information regarding the spread or presence of a disease/infection. 

In [None]:
# Import Dataset 
file_path_4 = '/content/drive/MyDrive/malaria-death-rates.csv'
df4 = pd.read_csv(file_path_4)

In [None]:
df4

Unnamed: 0,Entity,Code,Year,Deaths - Malaria - Sex: Both - Age: Age-standardized (Rate)
0,Afghanistan,AFG,1990,0.83
1,Afghanistan,AFG,1991,1.57
2,Afghanistan,AFG,1992,1.75
3,Afghanistan,AFG,1993,0.71
4,Afghanistan,AFG,1994,1.35
...,...,...,...,...
6835,Zimbabwe,ZWE,2015,14.92
6836,Zimbabwe,ZWE,2016,11.96
6837,Zimbabwe,ZWE,2017,12.24
6838,Zimbabwe,ZWE,2018,11.98


In [None]:

# Create the map plot using Plotly Express
fig = px.choropleth(
    data_frame=df4,
    locations='Code',
    locationmode='ISO-3',
    color='Deaths - Malaria - Sex: Both - Age: Age-standardized (Rate)',
    color_continuous_scale='YlOrRd',
    hover_name='Entity',
    hover_data={'Deaths - Malaria - Sex: Both - Age: Age-standardized (Rate)': ':,.2f'},
    labels={'Deaths - Malaria - Sex: Both - Age: Age-standardized (Rate)': 'Death Rate'},
    title='Death Rates - Malaria (Age-standardized)',
    animation_frame='Year'
)

# Configure layout
fig.update_layout(
    geo=dict(
        showcoastlines=True,
        showframe=False,
        showocean=True,
        oceancolor='lightblue',
        showlakes=True,
        lakecolor='lightblue',
        projection_type='natural earth'
    ),
    coloraxis_colorbar=dict(
        title='Death Rate'
    ),
    autosize=True,
    margin=dict(l=0, r=0, t=50, b=0)
)

# Enable map zooming and panning
fig.update_geos(
    resolution=50,
    showcountries=True,
    countrycolor='black',
    showsubunits=True,
    subunitcolor='gray'
)

# Show the plot
fig.show()

In [None]:
# Sort the dataset by the death rates in descending order for all years
sorted_df = df4.sort_values(by='Deaths - Malaria - Sex: Both - Age: Age-standardized (Rate)', ascending=False)

# Create a list of unique years in descending order
years = sorted_df['Year'].unique()[::-1]

# Create the figure with a dropdown selector for the year
fig = go.Figure()

# Add traces for each year
for year in years:
    year_df = sorted_df[sorted_df['Year'] == year]
    top_10 = year_df.nlargest(10, 'Deaths - Malaria - Sex: Both - Age: Age-standardized (Rate)')  # Sort by highest rates
    
    fig.add_trace(go.Bar(
        x=top_10['Deaths - Malaria - Sex: Both - Age: Age-standardized (Rate)'][::-1],  # Reverse the order
        y=top_10['Entity'][::-1],  # Reverse the order
        orientation='h',
        name=str(year),
        visible=False  # Set initial visibility to False
    ))

# Set visibility to True for the first year
fig.data[0].visible = True

# Create the dropdown selector
dropdown_buttons = []
for i, year in enumerate(years):
    dropdown_buttons.append(
        dict(
            method='update',
            args=[
                {'visible': [i == j for j in range(len(years))]},  # Set visibility for each year
                {'title': f'Top 10 Countries with Highest Death Rates by Malaria ({year})'}  # Update the title
            ],
            label=str(year)
        )
    )

# Add the dropdown selector to the layout
fig.update_layout(
    updatemenus=[
        dict(
            buttons=dropdown_buttons,
            direction='down',
            showactive=True,
            active=0,
            x=0.5,  # Move the dropdown selector to the center
            y=1.1,  # Move the dropdown selector slightly above the title
            font=dict(color='black')
        )
    ],
    title='Top 10 Countries with Highest Death Rates by Malaria',
    xaxis_title='Death Rate',
    yaxis_title='Country',
)

# Show the figure
fig.show()

In [None]:
# Import Dataset 
file_path_5 = '/content/drive/MyDrive/malaria-death-rates-by-age.csv'
df5 = pd.read_csv(file_path_5)

In [None]:
df5

Unnamed: 0,Entity,Code,Year,Deaths - Malaria - Sex: Both - Age: Under 5 (Rate),Deaths - Malaria - Sex: Both - Age: All Ages (Rate),Deaths - Malaria - Sex: Both - Age: 70+ years (Rate),Deaths - Malaria - Sex: Both - Age: 50-69 years (Rate),Deaths - Malaria - Sex: Both - Age: 15-49 years (Rate),Deaths - Malaria - Sex: Both - Age: 5-14 years (Rate),Deaths - Malaria - Sex: Both - Age: Age-standardized (Rate)
0,Afghanistan,AFG,1990,1.03,0.82,1.52,1.44,0.96,0.09,0.83
1,Afghanistan,AFG,1991,1.75,1.50,3.21,2.83,1.74,0.18,1.57
2,Afghanistan,AFG,1992,1.75,1.65,3.55,3.15,1.95,0.22,1.75
3,Afghanistan,AFG,1993,0.72,0.67,1.37,1.24,0.82,0.09,0.71
4,Afghanistan,AFG,1994,1.45,1.26,2.55,2.35,1.49,0.16,1.35
...,...,...,...,...,...,...,...,...,...,...
6835,Zimbabwe,ZWE,2015,69.70,17.89,10.83,13.22,4.74,14.77,14.92
6836,Zimbabwe,ZWE,2016,57.43,14.33,9.86,10.01,3.53,11.93,11.96
6837,Zimbabwe,ZWE,2017,58.86,14.56,10.28,10.14,3.61,12.25,12.24
6838,Zimbabwe,ZWE,2018,57.32,14.14,10.10,9.84,3.54,12.31,11.98


In [None]:
import plotly.graph_objects as go

# Remove specific columns from age_groups list
age_groups = [col for col in age_groups if col not in ['Deaths - Malaria - Sex: Both - Age: All Ages (Rate)',
                                                       'Deaths - Malaria - Sex: Both - Age: Age-standardized (Rate)']]

# Create traces for each age group for all entities
data = []
for age_group in age_groups:
    for entity in entities:
        trace = go.Scatter(
            x=df5[df5['Entity'] == entity]['Year'],
            y=df5[df5['Entity'] == entity][age_group],
            mode='lines',
            name=f'{entity} - {age_group}'
        )
        data.append(trace)

# Create a dropdown menu for entity selection
buttons = []
for entity in entities:
    button = dict(
        label=entity,
        method="update",
        args=[{"visible": [entity in trace['name'] for trace in data]},
              {"title": f"Death Rates per Age Group for {entity}"}]
    )
    buttons.append(button)

# Create the layout with entity dropdown menu
layout = go.Layout(
    title='Death Rates per Age Group',
    xaxis=dict(title='Year'),
    yaxis=dict(title='Death Rate'),
    updatemenus=[dict(
        buttons=buttons,
        active=0,
        x=0.1,
        xanchor='left',
        y=1.0,
        yanchor='top'
    )]
)

# Create the figure
fig = go.Figure(data=data, layout=layout)

# Display the plot
fig.show()


# Incidence of Malaria

In [None]:
# Import Dataset 
file_path_6 = '/content/drive/MyDrive/incidence-of-malaria.csv'
df6 = pd.read_csv(file_path_6)

In [None]:
df6.head(5)

Unnamed: 0,Entity,Code,Year,"Incidence of malaria (per 1,000 population at risk)"
0,Afghanistan,AFG,2000,81.969559
1,Afghanistan,AFG,2001,78.832069
2,Afghanistan,AFG,2002,79.385788
3,Afghanistan,AFG,2003,68.09156
4,Afghanistan,AFG,2004,37.616467


In [None]:
# Calculate the maximum incidence value
max_incidence = df6['Incidence of malaria (per 1,000 population at risk)'].max()

# Create the map using Plotly Express
fig = px.choropleth(df6,  # DataFrame containing the data
                    locations='Code',  # Column with country codes
                    locationmode='ISO-3',  # Set location mode to use country codes
                    color='Incidence of malaria (per 1,000 population at risk)',  # Column to use for color scale
                    hover_name='Entity',  # Column to use for hover labels
                    animation_frame='Year',  # Column to use for animation
                    color_continuous_scale='reds',  # Color scale
                    range_color=(0, max_incidence),  # Set the color scale range
                    title='Incidence of Malaria')  # Title of the chart

# Update the layout for better interactivity
fig.update_layout(geo=dict(showframe=False,  # Hide the frame around the map
                           showcoastlines=False,  # Hide the coastlines
                           projection_type='natural earth'),  # Use natural earth projection
                  coloraxis_colorbar=dict(title='Incidence',  # Set the colorbar title
                                          lenmode='fraction',  # Set the length mode
                                          len=0.75,  # Set the length
                                          yanchor='middle',  # Set the y anchor
                                          y=0.5,  # Set the y position
                                          tickfont=dict(size=10)),  # Set the tick font size
                  autosize=True,  # Auto-size the plot
                  margin=dict(l=0, r=0, t=50, b=0),  # Adjust the margins
                  coloraxis_colorbar_x=-0.05,  # Adjust the colorbar x position
                  coloraxis_colorbar_len=0.6)  # Adjust the colorbar length

# Show the plot
fig.show()

In [None]:
# Create traces for each entity
data = []
for entity in df6['Entity'].unique():
    trace = go.Scatter(
        x=df6[df6['Entity'] == entity]['Year'],
        y=df6[df6['Entity'] == entity]['Incidence of malaria (per 1,000 population at risk)'],
        mode='lines',
        name=entity
    )
    data.append(trace)

# Create a dropdown menu for entity selection
buttons = []
for entity in df6['Entity'].unique():
    button = dict(
        label=entity,
        method="update",
        args=[{"visible": [entity in trace['name'] for trace in data]},
              {"title": f"Incidence of Malaria for {entity}"}]
    )
    buttons.append(button)

# Create the layout with entity dropdown menu
layout = go.Layout(
    title='Incidence of Malaria',
    xaxis=dict(title='Year'),
    yaxis=dict(title='Incidence'),
    updatemenus=[
        dict(
            buttons=buttons,
            active=0,
            x=1.0,  # Set the x position to the right side
            xanchor='right',  # Set the x anchor to right
            y=1.0,  # Set the y position to the top
            yanchor='top'  # Set the y anchor to top
        )
    ]
)

# Create the figure
fig = go.Figure(data=data, layout=layout)

# Display the plot
fig.show()

In [None]:
# Create traces for each entity
data = []
for entity in df3['Entity'].unique():
    trace = go.Scatter(
        x=df3[df3['Entity'] == entity]['Year'],
        y=df3[df3['Entity'] == entity]['Deaths - Malaria - Sex: Both - Age: All Ages (Number)'],
        mode='lines',
        name=entity
    )
    data.append(trace)

# Create a dropdown menu for entity selection
buttons = []
for entity in df3['Entity'].unique():
    button = dict(
        label=entity,
        method="update",
        args=[{"visible": [entity in trace['name'] for trace in data]},
              {"title": f"Deaths from Malaria for {entity}"}]
    )
    buttons.append(button)

# Create the layout with entity dropdown menu
layout = go.Layout(
    title='Deaths from Malaria',
    xaxis=dict(title='Year'),
    yaxis=dict(title='Number of Deaths'),
    updatemenus=[
        dict(
            buttons=buttons,
            active=0,
            x=1.0,  # Set the x position to the right side
            xanchor='right',  # Set the x anchor to right
            y=1.0,  # Set the y position to the top
            yanchor='top'  # Set the y anchor to top
        )
    ]
)

# Create the figure
fig = go.Figure(data=data, layout=layout)

# Display the plot
fig.show()