In [3]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import scipy.stats as st
import requests
import json
import hvplot.pandas

# Import the API key
from config import geoapify_key

# Study data files
mort_female_path = "Data/mortality_rate_female.csv"
mort_male_path = "Data/mortality_rate_male.csv"
life_df_path = "Data/life_df.csv"
world_path = "Data/world_latitude_longitude.csv"

# Read the female and male mortality tables, the life-expectancy table,
# and the country latitude/longitude lookup
mort_female = pd.read_csv(mort_female_path)
mort_male = pd.read_csv(mort_male_path)
life_df = pd.read_csv(life_df_path)
world_df = pd.read_csv(world_path)

# Combine the mortality data into a single DataFrame.
# The female table is the left side, so its overlapping columns get the "_x"
# suffix and the male table's get "_y". A left join keeps every row of the
# female table even when the male table has no matching country.
mort_data_complete = pd.merge(mort_female, mort_male, how="left", on="Country Name")

# Display the data table for preview
mort_data_complete.head()

Unnamed: 0,Country Name,Country Code_x,Indicator Name_x,Indicator Code_x,1960_x,1961_x,1962_x,1963_x,1964_x,1965_x,...,2013_y,2014_y,2015_y,2016_y,2017_y,2018_y,2019_y,2020_y,2021_y,2022_y
0,Aruba,ABW,"Mortality rate, adult, female (per 1,000 femal...",SP.DYN.AMRT.FE,162.457,158.339,156.43,152.758,152.003,150.034,...,138.647,136.992,135.037,134.258,130.629,127.002,124.312,124.099,140.466,
1,Africa Eastern and Southern,AFE,"Mortality rate, adult, female (per 1,000 femal...",SP.DYN.AMRT.FE,384.245064,379.762566,376.873909,374.196782,366.660741,367.763972,...,344.462364,336.276302,329.495008,322.218327,314.493961,309.165671,302.772441,311.485312,332.823557,
2,Afghanistan,AFG,"Mortality rate, adult, female (per 1,000 femal...",SP.DYN.AMRT.FE,550.189,543.6,537.703,531.856,526.179,520.698,...,271.596,274.007,278.424,274.045,311.655,319.849,305.768,318.587,342.158,
3,Africa Western and Central,AFW,"Mortality rate, adult, female (per 1,000 femal...",SP.DYN.AMRT.FE,429.52364,426.334866,424.349362,421.439862,418.023332,415.872967,...,343.312223,341.80785,339.664654,333.254417,332.079861,329.087406,326.635576,337.587341,346.620097,
4,Angola,AGO,"Mortality rate, adult, female (per 1,000 femal...",SP.DYN.AMRT.FE,417.058,419.386,416.82,415.982,413.565,409.164,...,332.765,326.818,321.296,316.08,308.611,308.888,305.594,313.481,331.364,


In [None]:

# Readable names for the 2019-2021 mortality columns. In the merged frame the
# "_x" columns come from the female table and the "_y" columns from the male one.
column_names = {
    '2019_x': '2019 Female Mortality',
    '2019_y': '2019 Male Mortality',
    '2020_x': '2020 Female Mortality',
    '2020_y': '2020 Male Mortality',
    '2021_x': '2021 Female Mortality',
    '2021_y': '2021 Male Mortality',
}

# Keep the country plus exactly the columns that are about to be renamed
columns_to_analyze = ['Country Name', *column_names]

# Select and rename in one pass
mort_df = mort_data_complete[columns_to_analyze].rename(columns=column_names)

# Preview the trimmed DataFrame
mort_df.head()


In [None]:
# Merge the mortality data with the life expectancy / medical doctor /
# average immunization data. A left join keeps every country present in
# mort_df, even when life_df has no matching row.
mort_life = pd.merge(mort_df, life_df, how="left", on="Country Name")

# Display the data table for preview
mort_life.head()

In [None]:
# Rank countries from highest to lowest life expectancy and list the first 15
sortedtop_life_df = mort_life.sort_values(
    'Life expectancy total population',
    ascending=False,
)

sortedtop_life_df.head(15)

In [None]:
# Rank countries from lowest to highest life expectancy (ascending is the
# sort_values default) and list the first 15
sortedbottom_life_df = mort_life.sort_values('Life expectancy total population')

sortedbottom_life_df.head(15)

In [None]:
# First pass at the five countries with the best life expectancy
countries = ['Japan', 'Switzerland', 'Spain', 'Italy', 'Singapore']

# Boolean mask: True for rows whose country is in the list above
in_top_countries = mort_life['Country Name'].isin(countries)
top_life = mort_life[in_top_countries]

# Show the matching rows
top_life

In [None]:
# Revised top-5 list: Spain and Italy are dropped because they are missing
# mortality data for 2020 and/or 2021. They are replaced by Sweden and
# Luxembourg, the next two countries by life expectancy that do have
# mortality data for 2020 and 2021.
countries = ['Japan', 'Switzerland', 'Singapore', 'Sweden', 'Luxembourg']

# Keep only the rows for the selected countries
in_top_countries = mort_life['Country Name'].isin(countries)
top_life = mort_life[in_top_countries]

# Show the matching rows
top_life

In [None]:
# The five countries with the worst life expectancy
countries = ['Central African Republic', 'Lesotho', 'Chad', 'Sierra Leone', 'Nigeria']

# Keep only the rows for the selected countries
in_low_countries = mort_life['Country Name'].isin(countries)
low_life = mort_life[in_low_countries]

# Show the matching rows
low_life

## Bar graphs of mortality rates (per 1,000 adults) for the top 5 and bottom 5 countries by life expectancy, before and during COVID-19 (2019 to 2021)

In [None]:
# Grouped bar chart: one cluster per top-5 country, one bar per year (females)
ax = top_life.plot(
    kind='bar',
    x='Country Name',
    y=['2019 Female Mortality', '2020 Female Mortality', '2021 Female Mortality'],
)

# Axis labels, title, and shortened legend entries
ax.set_xlabel('Country')
ax.set_ylabel('Mortality Rate (Per 1000 adults) ')
ax.set_title('Mortality Rates (Per 1000 adults) of Females in the Top 5 Countries from 2019-2021')
ax.legend(['2019 Female', '2020 Female', '2021 Female'], loc='best')

plt.tight_layout()
plt.show()


In [None]:
# Grouped bar chart: one cluster per top-5 country, one bar per year (males)
ax = top_life.plot(
    kind='bar',
    x='Country Name',
    y=['2019 Male Mortality', '2020 Male Mortality', '2021 Male Mortality'],
)

# Axis labels, title, and shortened legend entries
ax.set(
    xlabel='Country',
    ylabel='Mortality Rate (Per 1000 adults) ',
    title='Mortality Rates (Per 1000 adults) of Males in the Top 5 Countries from 2019-2021',
)
ax.legend(['2019 Male', '2020 Male', '2021 Male'], loc='lower right')

plt.tight_layout()
plt.show()



In [None]:
# Grouped bar chart: one cluster per bottom-5 country, one bar per year (females)
ax = low_life.plot(
    kind='bar',
    x='Country Name',
    y=['2019 Female Mortality', '2020 Female Mortality', '2021 Female Mortality'],
)

# Axis labels, title, and shortened legend entries
ax.set_xlabel('Country')
ax.set_ylabel('Mortality Rate (Per 1000 adults) ')
ax.set_title('Mortality Rates (Per 1000 adults) of Females in the Bottom 5 Countries from 2019-2021')
ax.legend(['2019 Female', '2020 Female', '2021 Female'], loc='lower right')

plt.tight_layout()
plt.show()


In [None]:
# Grouped bar chart: one cluster per bottom-5 country, one bar per year (males)
ax = low_life.plot(
    kind='bar',
    x='Country Name',
    y=['2019 Male Mortality', '2020 Male Mortality', '2021 Male Mortality'],
)

# Axis labels, title, and shortened legend entries
ax.set(
    xlabel='Country',
    ylabel='Mortality Rate (Per 1000 adults) ',
    title='Mortality Rates (Per 1000 adults) of Males in the Bottom 5 Countries from 2019-2021',
)
ax.legend(['2019 Male', '2020 Male', '2021 Male'], loc='lower right')

plt.tight_layout()
plt.show()



## Plot Map 

In [None]:
# Assess the top 5 and bottom 5 countries together
countries = ['Japan', 'Switzerland', 'Singapore', 'Sweden', 'Luxembourg',
             'Central African Republic', 'Lesotho', 'Chad', 'Sierra Leone', 'Nigeria']

# Extract rows where Country Name is in the list of countries.
# (outline_df is no longer bound to the plain list first; it is only ever
# the filtered DataFrame.)
outline_df = mort_life[mort_life['Country Name'].isin(countries)]

# Sort the values from highest life expectancy to lowest
outline_df = outline_df.sort_values(by=['Life expectancy total population'], ascending=False)

outline_df

In [None]:
# Grouped bar chart of female mortality for the ten selected countries,
# in the row order of outline_df
ax = outline_df.plot(
    kind='bar',
    x='Country Name',
    y=['2019 Female Mortality', '2020 Female Mortality', '2021 Female Mortality'],
)

# Axis labels, title, and year-only legend entries
ax.set_xlabel('Country')
ax.set_ylabel('Mortality Rate (Per 1000 Adults) ')
ax.set_title('Mortality Rates (Per 1000 adults) of Females in the Top and Bottom 5 Countries in terms of Life Expectancy from 2019-2021')
ax.legend(['2019', '2020', '2021'], loc='lower right')

plt.tight_layout()
plt.show()

In [None]:
# Grouped bar chart of male mortality for the ten selected countries,
# in the row order of outline_df
ax = outline_df.plot(
    kind='bar',
    x='Country Name',
    y=['2019 Male Mortality', '2020 Male Mortality', '2021 Male Mortality'],
)

# Axis labels, title, and year-only legend entries
ax.set(
    xlabel='Country',
    ylabel='Mortality Rate(Per 1000 Adults) ',
    title='Mortality Rates (Per 1000 adults) of Males in the Top and Bottom 5 Countries in terms of Life Expectancy from 2019-2021',
)
ax.legend(['2019', '2020', '2021'], loc='lower right')

plt.tight_layout()
plt.show()



### Plotting the data on a map and determining the number of hospitals in each country

In [None]:
# Preview the first 10 rows of the latitude/longitude lookup table
world_df.head(10)

In [None]:
# Restrict the coordinate table to the 5 countries with the highest and the
# 5 with the lowest life expectancies
countries = ['Japan', 'Switzerland', 'Singapore', 'Sweden', 'Luxembourg',
             'Central African Republic', 'Lesotho', 'Chad', 'Sierra Leone', 'Nigeria']

# Extract rows where the country is in the list of countries.
# (life_lat_lng_df is no longer bound to the plain list first; it is only
# ever the filtered DataFrame.)
life_lat_lng_df = world_df[world_df['country'].isin(countries)]

life_lat_lng_df

In [None]:
# Reduce the coordinate table to country, longitude and latitude, then rename
# the columns so the country key matches the other tables ('Country Name')
columns_to_analyze = ['country', 'longitude', 'latitude']

column_names = {
    'country': 'Country Name',
    'latitude': 'LAT',
    'longitude': 'LNG',
}

# Select and rename in one pass
lat_lng_df = life_lat_lng_df[columns_to_analyze].rename(columns=column_names)

# Show the cleaned coordinate table
lat_lng_df


In [None]:
# Merge the coordinates with the mortality / life-expectancy data for the
# selected countries. A left join keeps every row of lat_lng_df.
lat_long_complete = pd.merge(lat_lng_df, outline_df, how="left", on="Country Name")

# Order from highest to lowest life expectancy
lat_long_complete = lat_long_complete.sort_values(by=['Life expectancy total population'], ascending=False)
lat_long_complete

In [None]:
# Set the parameters for the type of search.
# NOTE(review): the radius is presumably in metres (Geoapify circle filter) -
# confirm against the Places API documentation.
radius = 500000

# Set base URL (identical for every request, so defined once outside the loop)
base_url = "https://api.geoapify.com/v2/places"

# Collect one record per country; the DataFrame is built once after the loop.
# DataFrame.append was removed in pandas 2.0 and copied the whole frame on
# every call.
hospital_records = []

# Iterate through the lat_long_complete DataFrame
for _, row in lat_long_complete.iterrows():

    # Get the country's name and coordinates
    country = row["Country Name"]
    latitude = row["LAT"]
    longitude = row["LNG"]

    # Set up a parameters dictionary.
    # NOTE(review): "limit" asks for at most 150 results, so the count below
    # presumably saturates at 150 for hospital-dense areas - confirm whether
    # paging is needed.
    params = {
        "limit": 150,
        "apiKey": geoapify_key,
        "format": "json",
        "categories": "healthcare.hospital",
        "filter": f"circle:{longitude},{latitude},{radius}",
        "bias": f"proximity:{longitude},{latitude}"
    }

    # Make an API request using the params dictionary
    response = requests.get(base_url, params=params)

    # Raise a clear HTTP error (bad key, quota, bad request) here instead of
    # an opaque KeyError on "features" further down
    response.raise_for_status()

    # Convert the API response to JSON format
    data = response.json()

    # Count the number of hospitals in the response
    hospital_count = len(data["features"])

    hospital_records.append({
        "Country Name": country,
        "Latitude": latitude,
        "Longitude": longitude,
        "Hospital Count": hospital_count,
    })

# Build the hospital DataFrame in one step; passing the column list keeps the
# expected columns even when there are no records
hospital_df = pd.DataFrame(
    hospital_records,
    columns=["Country Name", "Latitude", "Longitude", "Hospital Count"],
)

# Display the hospital_df DataFrame
hospital_df


In [None]:
# Merge the hospital counts with the mortality / life-expectancy data for the
# selected countries. A left join keeps every row of hospital_df.
final_complete = pd.merge(hospital_df, outline_df, how="left", on="Country Name")

# Order from highest to lowest life expectancy
final_complete = final_complete.sort_values(by=['Life expectancy total population'], ascending=False)
final_complete

In [None]:
%%capture --no-display

# Configure the map plot
map_plot = final_complete.hvplot.points(
    "Latitude",
    "Longitude",
    geo=True,
    tiles="OSM",
    frame_width=700,
    frame_height=700,
    #size="Life expectancy total population",
    color = "Country Name",
    #hover_cols=["Hospital Count", "Average Immunization", "Medical doctors (number)"]
).opts(title="Plot Title")

map_plot

## T-test: is there a significant difference in average immunization rates between the 5 countries with the highest and the 5 with the lowest life expectancy?

In [None]:
# Narrow outline_df to the two columns used for the immunization comparison:
# the country name and its average immunization value
columns_to_analyze = ['Country Name', 'Average Immunization']

immunization_df = outline_df[columns_to_analyze]

# Show the narrowed table
immunization_df

In [None]:
# Split the immunization data into two groups: group 1 holds the 5 countries
# with the highest life expectancy, group 2 holds the remaining countries
# (the 5 with the lowest life expectancy).

# Define the condition to split the data
group1_condition = immunization_df['Country Name'].isin(['Japan', 'Switzerland', 'Singapore', 'Luxembourg', 'Sweden'])
group2_condition = ~group1_condition

# Select each group's average immunization values with its boolean mask.
# (Previously group1 looked up a non-existent 'group1_condition' column, which
# raises KeyError, and group2 ended up bound to the string 'Group 2'.)
group1 = immunization_df.loc[group1_condition, 'Average Immunization']
group2 = immunization_df.loc[group2_condition, 'Average Immunization']

group1

In [None]:
  # Scatter Plot of Data
# Top panel: each group's values plotted against their position in the group
plt.subplot(2, 1, 1)
plt.scatter(range(len(group1)), group1, label="Top 5 Countries")
plt.scatter(range(len(group2)), group2, label="Bottom 5 Countries")
plt.legend()

# Bottom panel: overlaid density histograms of the two groups
plt.subplot(2, 1, 2)
plt.hist(group1, 10, density=True, alpha=0.7, label="Top 5 Countries")
plt.hist(group2, 10, density=True, alpha=0.7, label="Bottom 5 Countries")

# Dashed vertical lines at each group's mean. (The undefined names population1
# and population2 raised NameError; the groups plotted here are group1/group2.)
plt.axvline(group1.mean(), color='k', linestyle='dashed', linewidth=1)
plt.axvline(group2.mean(), color='k', linestyle='dashed', linewidth=1)
plt.legend()
    
    #return group1, group2
    