# World Health Organization Predictive Modelling Project

<img src="https://media.istockphoto.com/id/1363197264/photo/world-health-organization-and-healthcare.jpg?s=612x612&w=0&k=20&c=QqW9vR5axeNVPqImWnIvECciJXR7Y50tBerUo3TQPcs=">

### Life Expectancy Predictor Function

In [4]:
# Column names handed to the pickled scaler, in the order the original code used.
# NOTE(review): presumably this is the order the scaler was fitted with - confirm.
_FEATURE_COLUMNS = [
    'Adult_mortality', 'const', 'Economy_status_Developed',
    'Reg_Central America and Caribbean', 'Reg_South America', 'Under_five_deaths',
    'GDP_per_capita', 'Reg_Oceania', 'Reg_European Union', 'Schooling', 'BMI',
    'Incidents_HIV',
]

# Upper GDP bound quoted in the original validation message (112418 USD). The
# original comparison used 1124180, which looks like a typo with an extra digit.
_MAX_GDP_PER_CAPITA = 112418

_GDP_PROMPT = "What is the GDP per capita in your country? /USD: "

_REGION_PROMPT = (
    "What region of the world do you live in? Please enter the number for one of the following: "
    "\n1 - Middle East, \n2 - European Union, \n3 - Asia, \n4 - South America,"
    "\n5 - Central America and Caribbean, \n6 - Rest of Europe, \n7 - Africa, "
    "\n8 - Oceania, \n9 - North America "
)

# Region number -> dummy column set to 1 for the complex model.
# Regions 1, 3, 6, 7 and 9 have no dummy column in that model.
_COMPLEX_REGION_COLUMNS = {
    2: 'Reg_European Union',
    4: 'Reg_South America',
    5: 'Reg_Central America and Caribbean',
    8: 'Reg_Oceania',
}

# Region number -> dummy column set to 1 for the simple model.
# Region 7 (Africa) sets no column.
# NOTE(review): 'North America' lacks the 'Reg_' prefix used by every other
# region column. It is kept as-is because the pickled model is not visible
# here - confirm against the model's training columns.
_SIMPLE_REGION_COLUMNS = {
    1: 'Reg_Middle East',
    2: 'Reg_European Union',
    3: 'Reg_Asia',
    4: 'Reg_South America',
    5: 'Reg_Central America and Caribbean',
    6: 'Reg_Rest of Europe',
    8: 'Reg_Oceania',
    9: 'North America',
}

# Scaled columns the simple model does not use.
_SIMPLE_DROPPED_COLUMNS = [
    'Adult_mortality', 'Economy_status_Developed', 'Under_five_deaths',
    'Schooling', 'BMI', 'Incidents_HIV',
]

# Columns (re)set to 0 for the simple model before the region dummy is applied.
# Insertion order is preserved from the original code because it determines
# the column order passed to the model.
_SIMPLE_ZEROED_COLUMNS = [
    'Reg_Middle East', 'Reg_Asia', 'Reg_Rest of Europe', 'Reg_Oceania',
    'North America',
]


def _ask_float(prompt, low, high):
    """Prompt until the user enters a number in [low, high]; return it as a float."""
    while True:
        raw = input(prompt)
        try:
            value = float(raw)
        except ValueError:
            print("Invalid input. Please enter a numeric value.")
            continue
        # A chained comparison also rejects NaN, which compares False to everything.
        if low <= value <= high:
            return value
        print(f"Invalid input: must be a number between {low} and {high}.")


def _ask_choice(prompt, choices, error_message):
    """Prompt until the user enters an integer contained in choices; return it."""
    while True:
        raw = input(prompt)
        try:
            choice = int(raw)
        except ValueError:
            choice = None
        if choice in choices:
            return choice
        print(error_message)


def _ask_region():
    """Ask for the region number (1 to 9) and return it as an int."""
    return _ask_choice(
        _REGION_PROMPT, range(1, 10),
        "Invalid input: region must be a number 1 to 9",
    )


def _load_pickle(path):
    """Return the object stored in the pickle file at path."""
    import pickle

    # NOTE: pickle.load can execute arbitrary code embedded in the file, so
    # these model files must only ever come from a trusted source.
    with open(path, 'rb') as file:
        return pickle.load(file)


def _default_features():
    """Return a feature dict with every column 0 except 'const', which is 1."""
    features = dict.fromkeys(_FEATURE_COLUMNS, 0)
    features['const'] = 1
    return features


def _scale_features(features):
    """Scale one row of raw features with the pickled scaler and return a DataFrame."""
    import pandas as pd

    raw_frame = pd.DataFrame([features], columns=_FEATURE_COLUMNS)
    scaler = _load_pickle('minmax_scaler.pkl')
    scaled = pd.DataFrame(scaler.transform(raw_frame), columns=_FEATURE_COLUMNS)
    # Per the original author's note, scaling turns the constant into 0 and
    # the model needs it to be 1, so it is restored here.
    scaled['const'] = 1
    return scaled


def _predict_complex():
    """Collect every input for the complex model and return its prediction."""
    features = _default_features()

    region_column = _COMPLEX_REGION_COLUMNS.get(_ask_region())
    if region_column is not None:
        features[region_column] = 1

    features['GDP_per_capita'] = _ask_float(_GDP_PROMPT, 0, _MAX_GDP_PER_CAPITA)
    features['Schooling'] = _ask_float(
        "How many years of schooling is provided in your country? ", 0, 50)

    status = _ask_choice(
        "Is the economic status of your country developed or developing? \n1 - developed \n2 - developing",
        (1, 2),
        "Invalid input: status must be 1 or 2",
    )
    features['Economy_status_Developed'] = 1 if status == 1 else 0

    features['BMI'] = _ask_float("What is the average BMI in your country? ", 0, 50)
    features['Adult_mortality'] = _ask_float(
        "What is the adult mortality rate (per 1000) in your country?", 0, 1000)
    features['Under_five_deaths'] = _ask_float(
        "What is the number of under-five deaths per 1000 population? ", 0, 1000)
    features['Incidents_HIV'] = _ask_float(
        "What is the number of deaths (for ages 0-4 years) per 1000 live births due to HIV/AIDS?",
        0, 1000)

    scaled = _scale_features(features)
    model = _load_pickle('linear_regression_model.pkl')
    return model.predict(scaled)


def _predict_simple():
    """Collect GDP and region for the simple model and return its prediction."""
    features = _default_features()
    # The GDP answer must be stored BEFORE the frame is built and scaled; the
    # original built the frame first, so the user's GDP was never used.
    features['GDP_per_capita'] = _ask_float(_GDP_PROMPT, 0, _MAX_GDP_PER_CAPITA)
    region = _ask_region()

    scaled = _scale_features(features)
    scaled = scaled.drop(columns=_SIMPLE_DROPPED_COLUMNS)
    for column in _SIMPLE_ZEROED_COLUMNS:
        scaled[column] = 0

    region_column = _SIMPLE_REGION_COLUMNS.get(region)
    if region_column is not None:
        scaled[region_column] = 1

    model = _load_pickle('simple_linear_regression_model.pkl')
    return model.predict(scaled)


def Life_expectancy_predictor():
    """Interactively estimate the average life expectancy in a country.

    Asks on stdin whether the user consents to sharing sensitive data. With
    consent ('y') it collects region, GDP per capita, schooling, economic
    status, BMI, adult mortality, under-five deaths and HIV deaths and uses
    'linear_regression_model.pkl'. Without consent ('n') it collects only GDP
    per capita and region and uses 'simple_linear_regression_model.pkl'.
    Both paths scale the inputs with 'minmax_scaler.pkl'; all three pickle
    files are read from the current working directory.

    Invalid or out-of-range answers are re-prompted, so a prediction is never
    made from a silently defaulted value. Any consent answer other than y/n
    prints a hint and ends the function.

    Returns:
        None. The prediction is printed to stdout.

    Raises:
        FileNotFoundError: If a required pickle file is missing.
    """
    print("Welcome to the WHO Life Expectancy Predictor!")

    consent = input(
        "Do you consent to using sensitive data for a more accurate life expectancy prediction? \n Y \n N "
    ).lower().strip()

    if consent == 'y':
        print("Thank you, you are now using the complex life expectancy calculator.")
        prediction = _predict_complex()
    elif consent == 'n':
        print("Thank you, you are now using the simple life expectancy calculator.")
        prediction = _predict_simple()
    else:
        print("Please enter y or n only.")
        return

    print(f"\nThe average life expectancy in your country is {prediction[0]:.1f} years.")
    print("Thank you for using the WHO Life Expectancy Predictor.")
        

In [5]:
# Run the interactive predictor: it prompts for answers with input() and prints the estimate.
Life_expectancy_predictor()

Welcome to the WHO Life Expectancy Predictor!


Do you consent to using sensitive data for a more accurate life expectancy prediction? 
 Y 
 N  y


Thank you, you are now using the complex life expectancy calculator.


What region of the world do you live in? Please enter the number for one of the following: 
1 - Middle East, 
2 - European Union, 
3 - Asia, 
4 - South America,
5 - Central America and Caribbean, 
6 - Rest of Europe, 
7 - Africa, 
8 - Oceania, 
9 - North America  5
What is the GDP per capita in your country? /USD:  4667
How many years of schooling is provided in your country?  10.5
Is the economic status of your country developed or developing? 
1 - developed 
2 - developing 2
What is the average BMI in your country?  28.7
What is the adult mortality rate (per 1000) in your country? 179
What is the number of under-five deaths per 1000 population?  16.9
What is the number of deaths (for ages 0-4 years) per 1000 live births due to HIV/AIDS? 0.39



The average life expectancy in your country is  72.5 years.
Thank you for using the WHO Life Expectancy Predictor.
