# World Health Organisation

## The Function

#### This function is case insensitive and re-prompts the user until a valid numeric value is entered

In [1]:
# importing necessary libraries

import math

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns


In [2]:
def input_in_range(prompt: str, min_value: float, max_value = None):
    '''Prompt repeatedly until the user enters a finite number inside the allowed range.

    Parameters
    ----------
    prompt : str
        Text shown to the user; the allowed range is appended to it.
    min_value : float
        Smallest accepted value (inclusive).
    max_value : float, optional
        Largest accepted value (inclusive). ``None`` (the default) means
        there is no upper bound.

    Returns
    -------
    float or None
        The entered number, or ``None`` if the user submitted an empty line.
    '''
    # The prompt does not change between attempts, so build it once.
    # Compare against None explicitly so an upper bound of 0 is still shown.
    if max_value is not None:
        prompt_str = f"{prompt} [{min_value}, {max_value}]:"
    else:
        # The lower bound is inclusive, hence ">=" rather than ">".
        prompt_str = f"{prompt} [>= {min_value}]"

    while True:
        # Ask for the string from the user
        data_str = input(prompt_str)
        # If nothing was entered - return None
        if data_str == '':
            return None
        # Only the conversion itself can raise ValueError, so guard just that
        try:
            data = float(data_str)
        except ValueError:
            continue
        # float() also parses "nan" and "inf". NaN compares False against any
        # bound and would otherwise be returned as a valid value, so reject
        # every non-finite number explicitly.
        if not math.isfinite(data):
            continue
        # Check the range (both bounds are inclusive)
        if data < min_value:
            continue
        if max_value is not None and data > max_value:
            continue
        # If all was fine return the value
        return data
            
def input_as_bool(prompt: str):
    '''Ask a yes/no question and encode the answer as an integer flag.

    The question is repeated until the reply is "Y" or "N" (any letter case)
    or an empty line. Returns 1 for yes, 0 for no and None when nothing was
    entered.
    '''
    # Map of accepted (upper-cased) replies to the value handed back
    flags = {'Y': 1, 'N': 0}
    question = f"{prompt} [Y/N]"
    while True:
        reply = input(question)
        # An empty reply means the user has no data for this field
        if reply == '':
            return None
        choice = reply.upper()
        if choice in flags:
            return flags[choice]

In [3]:
## The function that can run DOT99's model on any data provided

def Life_expectancy_model(minimal_model,bp_model):
    '''This function is case sensitive - so please follow the input guidelines'''
    coefs=[]
    data_lst=[]
    sensitive=input("Do you consent to using advanced population data, which may include protected information, for better accuracy (Y/N)?")
    if sensitive.upper()=='N': 
        model=minimal_model # if answer no, minimalistic model to be used
    else:
        model=bp_model # if answer yes, best performing model
        
    for i in range(1,model.Features.count()): # create a loop that allows user to specify which columns they have data for and can then create 
        # a new set of columns in the model for the target to be predicted
        # Features represents the column name in the models dataframe - with the other column called Coefficient
        feature = model['Features'][i]
        prompt = f"Please enter the data for {feature}"
        
        match feature:
            case 'Year_rob':
                data = input_in_range(prompt, 2000, 2035)
            case 'Hepatitis_B' | 'Thinness_ten_nineteen_years':
                data = input_in_range(prompt, 0, 100)
            case 'Infant_deaths' | 'Under_five_deaths' | 'Adult_mortality' | 'Incidents_HIV':
                data = input_in_range(prompt, 0, 1000)
            case 'BMI' | 'GDP_rob' | 'Schooling':
                data = input_in_range(prompt, 0)
            case _:
                data = input_as_bool(prompt)
        
        # Check the entered result and skip if nothing was entered
        if data is None:
            continue
        
        # Add coefficient
        coefs.append(model.Coefficient[i])
        
        # Cover special cases for year and gdp and scale them
        match feature:
            case 'Year_rob':
                data = (data-2007.5)/(2011.25-2003.75) # robust scaling formula with median, Q3 and Q1 values of Year
            case 'GDP_rob':
                data = (data-4217)/(12557-1415.75) # robust scaling formual with median, Q3 and Q1 values of GDP
        
        # Append data
        data_lst.append(data)
        
    # y=mx+c
    life_expectancy_pred=[] # creating a list to be summed to get target predictions
    life_expectancy_pred.append(model.Coefficient[0]) # add constant from the chosen model into list
    for j in range(len(coefs)):
        life_expectancy_pred.append(coefs[j]*data_lst[j]) # append each multiplication of coefficient and data into list
        
    return round(sum(life_expectancy_pred), 1) # sum and round the values
    # in the list to give prediction of life expectancy
    # y=c+mx(year)+mx(bmi)+...+...+...

In [4]:
# Load the coefficient table of the best-performing model and display it

bp_model = pd.read_csv(filepath_or_buffer='bp_model.csv')
bp_model

Unnamed: 0,Features,Coefficient
0,const,83.32353
1,Year_rob,0.255034
2,Infant_deaths,-0.057525
3,Under_five_deaths,-0.047062
4,Adult_mortality,-0.046741
5,Hepatitis_B,-0.006387
6,BMI,-0.118397
7,Incidents_HIV,0.101031
8,GDP_rob,0.211037
9,Thinness_ten_nineteen_years,-0.016427


In [5]:
# Load the coefficient table of the minimal model and display it
minimal_model = pd.read_csv(filepath_or_buffer='minimal_model.csv')
minimal_model

Unnamed: 0,Features,Coefficient
0,const,79.650756
1,Year_rob,0.199487
2,Infant_deaths,-0.065692
3,Under_five_deaths,-0.041486
4,Adult_mortality,-0.045164
5,Schooling,0.105802
6,Economy_status_Developed,3.450456
7,Region_Asia,0.415737
8,Region_Central America and Caribbean,1.843418
9,Region_European Union,-1.223869


In [None]:

# Run the interactive predictor: it asks for consent, then for each feature value
expected_death_age = Life_expectancy_model(
    minimal_model=minimal_model,
    bp_model=bp_model,
)
print(f"{expected_death_age} is the life expectancy predicted (rounded to 1 dp)")


Do you consent to using advanced population data, which may include protected information, for better accuracy (Y/N)? y
Please enter the data for Year_rob [2000, 2035]: mkphf


Finland (2012) 78.4