<center>
<img src='https://logos-world.net/wp-content/uploads/2021/03/World-Health-Organization-WHO-Logo.png' alt="World Health Organization" width="400px"/>

<br />

<font size="6em">Life Expectancy Predictive Model - Function</font>

<font size="5em"><b>WHO R YOU?</b> <i>Analytics</i></font>

</center>

<br /><br />

- Ed Boynton
- Prasharn Selvaranjan
- Alistair Boyer
- Elif Varli

<br /><br /><br />
# Function Setup

### Imports

In [19]:
import math
from typing import Optional

import numpy as np
import pandas as pd

### Model

In [20]:
# Coefficients of the two linear life-expectancy models; `const` is the intercept.
# NOTE: key order matters - the questions are asked in the order the keys appear here.

# Model used when the user consents to supplying the full set of inputs.
params_consent = dict(
    Adult_mortality=-0.04863896301319667,
    Economy_status_Developed=0.8143198198512267,
    Under_five_deaths=-0.08177470514965632,
    GDP_per_capita=2.6787843335664065e-05,
    Alcohol_consumption=0.07480319207717549,
    Schooling=0.09735207109060469,
    BMI=-0.12977701739270453,
    Incidents_HIV=0.10770853938625982,
    Thinness_ten_nineteen_years=-0.03981550592431052,
    const=83.47233686963484,
)

# Model used when the user declines; `*_bucketised` inputs take a bucket index
# rather than the exact value.
params_privacy = dict(
    Adult_mortality_bucketised=-5.177170571783544,
    Schooling_bucketised=1.2221083193080096,
    GDP_per_capita=-3.862883741217992e-05,
    Alcohol_consumption=0.15704042731889625,
    BMI=0.3086158898451559,
    const=68.69898530958652,
)

In [21]:
# Bucket boundaries for the inputs that the privacy model takes in bucketised form.
# Each list has one more edge than there are buckets; the outermost edges
# (-1e+20 / 1e+20) act as catch-all lower and upper limits.
bin_edges = dict(
    Adult_mortality=[-1e+20, 94.6415, 141.9155, 190.0275, 272.4865, 1e+20],
    Schooling=[-1e+20, 4.4, 6.8, 8.9, 10.8, 1e+20],
)

### Questions

In [22]:
# Region names accepted as an answer to the "Region" question.
region_list = [
    'Africa',
    'Asia',
    'Central America and Caribbean',
    'European Union',
    'Middle East',
    'North America',
    'Oceania',
    'Rest of Europe',
    'South America',
]

def check_float_range(value, minimum, maximum):
    """
    Convert ``value`` to a float and check that it lies in an inclusive range.

    Parameters
    ----------
    value : str or number
        Anything accepted by ``float()``.
    minimum : float or None
        Inclusive lower bound; ``None`` disables the lower check.
    maximum : float or None
        Inclusive upper bound; ``None`` disables the upper check.

    Returns
    -------
    float
        The converted value.

    Raises
    ------
    ValueError
        If ``value`` cannot be parsed, is not finite (``nan``/``inf``) or
        falls outside the requested range.
    """
    value = float(value)

    # "nan" and "inf" parse successfully but are never a usable answer, and
    # they would otherwise slip through whenever a bound is None.
    if not math.isfinite(value):
        raise ValueError(f"{value!r} is not a finite number")

    # Explicit exceptions instead of `assert`, which is stripped by `python -O`.
    if minimum is not None and value < minimum:
        raise ValueError(f"{value!r} is below the minimum of {minimum!r}")
    if maximum is not None and value > maximum:
        raise ValueError(f"{value!r} is above the maximum of {maximum!r}")

    return value


# Question catalogue, keyed by input name.
# Each entry holds:
#   "text"      - the prompt shown to the user
#   "validator" - callable taking the raw answer string; it returns the cleaned
#                 value, or signals an invalid answer by returning None or raising.
questions = {
  "Region": {
    "text": f"What world region is the country in?\nChoose from {'; '.join(region_list)}",
    # Case-insensitive match that returns the canonical spelling from region_list.
    # (str.title() cannot be used: it turns "Rest of Europe" into "Rest Of Europe"
    # and "Central America and Caribbean" into "Central America And Caribbean",
    # neither of which is in region_list.)
    "validator": lambda x: next(
        (region for region in region_list if region.lower() == x.strip().lower()),
        None,
    ),
  },
  "Year": {
    "text": "What is the year of interest?",
    "validator": lambda x: check_float_range(x, 0, 3000),
  },
  "Infant_deaths": {
    "text": "What is the infant mortality [deaths of under 1 year olds per 1000 population]?",
    "validator": lambda x: check_float_range(x, 0, 1000),
  },
  "Under_five_deaths": {
    "text": "What is the child mortality [deaths of under 5 year olds per 1000 population]?",
    "validator": lambda x: check_float_range(x, 0, 1000),
  },
  "Adult_mortality": {
    "text": "What is the adult mortality [deaths of 15-60 years olds per 1000 population]?",
    "validator": lambda x: check_float_range(x, 0, 1000),
  },
  "Alcohol_consumption": {
    "text": "What is the alcohol consumption? [litres per capita (15+)]?",
    "validator": lambda x: check_float_range(x, 0, None),
  },
  "Hepatitis_B": {
    "text": "What is the % hepatitis B [hepB] immunization coverage for 1 year olds?",
    "validator": lambda x: check_float_range(x, 0, 100),
  },
  "Measles": {
    "text": "What is the prevelance of measles [reported cases per 1000 population]?",
    "validator": lambda x: check_float_range(x, 0, 1000),
  },
  "BMI": {
    "text": "What is the average Body Mass Index of the entire population?",
    "validator": lambda x: check_float_range(x, 0, None),
  },
  "Polio": {
    "text": "What is the % polio [Pol3] immunization coverage for 1 year olds?",
    "validator": lambda x: check_float_range(x, 0, 100),
  },
  "Diphtheria": {
    "text": "What is the % diphtheria tetanus toxoid and pertussis (DTP3) immunization coverage for 1 year olds?",
    "validator": lambda x: check_float_range(x, 0, 100),
  },
  "Incidents_HIV": {
    "text": "What is the incidents of 0-4 year olds HIV/AIDS deaths [deaths per 1000 live births]?",
    "validator": lambda x: check_float_range(x, 0, 1000),
  },
  "GDP_per_capita": {
    "text": "What is the Gross Domestic Product per capita [in USD]?",
    "validator": lambda x: check_float_range(x, 0, None),
  },
  "Population_mln": {
    "text": "What is the population of the country [millions]?",
    "validator": lambda x: check_float_range(x, 0, None),
  },
  "Thinness_ten_nineteen_years": {
    "text": "What is the prevalence (%) of thinness among children and adolescents for age 10 to 19 [inclusive]?",
    "validator": lambda x: check_float_range(x, 0, 100),
  },
  "Thinness_five_nine_years": {
    "text": "What is the prevalence (%) of thinness among children for age 5 to 9 [inclusive]?",
    "validator": lambda x: check_float_range(x, 0, 100),
  },
  "Schooling": {
    "text": "What is the average number of years a child spends in school?",
    "validator": lambda x: check_float_range(x, 0, 18),
  },
  "Economy_status_Developed": {
    "text": "Is the country classed as developed [Y/N]?",
    # "y" -> 1, "n" -> 0, anything else -> None (invalid)
    "validator": lambda x: {"y": 1, "n": 0}.get(x.strip().lower()),
  },
}

### Get Responses

In [23]:
# First prompt shown to the user; the Y/N answer selects which model's questions are asked.
consent_question = (
    "Do you consent to using advanced population data, "
    "which may include protected information, "
    "for better accuracy? (Y/N)\n"
)


def _get_responses():
    """
    Interactively collect the inputs needed to predict life expectancy.

    The consent question is asked first to choose between the consent model
    (``params_consent``) and the privacy model (``params_privacy``). One
    question is then asked per model input, in the order of the chosen
    parameter dict; the ``const`` entry is skipped. Inputs whose name ends in
    ``_bucketised`` are answered by picking a bucket via ``_get_bucket_input``;
    every other input is typed in and checked with its validator from
    ``questions``.

    Returns
    -------
    tuple
        ``(responses, consent)`` where ``responses`` maps each model input to
        its validated value and ``consent`` is True when the consent model was
        chosen. ``(None, None)`` if the user entered "Q" at any prompt.

    Raises
    ------
    KeyError
        If a model input has no entry in ``questions``.
    """
    bucket_suffix = "_bucketised"

    print('Enter "Q" at any time to quit.')

    # --- choose the model --------------------------------------------------
    while True:
        model_selector = input(consent_question).strip().lower()

        if model_selector == 'q':
            print("Thank you!")
            return None, None
        if model_selector in ('y', 'n'):
            break

        print('Unexpected value, please enter Y/N; or Q to quit.')

    consent = model_selector == 'y'
    columns = params_consent if consent else params_privacy

    # --- ask one question per model input -----------------------------------
    responses = dict()
    for column in columns:

        # the intercept is not a user input
        if column == "const":
            continue

        # bucketised inputs: show the base question, then let the user pick a bucket
        if column.endswith(bucket_suffix):
            base_label = column[:-len(bucket_suffix)]
            print(questions[base_label]['text'])
            bucket = _get_bucket_input(base_label)
            # _get_bucket_input reports a quit request as "q"; it must end the
            # session here rather than be stored as if it were a bucket index
            if bucket == "q":
                print("Thank you!")
                return None, None
            responses[column] = bucket
            continue

        # looked up outside the retry loop so a missing entry fails loudly
        # instead of being mistaken for an invalid answer and retried forever
        question = questions[column]

        while True:
            response = input(question['text'] + "\n").strip().lower()

            # quit case
            if response == "q":
                print("Thank you!")
                return None, None

            try:
                value = question['validator'](response)
            except Exception:
                # validators signal a bad answer by raising (the exact type
                # depends on the validator) or by returning None
                value = None

            if value is not None:
                responses[column] = value
                break

            print("Please enter a valid response; or Q to quit.")

    return responses, consent

In [24]:
def _get_bucket_input(column_label):
    """
    Ask the user to pick one of the buckets defined for ``column_label``.

    Prints a numbered menu built from ``bin_edges[column_label]`` and keeps
    reading answers until a valid bucket number or "Q" is entered.

    Parameters
    ----------
    column_label : str
        Key into ``bin_edges``.

    Returns
    -------
    int or str
        The zero-based bucket index (menu number minus one), or the string
        ``"q"`` if the user asked to quit.

    Raises
    ------
    KeyError
        If ``bin_edges`` has no entry for ``column_label``.
    """
    # load the edges from bin_edges; N+1 edges describe N buckets
    edges = bin_edges[column_label]
    n_buckets = len(edges) - 1

    # first bucket: shown open-ended (its lower edge is not displayed)
    print(f'{1}) <= {edges[1]}')

    # interior buckets
    for n in range(2, n_buckets):
        print(f'{n}) {edges[n-1]} - {edges[n]}')

    # last bucket: numbered from the bucket count, not from the loop variable,
    # which is undefined when there are no interior buckets
    print(f'{n_buckets}) > {edges[-2]}')

    # get input
    while True:
        response = input().strip().lower()

        # quit response
        if response == "q":
            return "q"

        try:
            choice = int(response)
        except ValueError:
            choice = None

        # Only menu numbers 1..n_buckets are valid. Indexing `edges` as a check
        # would also accept 0, negative numbers and n_buckets + 1.
        if choice is not None and 1 <= choice <= n_buckets:
            return choice - 1

        print(f"Please enter a number from 1 to {n_buckets}; or Q to quit.")


### Preparing Data

In [25]:
def engineer_features(dataframe):
    """
    Build the engineered feature set for the WHO life expectancy data.

    Works on a copy, so the frame passed in is not modified. For every key of
    the module-level ``bin_edges`` that is also a column of the frame, a
    ``<column>_bucketised`` column is added holding the bin number produced by
    ``pd.cut(..., labels=False, include_lowest=True)`` with that key's edges.
    A ``const`` column equal to 1.0 is added last.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Input data.

    Returns
    -------
    pandas.DataFrame
        Copy of the input with the extra columns.
    """
    engineered = dataframe.copy()

    for label, edges in bin_edges.items():
        # nothing to bucketise when the source column is absent
        if label not in engineered:
            continue
        bucket_index = pd.cut(
            engineered[label],
            bins=edges,
            labels=False,
            include_lowest=True,
        )
        engineered[f'{label}_bucketised'] = bucket_index

    # constant term for the model intercept
    engineered['const'] = 1.0

    return engineered

## Modelling Functions

In [26]:
def _calculate_life_expectancy(dataframe, consent):
    """
    Predict life expectancy with the model that matches the consent level.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Model inputs. It is not modified; the ``const`` column is added to a
        copy.
    consent : bool
        Truthy selects the consent model, falsy the privacy model.

    Returns
    -------
    The value returned by the selected model function.
    """
    # Add the intercept column on a new frame so the caller's data is left
    # untouched (assign returns a new DataFrame).
    model_inputs = dataframe.assign(const=1.0)

    if consent:
        # run consent model
        return _calculate_life_expectancy_consent(model_inputs)
    # run privacy model
    return _calculate_life_expectancy_privacy(model_inputs)


def _linear_prediction(dataframe, params):
    """
    Return the total of ``params[name] * dataframe[name]`` over every name in ``params``.

    Coefficients are matched to columns by name only, so the result does not
    depend on the index labels of ``dataframe``. Columns of ``dataframe`` that
    are not in ``params`` are ignored.

    Raises
    ------
    KeyError
        If a name in ``params`` (including ``const``) is not a column of
        ``dataframe``. Without this check the missing term would be dropped
        silently and the prediction would be wrong.
    """
    coefficients = pd.Series(params, dtype=float)

    missing = [name for name in coefficients.index if name not in dataframe.columns]
    if missing:
        raise KeyError(f"Model inputs missing from dataframe: {missing}")

    # Series.index is aligned against the DataFrame columns
    weighted = dataframe[list(coefficients.index)].mul(coefficients, axis="columns")
    return weighted.sum().sum()


def _calculate_life_expectancy_consent(dataframe):
    """
    Predict life expectancy with the consent model (``params_consent``).

    Prints a separator line, a heading and the prediction rounded to one
    decimal place, then returns the unrounded prediction. ``dataframe`` must
    contain every key of ``params_consent`` as a column, including ``const``.
    """
    print("=" * 80)
    print("Consent mode life expectancy prediction")
    # do prediction
    life_expectancy = _linear_prediction(dataframe, params_consent)
    print(f'{life_expectancy:.1f}')
    return life_expectancy


def _calculate_life_expectancy_privacy(dataframe):
    """
    Predict life expectancy with the privacy model (``params_privacy``).

    Prints a separator line, a heading and the prediction rounded to one
    decimal place, then returns the unrounded prediction. ``dataframe`` must
    contain every key of ``params_privacy`` as a column, including ``const``.
    """
    print("=" * 80)
    print("Privacy mode life expectancy prediction")
    # do prediction
    life_expectancy = _linear_prediction(dataframe, params_privacy)
    print(f'{life_expectancy:.1f}')
    return life_expectancy

# Predict Life Expectancy

In [11]:
def predict_life_expectancy() -> Optional[float]:
    """
    Predict life expectancy by asking the user a series of questions.

    The user first chooses the level of consent for supplying data, which
    selects the model and therefore the questions asked.

    Returns
    -------
    float or None
        The predicted life expectancy, or None if the user quit before
        answering every question.
    """
    responses, consent = _get_responses()

    # the user quit part-way through
    if responses is None:
        return None

    # single-row frame built from the answers (row label -1)
    model_inputs = pd.DataFrame(responses, index=[-1])
    return _calculate_life_expectancy(model_inputs, consent)


In [None]:
# Sample values for the UK in 2014 (reference data for answering the prompts by hand)
{'Country': 'United Kingdom',
 'Region': 'Rest of Europe',
 'Year': 2014,
 # inputs below use the same keys as params_consent; Life_expectancy is the recorded outcome
 'Adult_mortality': 68.7755,
 'Economy_status_Developed': 1,
 'Under_five_deaths': 4.5,
 'GDP_per_capita': 44596,
 'Alcohol_consumption': 9.45,
 'Schooling': 12.7,
 'BMI': 27.1,
 'Incidents_HIV': 0.08,
 'Thinness_ten_nineteen_years': 0.8,
 'Life_expectancy': 81.3}

In [27]:
le = predict_life_expectancy()
# Interactive manual check: answer the prompts with the UK 2014 sample values.
# The recorded life expectancy for that sample is 81.3; the run recorded below
# answers "y" to the consent question, so it exercises the consent model.

Enter "Q" at any time to quit.
Do you consent to using advanced population data, which may include protected information, for better accuracy? (Y/N)
y
What is the adult mortality [deaths of 15-60 years olds per 1000 population]?
68.7755
Is the country classed as developed [Y/N]?
y
What is the child mortality [deaths of under 5 year olds per 1000 population]?
4.5
What is the Gross Domestic Product per capita [in USD]?
-44596
Please enter a valid response; or Q to quit.
What is the Gross Domestic Product per capita [in USD]?
44596
What is the alcohol consumption? [litres per capita (15+)]?
9.45
What is the average number of years a child spends in school?
12.7
What is the average Body Mass Index of the entire population?
27.1
What is the incidents of 0-4 year olds HIV/AIDS deaths [deaths per 1000 live births]?
0.08
What is the prevalence (%) of thinness among children and adolescents for age 10 to 19 [inclusive]?
0.8
Consent mode life expectancy prediction
80.2


In [28]:
le = predict_life_expectancy()
# Interactive manual check: answer the prompts with the UK 2014 sample values.
# The recorded life expectancy for that sample is 81.3; the run recorded below
# answers "N" to the consent question, so it exercises the privacy model.

Enter "Q" at any time to quit.
Do you consent to using advanced population data, which may include protected information, for better accuracy? (Y/N)
N
What is the adult mortality [deaths of 15-60 years olds per 1000 population]?
1) <= 94.6415
2) 94.6415 - 141.9155
3) 141.9155 - 190.0275
4) 190.0275 - 272.4865
5) > 272.4865
1
What is the average number of years a child spends in school?
1) <= 4.4
2) 4.4 - 6.8
3) 6.8 - 8.9
4) 8.9 - 10.8
5) > 10.8
5
What is the Gross Domestic Product per capita [in USD]?
44596
What is the alcohol consumption? [litres per capita (15+)]?
9.45
What is the average Body Mass Index of the entire population?
27.1
Privacy mode life expectancy prediction
81.7
