# Preparation of IHME Forecast Data

### 20211229_IHME_Forecast_Data.xlsx
This notebook prepares the IHME Forecast Data from `20211229_IHME_Forecast_Data.xlsx`.

In [1]:
# Load required packages
import pandas as pd 
import matplotlib.pyplot as plt
import seaborn as sns
import os

## Load data

In [2]:
# Location of the raw IHME forecast workbook, relative to this notebook
raw_workbook = "../../0_raw_data/novartis_data/20211229_IHME_Forecast_Data.xlsx"

# Load the workbook into a data frame
ihme_forecast = pd.read_excel(raw_workbook)

# Display the loaded data frame
ihme_forecast

Unnamed: 0,Condition,Sex,Age,Location,Measure,Unit,Forecast Scenario,Year,Value,Lower,Upper,Data Suite
0,Breast cancer,Male,All Ages,Sweden,Deaths,Number,Past estimate,2012,19.060636,16.226380,23.116195,Forecast Data
1,Breast cancer,Female,All Ages,Sweden,Deaths,Number,Past estimate,2012,1652.227842,1534.316920,1778.104185,Forecast Data
2,Malignant skin melanoma,Male,All Ages,Sweden,Deaths,Number,Past estimate,2012,331.162019,271.230442,364.941671,Forecast Data
3,Malignant skin melanoma,Female,All Ages,Sweden,Deaths,Number,Past estimate,2012,248.942360,151.215649,287.126493,Forecast Data
4,Breast cancer,Male,All Ages,Sweden except Stockholm,Deaths,Number,Past estimate,2012,15.235187,12.956689,18.975009,Forecast Data
...,...,...,...,...,...,...,...,...,...,...,...,...
1363,Malignant skin melanoma,Male,All Ages,Sweden,Incidence,"Rate (per 100,000)",Better forecast,2030,48.601690,,,Forecast Data
1364,Malignant skin melanoma,Female,All Ages,Sweden,Incidence,"Rate (per 100,000)",Reference forecast,2030,31.153374,,,Forecast Data
1365,Malignant skin melanoma,Male,All Ages,Sweden,Incidence,"Rate (per 100,000)",Reference forecast,2030,44.245966,,,Forecast Data
1366,Malignant skin melanoma,Female,All Ages,Sweden,Incidence,"Rate (per 100,000)",Worse forecast,2030,30.332263,,,Forecast Data


## Preparatory steps

In [3]:
# Map every original column header to its lower-case snake_case name
snake_case_names = {
    "Condition": "condition",
    "Sex": "sex",
    "Age": "age",
    "Location": "location",
    "Measure": "measure",
    "Unit": "unit",
    "Forecast Scenario": "forecast_scenario",
    "Year": "year",
    "Value": "value",
    "Lower": "lower",
    "Upper": "upper",
    "Data Suite": "data_suite",
}

# Apply the renaming to the data frame
ihme_forecast = ihme_forecast.rename(columns=snake_case_names)

In [4]:
# Discard the columns considered irrelevant ("age" and "data_suite")
ihme_forecast = ihme_forecast.drop(columns=["age", "data_suite"])

In [5]:
# Target data type for each remaining column
column_dtypes = {
    "condition": "category",
    "sex": "category",
    "location": "category",
    "measure": "category",
    "unit": "category",
    "forecast_scenario": "category",
    "year": "int",
    "value": "float",
    "lower": "float",
    "upper": "float",
}

# Cast all columns in one pass
ihme_forecast = ihme_forecast.astype(column_dtypes)

In [6]:
# Inspect the data frame after the renaming, dropping, and casting steps
ihme_forecast

Unnamed: 0,condition,sex,location,measure,unit,forecast_scenario,year,value,lower,upper
0,Breast cancer,Male,Sweden,Deaths,Number,Past estimate,2012,19.060636,16.226380,23.116195
1,Breast cancer,Female,Sweden,Deaths,Number,Past estimate,2012,1652.227842,1534.316920,1778.104185
2,Malignant skin melanoma,Male,Sweden,Deaths,Number,Past estimate,2012,331.162019,271.230442,364.941671
3,Malignant skin melanoma,Female,Sweden,Deaths,Number,Past estimate,2012,248.942360,151.215649,287.126493
4,Breast cancer,Male,Sweden except Stockholm,Deaths,Number,Past estimate,2012,15.235187,12.956689,18.975009
...,...,...,...,...,...,...,...,...,...,...
1363,Malignant skin melanoma,Male,Sweden,Incidence,"Rate (per 100,000)",Better forecast,2030,48.601690,,
1364,Malignant skin melanoma,Female,Sweden,Incidence,"Rate (per 100,000)",Reference forecast,2030,31.153374,,
1365,Malignant skin melanoma,Male,Sweden,Incidence,"Rate (per 100,000)",Reference forecast,2030,44.245966,,
1366,Malignant skin melanoma,Female,Sweden,Incidence,"Rate (per 100,000)",Worse forecast,2030,30.332263,,


In [7]:
# Columns that define the row order, from outermost to innermost key
sort_columns = ['condition', 'sex', 'location', 'measure', 'unit', 'year']

# Sort the rows, then renumber the index from zero (the old index is discarded)
ihme_forecast_sorted = ihme_forecast.sort_values(by=sort_columns)
ihme_forecast = ihme_forecast_sorted.reset_index(drop=True)

In [8]:
# Inspect the data frame after sorting
ihme_forecast

Unnamed: 0,condition,sex,location,measure,unit,forecast_scenario,year,value,lower,upper
0,Breast cancer,Female,Stockholm,Deaths,Number,Past estimate,2012,304.111590,268.215670,345.118850
1,Breast cancer,Female,Stockholm,Deaths,Number,Past estimate,2013,309.517566,266.763740,360.426139
2,Breast cancer,Female,Stockholm,Deaths,Number,Past estimate,2014,311.341068,258.490163,378.015392
3,Breast cancer,Female,Stockholm,Deaths,Number,Past estimate,2015,319.089186,250.521211,411.223639
4,Breast cancer,Female,Stockholm,Deaths,Number,Past estimate,2016,323.804851,239.637639,433.362297
...,...,...,...,...,...,...,...,...,...,...
1363,Malignant skin melanoma,Male,Sweden except Stockholm,Prevalence,"Rate (per 100,000)",Past estimate,2012,160.516613,132.304471,181.866381
1364,Malignant skin melanoma,Male,Sweden except Stockholm,Prevalence,"Rate (per 100,000)",Past estimate,2013,162.355372,135.270090,183.791722
1365,Malignant skin melanoma,Male,Sweden except Stockholm,Prevalence,"Rate (per 100,000)",Past estimate,2014,162.523317,135.478271,182.577034
1366,Malignant skin melanoma,Male,Sweden except Stockholm,Prevalence,"Rate (per 100,000)",Past estimate,2015,165.550626,137.039454,186.263973


In [9]:
# Save the prepared data frame
route0 = "../processed_data"
pickle_path = f"{route0}/ihme_forecasts.pkl"

# Create the output directory if needed. makedirs with exist_ok=True replaces
# the separate exists()/mkdir() pair: it does not fail when the directory is
# already there and it also creates any missing parent directories.
os.makedirs(route0, exist_ok=True)

print("saving file corresponding to ihme_forecasts.pkl")
ihme_forecast.to_pickle(pickle_path)

# Read the file back so the cell output shows what was actually written
pd.read_pickle(pickle_path)

saving file corresponding to ihme_forecasts.pkl


Unnamed: 0,condition,sex,location,measure,unit,forecast_scenario,year,value,lower,upper
0,Breast cancer,Female,Stockholm,Deaths,Number,Past estimate,2012,304.111590,268.215670,345.118850
1,Breast cancer,Female,Stockholm,Deaths,Number,Past estimate,2013,309.517566,266.763740,360.426139
2,Breast cancer,Female,Stockholm,Deaths,Number,Past estimate,2014,311.341068,258.490163,378.015392
3,Breast cancer,Female,Stockholm,Deaths,Number,Past estimate,2015,319.089186,250.521211,411.223639
4,Breast cancer,Female,Stockholm,Deaths,Number,Past estimate,2016,323.804851,239.637639,433.362297
...,...,...,...,...,...,...,...,...,...,...
1363,Malignant skin melanoma,Male,Sweden except Stockholm,Prevalence,"Rate (per 100,000)",Past estimate,2012,160.516613,132.304471,181.866381
1364,Malignant skin melanoma,Male,Sweden except Stockholm,Prevalence,"Rate (per 100,000)",Past estimate,2013,162.355372,135.270090,183.791722
1365,Malignant skin melanoma,Male,Sweden except Stockholm,Prevalence,"Rate (per 100,000)",Past estimate,2014,162.523317,135.478271,182.577034
1366,Malignant skin melanoma,Male,Sweden except Stockholm,Prevalence,"Rate (per 100,000)",Past estimate,2015,165.550626,137.039454,186.263973
