# Preparation of IHME Actuals Data

### 20211229_IHME_GBD2019.xlsx
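This notebook prepares the IHME actuals data from `20211229_IHME_GBD2019.xlsx`: it loads the workbook, renames the columns to snake_case, drops the unused `Age` and `Data Suite` columns, casts each remaining column to an appropriate data type, sorts the rows, and saves the result to `../processed_data/ihme_actuals.pkl`.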

In [1]:
# Load required packages
import pandas as pd 
import matplotlib.pyplot as plt
import seaborn as sns
import os

## Load data

In [2]:
# Load the raw IHME GBD 2019 workbook into a data frame
ihme_actuals = pd.read_excel(
    io="../../0_raw_data/novartis_data/20211229_IHME_GBD2019.xlsx"
)

# Show the loaded frame as the cell output
ihme_actuals

Unnamed: 0,Condition,Sex,Age,Location,Measure,Unit,Year,Value,Lower,Upper,Data Suite
0,Breast cancer,Male,All Ages,Sweden,Deaths,Number,2012,11.154988,9.988676,12.488940,GBD 2019
1,Breast cancer,Female,All Ages,Sweden,Deaths,Number,2012,1952.187085,1775.073238,2072.814309,GBD 2019
2,Malignant skin melanoma,Male,All Ages,Sweden,Deaths,Number,2012,358.542147,183.268982,401.445608,GBD 2019
3,Malignant skin melanoma,Female,All Ages,Sweden,Deaths,Number,2012,256.808232,139.468152,288.756416,GBD 2019
4,Breast cancer,Male,All Ages,Sweden except Stockholm,Deaths,Number,2012,7.199021,6.190559,8.372396,GBD 2019
...,...,...,...,...,...,...,...,...,...,...,...
955,Metastatic phase of breast cancer,Female,All Ages,Sweden,Incidence,"Rate (per 100,000)",2019,0.000000,0.000000,0.000000,GBD 2019
956,Metastatic phase of malignant skin melanoma,Male,All Ages,Sweden,Incidence,"Rate (per 100,000)",2019,0.000000,0.000000,0.000000,GBD 2019
957,Metastatic phase of malignant skin melanoma,Female,All Ages,Sweden,Incidence,"Rate (per 100,000)",2019,0.000000,0.000000,0.000000,GBD 2019
958,Terminal phase of malignant skin melanoma,Male,All Ages,Sweden,Incidence,"Rate (per 100,000)",2019,0.000000,0.000000,0.000000,GBD 2019


## Preparatory steps

In [3]:
# Rename all columns to snake_case: each listed header is lower-cased and its
# spaces are replaced by underscores (e.g. "Data Suite" -> "data_suite").
ihme_actuals = ihme_actuals.rename(
    columns={
        header: header.lower().replace(" ", "_")
        for header in (
            "Condition",
            "Sex",
            "Age",
            "Location",
            "Measure",
            "Unit",
            "Year",
            "Value",
            "Lower",
            "Upper",
            "Data Suite",
        )
    }
)

In [4]:
# Drop irrelevant columns.
# The result is reassigned instead of mutating the frame with inplace=True, and
# errors="ignore" makes the cell safe to re-run: once "age" and "data_suite"
# are gone, a second execution is a no-op rather than a KeyError.
ihme_actuals = ihme_actuals.drop(columns=["age", "data_suite"], errors="ignore")

In [5]:
# Cast every column to its target dtype in a single astype call:
# descriptive columns become categoricals, year an integer, and the
# estimate with its lower/upper bounds floats.
ihme_actuals = ihme_actuals.astype(
    {
        "condition": "category",
        "sex": "category",
        "location": "category",
        "measure": "category",
        "unit": "category",
        "year": "int",
        "value": "float",
        "lower": "float",
        "upper": "float",
    }
)

In [6]:
# Display the frame after the rename, column drop, and dtype casts above
ihme_actuals

Unnamed: 0,condition,sex,location,measure,unit,year,value,lower,upper
0,Breast cancer,Male,Sweden,Deaths,Number,2012,11.154988,9.988676,12.488940
1,Breast cancer,Female,Sweden,Deaths,Number,2012,1952.187085,1775.073238,2072.814309
2,Malignant skin melanoma,Male,Sweden,Deaths,Number,2012,358.542147,183.268982,401.445608
3,Malignant skin melanoma,Female,Sweden,Deaths,Number,2012,256.808232,139.468152,288.756416
4,Breast cancer,Male,Sweden except Stockholm,Deaths,Number,2012,7.199021,6.190559,8.372396
...,...,...,...,...,...,...,...,...,...
955,Metastatic phase of breast cancer,Female,Sweden,Incidence,"Rate (per 100,000)",2019,0.000000,0.000000,0.000000
956,Metastatic phase of malignant skin melanoma,Male,Sweden,Incidence,"Rate (per 100,000)",2019,0.000000,0.000000,0.000000
957,Metastatic phase of malignant skin melanoma,Female,Sweden,Incidence,"Rate (per 100,000)",2019,0.000000,0.000000,0.000000
958,Terminal phase of malignant skin melanoma,Male,Sweden,Incidence,"Rate (per 100,000)",2019,0.000000,0.000000,0.000000


In [7]:
# Order the rows by every identifying column (year last), then renumber the
# index from zero so it follows the new row order.
ihme_actuals = (
    ihme_actuals
    .sort_values(by=["condition", "sex", "location", "measure", "unit", "year"])
    .reset_index(drop=True)
)

In [8]:
# Display the sorted frame with its reset index
ihme_actuals

Unnamed: 0,condition,sex,location,measure,unit,year,value,lower,upper
0,Breast cancer,Female,Stockholm,Deaths,Number,2012,357.432527,318.807856,386.479973
1,Breast cancer,Female,Stockholm,Deaths,Number,2013,360.605135,324.949473,389.527356
2,Breast cancer,Female,Stockholm,Deaths,Number,2014,365.016817,323.692779,395.296536
3,Breast cancer,Female,Stockholm,Deaths,Number,2015,357.235302,315.816381,388.172419
4,Breast cancer,Female,Stockholm,Deaths,Number,2016,366.940581,326.284801,401.275003
...,...,...,...,...,...,...,...,...,...
955,Terminal phase of malignant skin melanoma,Male,Sweden except Stockholm,Prevalence,"Rate (per 100,000)",2015,0.971843,0.505324,1.140718
956,Terminal phase of malignant skin melanoma,Male,Sweden except Stockholm,Prevalence,"Rate (per 100,000)",2016,0.953051,0.499246,1.127656
957,Terminal phase of malignant skin melanoma,Male,Sweden except Stockholm,Prevalence,"Rate (per 100,000)",2017,0.953400,0.504011,1.146946
958,Terminal phase of malignant skin melanoma,Male,Sweden except Stockholm,Prevalence,"Rate (per 100,000)",2018,0.968869,0.501525,1.214614


In [9]:
# Save the prepared data frame
route0 = "../processed_data"

# makedirs(exist_ok=True) creates any missing parent directories and does not
# fail when the directory already exists, so no separate existence check
# (and no check-then-create race) is needed.
os.makedirs(route0, exist_ok=True)

print("saving file corresponding to ihme_actuals.pkl")
ihme_actuals.to_pickle(f"{route0}/ihme_actuals.pkl")

# Read the pickle back as the last expression so the cell output shows what
# was actually written to disk.
pd.read_pickle(f"{route0}/ihme_actuals.pkl")

saving file corresponding to ihme_actuals.pkl


Unnamed: 0,condition,sex,location,measure,unit,year,value,lower,upper
0,Breast cancer,Female,Stockholm,Deaths,Number,2012,357.432527,318.807856,386.479973
1,Breast cancer,Female,Stockholm,Deaths,Number,2013,360.605135,324.949473,389.527356
2,Breast cancer,Female,Stockholm,Deaths,Number,2014,365.016817,323.692779,395.296536
3,Breast cancer,Female,Stockholm,Deaths,Number,2015,357.235302,315.816381,388.172419
4,Breast cancer,Female,Stockholm,Deaths,Number,2016,366.940581,326.284801,401.275003
...,...,...,...,...,...,...,...,...,...
955,Terminal phase of malignant skin melanoma,Male,Sweden except Stockholm,Prevalence,"Rate (per 100,000)",2015,0.971843,0.505324,1.140718
956,Terminal phase of malignant skin melanoma,Male,Sweden except Stockholm,Prevalence,"Rate (per 100,000)",2016,0.953051,0.499246,1.127656
957,Terminal phase of malignant skin melanoma,Male,Sweden except Stockholm,Prevalence,"Rate (per 100,000)",2017,0.953400,0.504011,1.146946
958,Terminal phase of malignant skin melanoma,Male,Sweden except Stockholm,Prevalence,"Rate (per 100,000)",2018,0.968869,0.501525,1.214614
