## Read the data

In [10]:
import pandas as pd
import numpy as np
import json

In [11]:
# Load the city-level surgery table (the CSV file is EUC-KR encoded).
city_data = pd.read_csv('yearly_city_surgery.csv', encoding='EUC-KR')

# Keep only rows whose "Surgery" label contains at least two "-" separators,
# i.e. labels that have a middle segment to extract below.
# The explicit .copy() makes the filtered frame independent of the raw one, so
# adding a column afterwards does not trigger SettingWithCopyWarning.
city_data = city_data[city_data['Surgery'].str.count("-") >= 2].copy()

# "Reference Surgery" is the second "-"-separated segment of the "Surgery" label.
# The segment is not stripped here, so it may carry surrounding whitespace.
city_data['Reference Surgery'] = city_data['Surgery'].str.split("-").str[1]

In [12]:
# Keep only the aggregate rows, i.e. those whose "Hospital Location" is "Total".
is_total_location = city_data['Hospital Location'].eq('Total')
city_data_total = city_data.loc[is_total_location]

In [13]:
# Print the number of rows available for each hospital location,
# in order of first appearance.
locations = city_data["Hospital Location"]
for location_name in locations.unique():
    row_count = int((locations == location_name).sum())
    print(location_name, row_count)

Total 132
Seoul 132
Busan 132
Daegu 132
Incheon 132
Gwangju 132
Daejeon 132
Ulsan 132
Sejong 132
Gyeonggi 132
Gangwon 132
Chungbook 132
Chungnaam 132
Jeonbook 132
Jeonnaam 132
Gyeongbook 132
Gyeongnaam 132
Jeju 132


In [14]:
# Build a lookup of the average number of blood units transfused per
# transfused patient, keyed by the normalised operation name.
operation_blood_data = pd.read_csv('operation_blood_demand.csv')

# Operations with zero transfused patients are skipped to avoid dividing by zero.
transfused_ops = operation_blood_data[operation_blood_data["Transfused patients No."] != 0]

units_per_patient = (
    transfused_ops["Total units transfused"] / transfused_ops["Transfused patients No."]
)

# Keys are lower-cased AND stripped so they match the way surgery names are
# normalised when this dictionary is queried later (`.lower().strip()`).
operation_keys = transfused_ops["Operation"].str.lower().str.strip()

# .tolist() converts the values to plain Python floats, which keeps the
# dictionary JSON-serialisable.
blood_demand_per_patient = dict(zip(operation_keys, units_per_patient.tolist()))

In [15]:
# Persist the per-patient blood demand lookup as JSON.
with open('blood_demand_per_patient.json', 'w') as out_file:
    out_file.write(json.dumps(blood_demand_per_patient))

In [16]:
# Display the extracted reference surgery names.
city_data.loc[:, "Reference Surgery"]

8         Bilateral thyroidectomy 
9         Bilateral thyroidectomy 
10        Bilateral thyroidectomy 
11        Bilateral thyroidectomy 
12       Aortic valve replacement 
                   ...            
2587                Mastoidectomy 
2588        Liver transplantation 
2589        Liver transplantation 
2590        Liver transplantation 
2591        Liver transplantation 
Name: Reference Surgery, Length: 2376, dtype: object

Check that everything is all right: every reference surgery in the city totals should also appear in the February data.

In [119]:
# Reference surgeries present in the city totals but missing from the February
# data; an empty set means every surgery is covered.
set(city_data_total["Reference Surgery"]).difference(february_data["Reference Surgery"])

set()

In [122]:
# One placeholder entry (None) per distinct value of the "Monthly" column.
data_dict = dict.fromkeys(monthly_data["Monthly"].unique())

In [124]:
# Year columns are selected by position: everything from the fifth column up to
# (but excluding) the last two columns.
year_columns = monthly_data.columns[4:-2]

# Every month label except the aggregate "Total" row.
month_names = [name for name in monthly_data["Monthly"].unique() if name != "Total"]

# One "<year>_<month>" placeholder per (year, month) pair, ordered by year first.
monthly_dict = {
    year + "_" + name: None
    for year in year_columns
    for name in month_names
}

monthly_dict

{'2006_January': None,
 '2006_February': None,
 '2006_March': None,
 '2006_April': None,
 '2006_May': None,
 '2006_June': None,
 '2006_July': None,
 '2006_August': None,
 '2006_September': None,
 '2006_October': None,
 '2006_November': None,
 '2006_December': None,
 '2007_January': None,
 '2007_February': None,
 '2007_March': None,
 '2007_April': None,
 '2007_May': None,
 '2007_June': None,
 '2007_July': None,
 '2007_August': None,
 '2007_September': None,
 '2007_October': None,
 '2007_November': None,
 '2007_December': None,
 '2008_January': None,
 '2008_February': None,
 '2008_March': None,
 '2008_April': None,
 '2008_May': None,
 '2008_June': None,
 '2008_July': None,
 '2008_August': None,
 '2008_September': None,
 '2008_October': None,
 '2008_November': None,
 '2008_December': None,
 '2009_January': None,
 '2009_February': None,
 '2009_March': None,
 '2009_April': None,
 '2009_May': None,
 '2009_June': None,
 '2009_July': None,
 '2009_August': None,
 '2009_September': None,
 '2009_

In [125]:
# Inspect the 2013 values of the aggregate ("Total") rows.
monthly_data.loc[monthly_data["Monthly"] == "Total", "2013"]

12    NaN
13    NaN
14    NaN
15    NaN
16    NaN
       ..
211   NaN
212   NaN
213   NaN
214   NaN
215   NaN
Name: 2013, Length: 198, dtype: float64

In [128]:
# Estimate the blood demand for every "<year>_<month>" key of monthly_dict:
# for each surgery, multiply the value in that year's column by the average
# units transfused per patient, and sum over all surgeries.
#
# Each total is accumulated in a local variable and then assigned, so the cell
# works even though monthly_dict was initialised with None placeholders, and
# re-running the cell does not double-count.
for key in monthly_dict:
    year, month = key.split("_", 1)

    month_rows = monthly_data[monthly_data["Monthly"] == month]

    # Keep only the rows whose "Category" mentions "Surgery Received Person".
    # Literal (non-regex) matching; rows with a missing category are skipped.
    is_person_row = month_rows["Category"].str.contains(
        "Surgery Received Person", regex=False, na=False
    )
    person_rows = month_rows[is_person_row]

    total_units = 0.0
    for _, row in person_rows.iterrows():
        # Normalise the surgery name the same way the lookup keys are built.
        surgery_key = str(row["Reference Surgery"]).lower().strip()
        blood_usage_per_patient = blood_demand_per_patient[surgery_key]

        patient_count = row[year]
        if np.isnan(patient_count):
            # No figure recorded for this surgery in this year/month.
            continue

        demand = blood_usage_per_patient * patient_count
        if np.isnan(demand):
            # Diagnostic: the per-patient usage itself is NaN for this surgery.
            print(surgery_key, blood_usage_per_patient, patient_count)
        total_units += demand

    monthly_dict[key] = total_units



In [130]:
print(monthly_dict)

# Persist the monthly estimates as JSON.
with open('monthly_blood_demand.json', 'w') as monthly_file:
    monthly_file.write(json.dumps(monthly_dict))

# Roll the "<year>_<month>" entries up into one total per year.
yearly_dict = {}
for month_key, month_demand in monthly_dict.items():
    year_label = month_key.split("_")[0]
    yearly_dict[year_label] = yearly_dict.get(year_label, 0) + month_demand

# Persist the yearly totals as JSON.
with open('yearly_blood_demand.json', 'w') as yearly_file:
    yearly_file.write(json.dumps(yearly_dict))

{'2006_January': 196711.7428308683, '2006_February': 185407.7323187022, '2006_March': 185956.20413796254, '2006_April': 170128.14445368515, '2006_May': 180705.31702672786, '2006_June': 177727.56674422562, '2006_July': 185994.05129152545, '2006_August': 199599.20769424454, '2006_September': 176625.4893819863, '2006_October': 171376.40604616594, '2006_November': 176272.67755778495, '2006_December': 176788.9408387035, '2007_January': 232617.93086487046, '2007_February': 188365.03674390895, '2007_March': 207084.93039855192, '2007_April': 192578.69276279546, '2007_May': 199525.5815918509, '2007_June': 189785.53081570324, '2007_July': 207533.22149263308, '2007_August': 215073.38314863486, '2007_September': 179771.7553836934, '2007_October': 201892.07345512163, '2007_November': 186695.42259547368, '2007_December': 181651.0166107151, '2008_January': 236116.5657807982, '2008_February': 185492.4439113339, '2008_March': 201311.71071066937, '2008_April': 187723.91352748653, '2008_May': 187779.7801

#### JSON common items: Year, Month, Source

Now we need to calculate the demand for each city.