In [1]:
import pandas as pd
import numpy as np
from numpy import nan as Nan

In [2]:
def convert_county_name(input_county):
    """Return the part of a label that precedes its first " County".

    Args:
        input_county: A string label, e.g. "Bexar County, Texas".

    Returns:
        The text before the first occurrence of " County". When the label
        does not contain " County" it is returned unchanged.
    """
    # str.partition hands back the whole string as the head when the separator
    # is missing; slicing with find()'s -1 result would instead silently chop
    # off the final character of the label.
    return input_county.partition(" County")[0]

def convert_id(input_id):
    """Return ``input_id`` with its first nine characters removed.

    NOTE(review): presumably this strips a fixed-width prefix such as
    "0500000US" from a Census geography id, leaving the FIPS code — confirm
    against the source CSV.
    """
    prefix_width = 9
    return input_id[prefix_width:]

In [21]:
# Social Factors in 2010: build the FIPS <-> county-name lookup table.
county_df = pd.read_csv("datasets/before/2010social.csv").fillna(0)
county_df.columns = county_df.iloc[0]  # the first data row carries the real column labels
county_df = (
    county_df.iloc[1:]  # drop the row that was just promoted to the header
    .loc[:, ["id", "Geographic Area Name"]]
    .rename(columns={"id": "FIPS", "Geographic Area Name": "name"})
)

# Normalise both columns with the helper functions defined earlier in the notebook.
county_df["FIPS"] = county_df["FIPS"].apply(convert_id)
county_df["name"] = county_df["name"].apply(convert_county_name)

county_df.shape

(52, 2)

In [7]:
def find_county_name(input_num):
    """Translate a county code into a county name via the ``county_df`` lookup.

    The code is added to 48000 to form a FIPS string, which is then matched
    against the ``FIPS`` column of the notebook-level ``county_df`` frame.

    Args:
        input_num: County code, given as an int or as a string of decimal
            digits.

    Returns:
        The ``name`` value of the first ``county_df`` row whose ``FIPS``
        equals the computed string, or NaN when the input is not made up
        solely of decimal digits or when no row matches.
    """
    code = str(input_num)
    # isdecimal() admits exactly the characters int() can parse; isdigit()
    # also admits e.g. superscript digits, on which int() raises ValueError.
    if not code.isdecimal():
        return Nan

    # NOTE(review): 48 is presumably the Texas state FIPS prefix — confirm.
    fips = str(48000 + int(code))

    # Build the filtered selection once instead of evaluating the comparison twice.
    matches = county_df.loc[county_df["FIPS"] == fips, "name"]
    if matches.empty:
        return Nan
    return matches.iloc[0]

In [22]:
IP_df = []  # IP_df[k] holds the per-county quarterly discharge sums for year 2010 + k

for year in range(2010, 2020):  # Read Hospital Discharges Data from 2010 through 2019
    reader = "datasets/before/" + str(year) + "IP.xlsx"
    IP_original = pd.read_excel(reader).fillna(0)

    # The county column is labelled differently depending on the year; the
    # label does not vary by quarter, so it is chosen once per workbook.
    if year >= 2017:
        county = "COUNTY"
    elif year >= 2014:
        county = "Hospital County"
    else:
        county = "Hospital county"

    # Output column labels ("1q10" ... "4q10") and the matching source columns.
    quarters = [str(i) + "q" + str(year % 100) for i in range(1, 5)]
    if year <= 2016:
        qids = [quarter + " Inpatient Discharges" for quarter in quarters]
    else:
        qids = ["q" + str(i) for i in range(1, 5)]

    # A single grouped sum over all four quarter columns replaces four
    # separate groupby passes that filled an empty frame column by column.
    IPyear_df = IP_original.groupby(county)[qids].sum()
    IPyear_df.columns = quarters
    IPyear_df.index.names = ["county"]

    if year >= 2017:
        # From 2017 on the county column is passed through find_county_name to
        # obtain a name; rows for which it returns NaN are removed by dropna().
        IPyear_df = IPyear_df.reset_index()
        IPyear_df["name"] = IPyear_df["county"].apply(find_county_name)
        IPyear_df = IPyear_df.dropna().drop(columns=["county"]).set_index("name")
        IPyear_df.index.names = ["county"]

    IP_df.append(IPyear_df)

In [24]:
# Join every yearly table side by side in one pass. join='inner' keeps only the
# counties present in all years, exactly as repeated pairwise inner joins
# would, without re-copying the growing frame on each iteration and without
# repeating the year range used by the loader.
mergedIP_df = pd.concat(IP_df, axis=1, join='inner')

In [25]:
# Display the merged table: one row per county, one column per quarter.
mergedIP_df

Unnamed: 0_level_0,1q10,2q10,3q10,4q10,1q11,2q11,3q11,4q11,1q12,2q12,...,3q17,4q17,1q18,2q18,3q18,4q18,1q19,2q19,3q19,4q19
county,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Angelina,3684.0,3623.0,2789.0,4512.0,3802.0,3429.0,3513.0,3369.0,3786.0,3644.0,...,3516,3538,3643,3511,3586,3214,3699,3653,3851,3753
Bastrop,393.0,330.0,156.0,177.0,179.0,186.0,167.0,176.0,155.0,152.0,...,38,55,52,38,7,39,42,51,37,39
Bell,11196.0,11304.0,11202.0,11682.0,11654.0,11409.0,11329.0,12609.0,12080.0,11544.0,...,13065,13445,13268,13111,12781,13247,13020,13009,12828,12687
Bexar,59135.0,60357.0,61874.0,61775.0,61938.0,60990.0,62250.0,61528.0,65238.0,62287.0,...,68428,70072,71387,70020,69140,71041,69699,70088,71462,71554
Bowie,7117.0,6988.0,6847.0,6551.0,6784.0,6532.0,5993.0,6450.0,6783.0,6864.0,...,6437,6183,6017,5769,5978,5935,6017,6037,6017,5946
Brazoria,2108.0,2142.0,1955.0,2124.0,2171.0,1949.0,2033.0,1932.0,1914.0,1820.0,...,3258,2894,4512,3632,3673,3867,3929,3697,3988,3701
Brazos,6937.0,6985.0,6843.0,6965.0,6975.0,6517.0,7096.0,6834.0,6781.0,6849.0,...,7187,7143,7679,7464,7455,7533,7158,7483,7519,7714
Cameron,14225.0,13412.0,13479.0,14028.0,13718.0,13007.0,13810.0,14087.0,14590.0,13034.0,...,12762,13089,13568,12700,12906,11276,11776,11279,11382,11914
Collin,19779.0,19798.0,19772.0,20815.0,20807.0,21031.0,21417.0,20257.0,19968.0,21677.0,...,26304,26960,26867,27187,27069,28096,27886,28206,28369,29614
Comal,1505.0,1529.0,1723.0,1678.0,1781.0,1653.0,1608.0,1791.0,1959.0,1813.0,...,2598,2682,2838,2630,2472,2528,2730,2656,1581,3743
