In [1]:
import pandas as pd
import numpy as np
from datetime import date

pd.set_option('display.max_columns', 500)

def print_full(x):
    try:
        v = len(x)
    except:
        v = 1000000
    pd.set_option('display.max_rows', v)
    print(x)
    pd.reset_option('display.max_rows')

In [2]:
file = open('uncleaned-data.csv', 'r', encoding='utf8')
df = pd.read_csv(file, dtype={
    "id": "string",
    "age": "string",
    "city": "string",
    "sex": "category",
    "province": "string",
    "country": "string",
    "latitude": float,
    "longitude": float,
    "geo_resolution": "category",
    "date_onset_symptoms": "string",
    "date_admission_hospital": "string",
    "date_confirmation": "string",
    "symptoms": "string",
    "travel_history_dates": "string",
    "travel_history_location": "string",
    "reported_market_exposure": "category",
    "additional_information": "string",
    "chronic_disease": "string",
    "source": "string",
    "sequence_available": "category",
    "outcome": "string",
    "date_death_or_discharge": "string",
    "notes_for_discussion": "string",
    "location": "string",
    "admin3": "string",
    "admin2": "string",
    "admin1": "string",
    "country_new": "string",
    "admin_id": "Int64",
    "data_moderator_initials": "string",
    "chronic_disease_binary": bool,
    "lives_in_Wuhan": "string",
    "travel_history_binary": "boolean"
})

In [47]:
df['travel_history_binary'] = df['travel_history_binary'].fillna(value=False).astype(bool).astype('float')

In [48]:
df['lives_in_Wuhan'] = df['lives_in_Wuhan'].replace(['yes', 'no', pd.NA], [True, False, False]).astype(bool).astype('float')

In [49]:
df = df.drop(['ID', 'admin_id', 'source', 'admin1', 'admin2', 'admin3', 'data_moderator_initials', 'notes_for_discussion', 'location', 'country_new'], axis='columns')

# Remove columns that *could* be useful if the data was less sparse/more relevant to my project
df = df.drop(['date_death_or_discharge'], axis='columns')
df = df.drop(['additional_information'], axis='columns')
df = df.drop(['reported_market_exposure'], axis='columns')
df = df.drop(['city', 'province', 'country'], axis='columns')
df = df.drop(['sequence_available'], axis=1)

In [50]:
def label_encode(df1, col):
    df1[col] = df1[col].astype('category')
    cd = dict(enumerate(df1[col].cat.categories))
    df1[col] = df1[col].cat.codes
    return df1, cd

In [51]:
def replace_all_with(df1, col, items, rep):
    df1[col] = df1[col].replace(items, [rep] * len(items))
    return df1

df = replace_all_with(df, 'outcome', ['recovered', 'Alive', 'not hospitalized', 'recovering at home 03.03.2020'], 'Recovered')
df = replace_all_with(df, 'outcome', ['Stable', 'Discharged', 'discharge', 'discharged', 'Discharged from hospital', 'Migrated', 'Migrated_Other', 'Symptoms only improved with cough. Currently hospitalized for follow-up.'], 'Recovered (hospitalized)')
df = replace_all_with(df, 'outcome', ['Death', 'Died', 'Dead', 'dead', 'death', 'died'], 'Deceased')
df = replace_all_with(df, 'outcome', ['https://www.mspbs.gov.py/covid-19.php', 'released from quarantine'], pd.NA)
df = replace_all_with(df, 'outcome', ['stable condition', 'stable', 'Under treatment', 'Receiving Treatment', 'severe', 'unstable', 'severe illness', 'critical condition, intubated as of 14.02.2020', 'critical condition', 'treated in an intensive care unit (14.02.2020)', 'Critical condition'], 'Hospitalized')

df = df.dropna(how='any', subset=['outcome'])

df, category_dict = label_encode(df, 'outcome')

In [52]:
category_dict

{0: 'Deceased',
 1: 'Hospitalized',
 2: 'Recovered',
 3: 'Recovered (hospitalized)'}

In [5]:
df[pd.isnull(df['country'])]

Unnamed: 0,ID,age,sex,city,province,country,latitude,longitude,geo_resolution,date_onset_symptoms,date_admission_hospital,date_confirmation,symptoms,lives_in_Wuhan,travel_history_dates,travel_history_location,reported_market_exposure,additional_information,chronic_disease_binary,chronic_disease,source,sequence_available,outcome,date_death_or_discharge,notes_for_discussion,location,admin3,admin2,admin1,country_new,admin_id,data_moderator_initials,travel_history_binary
1176,000-1-11056,50-59,male,,Taiwan,,23.75947,120.9559,admin1,,,15.02.2020,,,,,,,False,,https://www.cdc.gov.tw/Bulletin/Detail/C7Sfkry...,,,,,,,,Taiwan,,846,,
1182,000-1-11061,80-80,female,,Taiwan,,23.75947,120.9559,admin1,,,17.02.2020,,,,,,Case 21;,False,,https://www.cdc.gov.tw/Bulletin/Detail/vyO8cx1...,,,,,,,,Taiwan,,846,,
1183,000-1-11062,30-39,male,,Taiwan,,23.75947,120.9559,admin1,28.01.2020,,17.02.2020,"cough, fever",,,,,Case 22;,False,,https://www.cdc.gov.tw/Bulletin/Detail/vyO8cx1...,,,,,,,,Taiwan,,846,,
1892,000-1-11700,20,female,,Taiwan,,23.75947,120.9559,admin1,28.01.2020,,21.02.2020,"fever, cough",,,,,"case 25, visited case 24 in hospital on 12.02....",False,,https://www.cdc.gov.tw/Bulletin/Detail/-aJ2VX6...,,,,,,,,Taiwan,,846,,
1893,000-1-11701,40,female,,Taiwan,,23.75947,120.9559,admin1,,,21.02.2020,"no respiratory symptoms, esophageal reflux",,,,,"case 26, lives with case 24",False,,https://www.cdc.gov.tw/Bulletin/Detail/-aJ2VX6...,,,,,,,,Taiwan,,846,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
661674,005-50527,,,,,,,,,,,,,,,,,,False,,,,,,,,,,,,,,False
661675,005-50528,,,,,,,,,,,,,,,,,,False,,,,,,,,,,,,,,False
661676,005-50529,,,,,,,,,,,,,,,,,,False,,,,,,,,,,,,,,False
661678,005-50530,,,,,,,,,,,,,,,,,,False,,,,,,,,,,,,,,False


In [42]:
df, city_dict = label_encode(df, 'city')

In [8]:
sex_dist = df.sex.value_counts(normalize=True)
na_sex_rows = df['sex'].isnull()
df.loc[na_sex_rows, 'sex'] = np.random.choice(sex_dist.index, size=len(df[na_sex_rows]), p = sex_dist.values)
df['sex'] = df['sex'].cat.codes.astype('float')

In [9]:
df['chronic_disease_binary'] = df['chronic_disease_binary'].astype('float')

In [10]:
df['geo_resolution'] = df['geo_resolution'].cat.codes.astype('float')

In [11]:
df['age'] = df['age'].str.replace(r'(\d+) (month|week)s?', '0', regex=True)
df = df.combine_first(df['age'].str.extract(r'^(?P<age_min>\d?\.?\d+)\s*-?\s*(?P<age_max>\d+)?$')).drop(['age'], axis=1)

In [12]:
df['age_max'] = np.where(df['age_max'].isnull(), df['age_min'], float('nan'))

In [13]:
df['age_max'] = df['age_max'].fillna(value=float('nan')).astype('float')
df['age_min'] = df['age_min'].fillna(value=float('nan')).astype('float')

In [14]:
df['age_max'] = df['age_max'].groupby([df['sex'], df['chronic_disease_binary']]).apply(lambda x: x.fillna(x.mean())).astype('float')
df['age_min'] = df['age_min'].groupby([df['sex'], df['chronic_disease_binary']]).apply(lambda x: x.fillna(x.mean())).astype('float')

In [15]:
def convert_col_to_days_since(series, since, invert=False, impute=0, type='float'):
    if not invert:
        v = series - since
    else:
        v = since - series
    
    v = v.dt.days

    if impute is not None:
        v = v.fillna(value=impute)

    return v.astype(type)

In [16]:
df = df.combine_first(df['date_confirmation'].str.extract(r'^(?P<date_confirmation_min>\d+\.\d+\.\d+)\s*-?\s*(?P<date_confirmation_max>\d+\.\d+\.\d+)?$')).drop(['date_confirmation'], axis=1)
df['date_confirmation_max'] = np.where(df['date_confirmation_max'].isnull(), df['date_confirmation_min'], df['date_confirmation_max'])

In [17]:
df[['date_confirmation_min', 'date_confirmation_max']] = df[['date_confirmation_min', 'date_confirmation_max']].apply(pd.to_datetime)

df['ds_date_confirmation_min'] = convert_col_to_days_since(df['date_confirmation_min'], df['date_confirmation_min'].min())
df['ds_date_confirmation_max'] = convert_col_to_days_since(df['date_confirmation_max'], df['date_confirmation_min'].min())

In [18]:
#df[~pd.isnull(df['date_onset_symptoms'])][['date_confirmation', 'date_onset_symptoms']]

df['date_onset_symptoms'] = df['date_onset_symptoms'].replace(['01.01.2020-12.01.2020'], ['06.01.2020'])
df['date_onset_symptoms'] = pd.to_datetime(df['date_onset_symptoms'])

df['ds_date_onset_symptoms'] = convert_col_to_days_since(df['date_onset_symptoms'], df['date_confirmation_min'], invert=True)
df = df.drop(['date_onset_symptoms'], axis=1)

In [19]:
df['date_admission_hospital'] =  pd.to_datetime(df['date_admission_hospital'])

df['ds_date_admission_hospital'] = convert_col_to_days_since(df['date_admission_hospital'], df['date_confirmation_min'], invert=True)
df = df.drop(['date_admission_hospital'], axis=1)

In [20]:
df = df.drop(['date_confirmation_min', 'date_confirmation_max'], axis=1)

In [21]:
df = pd.concat([df, df['travel_history_dates'].str.extract(r'(?P<date_enter_Wuhan>\d+\.\d+\.\d+)?\s*-?\s*(?=(?P<date_exit_Wuhan>\d+\.\d+\.\d+))')], axis=1)
df = df.drop(['travel_history_dates'], axis=1)

df['date_enter_Wuhan'] = pd.to_datetime(df['date_enter_Wuhan'])
df['date_exit_Wuhan'] = pd.to_datetime(df['date_exit_Wuhan'])
df['time_in_Wuhan'] = abs((df[~pd.isnull(df['date_exit_Wuhan'])]['date_exit_Wuhan'] - df['date_enter_Wuhan']).dt.days)
df['time_in_Wuhan'] = np.where(pd.isnull(df['time_in_Wuhan']) & ~pd.isnull(df['date_exit_Wuhan']), df['time_in_Wuhan'].mean(), df['time_in_Wuhan'])

df['time_in_Wuhan'] = df['time_in_Wuhan'].fillna(value=0)
df = df.drop(['date_enter_Wuhan', 'date_exit_Wuhan'], axis=1)

In [22]:
# Drop a row that contains a location under chronic_disease since it's likely bad data entry
df = df[~df['chronic_disease'].str.contains(r'Iran', na=False)]

In [23]:
df['chronic_disease'] = df['chronic_disease'].str.lower()
df['chronic_disease'] = df['chronic_disease'].str.replace(r'(:|;)', ",", regex=True)
df['chronic_disease'] = df['chronic_disease'].str.replace(r'(history of hypertension|hypertension for more than 20 years|hypertenstion|hypertensive)', 'hypertension', regex=True)
df['chronic_disease'] = df['chronic_disease'].str.replace('copd', 'chronic obstructive pulmonary disease')
df['chronic_disease'] = df['chronic_disease'].str.replace('diabetes for more than 20 years', 'diabetes')

df = pd.concat([df, df['chronic_disease'].str.get_dummies(sep=',').add_prefix('chronic_disease_')], axis=1).drop('chronic_disease', axis=1)

In [24]:
df['symptoms'] = df['symptoms'].str.lower()
df['symptoms'] = df['symptoms'].str.replace(r'(:|;)', ",", regex=True)
df = df[~df['symptoms'].str.contains('none', na=False)]

df = pd.concat([df, df['symptoms'].str.get_dummies(sep=',').add_prefix('symptoms_')], axis=1).drop('symptoms', axis=1)

In [25]:
df['travel_history_location'] = df['travel_history_location'].str.lower()
df['travel_history_location'] = df['travel_history_location'].str.replace(r'(:|;)', ",", regex=True)
df = pd.concat([df, df['travel_history_location'].str.get_dummies(sep=',').add_prefix('travel_history_includes_')], axis=1).drop('travel_history_location', axis=1)

In [26]:
df

Unnamed: 0,age_max,age_min,chronic_disease_binary,geo_resolution,latitude,lives_in_Wuhan,longitude,outcome,sex,travel_history_binary,ds_date_confirmation_min,ds_date_confirmation_max,ds_date_onset_symptoms,ds_date_admission_hospital,time_in_Wuhan,chronic_disease_ and lung cancer,chronic_disease_ asthma,chronic_disease_ cerebral infarction,chronic_disease_ chronic obstructive pulmonary disease,chronic_disease_ chronic renal insufficiency,chronic_disease_ colon cancer surgery four years ago,chronic_disease_ coronary artery stenting,chronic_disease_ coronary heart disease,chronic_disease_ coronary heart disease for which a stent had been implanted,chronic_disease_ coronary stenting,chronic_disease_ diabetes,chronic_disease_ encephalomalacia,chronic_disease_ frequent ventricular premature beat (fvpb),chronic_disease_ hemorrhage of digestive tract,chronic_disease_ hip replacement,chronic_disease_ hypertension,chronic_disease_ parkinson's disease,chronic_disease_ stenocardia,chronic_disease_ taking medicine of madopar,chronic_disease_ tuberculosis,chronic_disease_ type 2 diabetes,"chronic_disease_""thought to have had other pre-existing conditions""",chronic_disease_asthma,chronic_disease_atherosclerosis,chronic_disease_atrial fibrillation,chronic_disease_benign prostatic hyperplasia,chronic_disease_benign prostatic hypertrophy,chronic_disease_bronchial asthma,chronic_disease_cardiac disease,chronic_disease_cardiac dysrhythmia,chronic_disease_cardiomyopathy,chronic_disease_cardiovascular disease,chronic_disease_cerebrovascular accident infarct,chronic_disease_cerebrovascular infarct,chronic_disease_chronic bronchitis,chronic_disease_chronic kidney disease,chronic_disease_chronic obstructive pulmonary disease,chronic_disease_chronic pulmonary condition,chronic_disease_colon cancer,chronic_disease_coronary artery disease,chronic_disease_coronary heart disease,chronic_disease_diabetes,chronic_disease_dislipidemia,chronic_disease_dyslipidemia,chronic_disease_hepatitis b,chronic_disease_hiv positive,chronic_disease_hypertension,chronic_disease_hyperthyroidism,chronic_disease_hypothyroidism,chronic_disease_impaired fasting glucose,chronic_disease_ischemic heart disease,chronic_disease_parkinson's disease for five years,chronic_disease_pre-renal azotemia,chronic_disease_prostate cancer,chronic_disease_prostate hypertrophy,chronic_disease_renal disease,chronic_disease_tongue cancer,chronic_disease_upper git bleeding,chronic_disease_valvular heart disease,symptoms_ acute kidney injury,symptoms_ acute respiratory distress syndrome,symptoms_ acute respiratory failure,symptoms_ afebrile,symptoms_ chest pain,symptoms_ conjunctivitis,symptoms_ cough,symptoms_ dyspnea,symptoms_ emesis,symptoms_ expectoration,symptoms_ fatigue,symptoms_ fatigure,symptoms_ fever,symptoms_ gasp,symptoms_ grasp,symptoms_ headache,symptoms_ hypoxia,symptoms_ kidney failure and hypertension,symptoms_ little sputum,symptoms_ malaise,symptoms_ mialgia,symptoms_ muscular soreness,symptoms_ myalgia,symptoms_ myalgias,symptoms_ pneumonia,symptoms_ respiratory stress,symptoms_ running nose,symptoms_ runny nose,symptoms_ sensation of chill,symptoms_ shortness of breath,symptoms_ somnolence,symptoms_ sore throat,symptoms_ sputum,symptoms_ weak,symptoms_acute coronary syndrome,symptoms_acute kidney injury,symptoms_acute myocardial infarction,symptoms_acute renal failure,symptoms_acute respiratory disease,symptoms_acute respiratory disease syndrome,symptoms_acute respiratory distress,symptoms_acute respiratory distress syndrome,symptoms_acute respiratory failure,symptoms_anorexia,symptoms_arrhythmia,symptoms_asymptomatic,symptoms_body malaise,symptoms_cardiac arrhythmia,symptoms_cardiogenic shock,symptoms_cardiopulmonary arrest,symptoms_chest discomfort,symptoms_chest distress,symptoms_chills,symptoms_cold chills,symptoms_colds,symptoms_congestive heart failure,symptoms_cough,symptoms_diarrhea,symptoms_difficulty breathing,symptoms_discomfort,symptoms_dizziness,symptoms_dry cough,symptoms_dysphagia,symptoms_dyspnea,symptoms_eye irritation,symptoms_fatigue,symptoms_fever,symptoms_gastritis,symptoms_headache,symptoms_heart failure,symptoms_hypoxia,symptoms_lesions on chest radiographs,symptoms_mild to moderate,symptoms_multiple electrolyte imbalance,symptoms_multiple organ failure,symptoms_myocardial dysfunction,symptoms_myocardial infarction,symptoms_obnubilation,symptoms_pneumonia,symptoms_primary myelofibrosis,symptoms_respiratory symptoms,symptoms_sepsis,symptoms_septic shock,symptoms_severe,symptoms_severe acute respiratory infection,symptoms_severe pneumonia,symptoms_significant clinical suspicion,symptoms_sore throat,symptoms_systemic weakness,symptoms_torpid evolution with respiratory distress and severe bronchopneumonia,symptoms_transient fatigue,travel_history_includes_ abu dhabi,travel_history_includes_ anhui,travel_history_includes_ argentina,travel_history_includes_ auvergne-rhone-alpes,travel_history_includes_ bahrain,travel_history_includes_ belgium,travel_history_includes_ bengaluru,travel_history_includes_ brazil,travel_history_includes_ burkina faso,travel_history_includes_ canada,travel_history_includes_ chennai,travel_history_includes_ china,travel_history_includes_ comunidad de madrid,travel_history_includes_ denmark,travel_history_includes_ dubai,travel_history_includes_ egypt,travel_history_includes_ england,travel_history_includes_ france,travel_history_includes_ germany,travel_history_includes_ ghana,travel_history_includes_ greater accra region,travel_history_includes_ greece,travel_history_includes_ guangdong,travel_history_includes_ guiyang city,travel_history_includes_ guizhou,travel_history_includes_ hong kong,travel_history_includes_ hubei,travel_history_includes_ hyderabad,travel_history_includes_ india,travel_history_includes_ indonesia,travel_history_includes_ ireland,travel_history_includes_ israel,travel_history_includes_ italy,travel_history_includes_ japan,travel_history_includes_ jiangsu,travel_history_includes_ johor,travel_history_includes_ jordan,travel_history_includes_ karnataka,travel_history_includes_ kazakhstan,travel_history_includes_ kenya,travel_history_includes_ khanh hoa,travel_history_includes_ lombardia,travel_history_includes_ london,travel_history_includes_ malaysia,travel_history_includes_ metro manila,travel_history_includes_ netherlands,travel_history_includes_ new york,travel_history_includes_ parana,travel_history_includes_ philippines,travel_history_includes_ portugal,travel_history_includes_ qatar,travel_history_includes_ shaanxi,travel_history_includes_ singapore,travel_history_includes_ somalia,travel_history_includes_ south africa,travel_history_includes_ south korea,travel_history_includes_ spain,travel_history_includes_ sri lanka,travel_history_includes_ sweden,travel_history_includes_ switzerland,travel_history_includes_ taiwan,travel_history_includes_ tamil nadu,travel_history_includes_ telangana,travel_history_includes_ thailand,travel_history_includes_ togo,travel_history_includes_ turkey,travel_history_includes_ united arab emirates,travel_history_includes_ united kingdom,travel_history_includes_ united states,travel_history_includes_ vietnam,travel_history_includes_ west midlands,travel_history_includes_ wuhan city,travel_history_includes_ xi'an city,travel_history_includes_ yunnan,travel_history_includes_abu dhabi,travel_history_includes_amsterdam,travel_history_includes_ankang city,travel_history_includes_athens,travel_history_includes_australia,travel_history_includes_austria,travel_history_includes_bahamas,travel_history_includes_bahrain,travel_history_includes_bangladesh,travel_history_includes_bavaria,travel_history_includes_belgium,travel_history_includes_bergamo,travel_history_includes_birmingham,travel_history_includes_brazil,travel_history_includes_brussels,travel_history_includes_burkina faso,travel_history_includes_california,travel_history_includes_canada,travel_history_includes_central african republic,travel_history_includes_china,travel_history_includes_colombo,travel_history_includes_cote d'ivoire,travel_history_includes_daegu,travel_history_includes_denmark,travel_history_includes_djibouti,travel_history_includes_dongguan city,travel_history_includes_dubai,travel_history_includes_dublin,travel_history_includes_egypt,travel_history_includes_ezhou city,travel_history_includes_finland,travel_history_includes_florida,travel_history_includes_france,travel_history_includes_free state,travel_history_includes_germany,travel_history_includes_ghana,travel_history_includes_greece,travel_history_includes_guinea,travel_history_includes_guiyang city,travel_history_includes_guyana,travel_history_includes_haute-savoie,travel_history_includes_hubei,travel_history_includes_indonesia,travel_history_includes_iran,travel_history_includes_ireland,travel_history_includes_italy,travel_history_includes_jakarta,travel_history_includes_japan,travel_history_includes_johor,travel_history_includes_kenya,travel_history_includes_lebanon,travel_history_includes_leicestershire,travel_history_includes_liberia,travel_history_includes_lisbon,travel_history_includes_london,travel_history_includes_madrid,travel_history_includes_malaysia,travel_history_includes_mexico,travel_history_includes_milan,travel_history_includes_mozambique,travel_history_includes_netherlands,travel_history_includes_new york,travel_history_includes_new york city,travel_history_includes_new zealand,travel_history_includes_nha trang,travel_history_includes_norway,travel_history_includes_oman,travel_history_includes_pakistan,travel_history_includes_paris,travel_history_includes_philippines,travel_history_includes_phuket,travel_history_includes_portugal,travel_history_includes_puntland,travel_history_includes_qatar,travel_history_includes_republic of congo,travel_history_includes_russia,travel_history_includes_san juan,travel_history_includes_saudi arabia,travel_history_includes_scotland,travel_history_includes_senegal,travel_history_includes_sharjah,travel_history_includes_shenzhen city,travel_history_includes_singapore,travel_history_includes_somalia,travel_history_includes_south africa,travel_history_includes_south korea,travel_history_includes_spain,travel_history_includes_sri lanka,travel_history_includes_stockholm,travel_history_includes_suzhou city,travel_history_includes_sweden,travel_history_includes_switzerland,travel_history_includes_taipei,travel_history_includes_taiwan,travel_history_includes_thailand,travel_history_includes_tianmen city,travel_history_includes_togo,travel_history_includes_trinidad and tobago,travel_history_includes_turkey,travel_history_includes_united arab emirates,travel_history_includes_united kingdom,travel_history_includes_united states,travel_history_includes_washington,travel_history_includes_weinan city,travel_history_includes_western cape,travel_history_includes_wuhan,travel_history_includes_wuhan city,travel_history_includes_xi'an city,travel_history_includes_xiogan city
0,37.759312,37.747402,0.0,4.0,22.365019,0.0,114.133808,1,1.0,0.0,43.0,43.0,0.0,0.0,68.52,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,78.000000,78.000000,0.0,4.0,45.297748,0.0,11.658382,0,1.0,0.0,50.0,50.0,0.0,0.0,0.00,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,61.000000,61.000000,0.0,0.0,1.353460,0.0,103.815100,3,0.0,0.0,43.0,43.0,0.0,0.0,0.00,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
113,28.000000,28.000000,0.0,0.0,1.353460,0.0,103.815100,3,1.0,0.0,43.0,43.0,0.0,0.0,0.00,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
224,56.000000,56.000000,0.0,0.0,1.353460,0.0,103.815100,3,0.0,0.0,43.0,43.0,0.0,0.0,0.00,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
670224,80.000000,80.000000,0.0,2.0,44.461123,0.0,-73.081581,0,0.0,0.0,76.0,76.0,0.0,0.0,0.00,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
672529,37.759312,60.000000,0.0,2.0,44.003584,0.0,-102.826120,0,1.0,0.0,275.0,275.0,0.0,0.0,0.00,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
672651,37.547725,90.000000,0.0,2.0,47.491332,0.0,-121.803640,0,0.0,0.0,92.0,92.0,0.0,0.0,0.00,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
672674,70.000000,70.000000,0.0,2.0,45.546910,0.0,-122.414900,0,1.0,0.0,275.0,275.0,0.0,0.0,0.00,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [27]:
df.to_pickle('./processed-data.pkl')
df.to_csv('./processed-data.csv', index=False)