In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
# NOTE(review): prints the literal string "water"; looks like a leftover
# sanity-check/debug print rather than part of the analysis — consider removing.
print("water")

water


In [2]:
# Load the raw dengue table from the working directory and preview the
# first rows (the last expression of the cell is rendered as its output).
dengue = pd.read_csv('DENGUE FEVER.csv')
dengue.head()

Unnamed: 0.1,Unnamed: 0,year,week,month,adm3_pcode,adm3_en,tave,tmin,tmax,heat_index,...,pharmacy_nearest,pop_count_total,pop_density_mean,clinic_count,dentist_count,doctors_count,hospital_count,optician_count,pharmacy_count,case_total
0,0,2008,1,1,PH137401000,City of Mandaluyong,25.845,23.536667,29.196667,27.045,...,,11544.901089,31300.004557,0.0,0.0,0.0,0.0,0.0,0.0,
1,1,2008,1,1,PH137503000,City of Navotas,25.436667,23.408333,28.186667,26.361667,...,,19420.567766,42940.326172,0.0,0.0,0.0,0.0,0.0,0.0,
2,2,2008,1,1,PH137603000,City of Muntinlupa,25.489901,23.190525,28.79678,26.600326,...,,49608.98546,10734.906445,0.0,0.0,0.0,0.0,0.0,0.0,
3,3,2008,1,12,PH137401000,City of Mandaluyong,26.446667,24.63,28.93,28.196667,...,,11544.901089,31300.004557,0.0,0.0,0.0,0.0,0.0,0.0,3.0
4,4,2008,1,12,PH137503000,City of Navotas,26.156667,24.513333,28.403333,27.556667,...,,19420.567766,42940.326172,0.0,0.0,0.0,0.0,0.0,0.0,16.0


In [3]:
# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called bare; wrapping it in print() would append a stray "None" line.
dengue.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2811 entries, 0 to 2810
Data columns (total 30 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   Unnamed: 0        2811 non-null   int64  
 1   year              2811 non-null   int64  
 2   week              2811 non-null   int64  
 3   month             2811 non-null   int64  
 4   adm3_pcode        2811 non-null   object 
 5   adm3_en           2811 non-null   object 
 6   tave              2811 non-null   float64
 7   tmin              2811 non-null   float64
 8   tmax              2811 non-null   float64
 9   heat_index        2811 non-null   float64
 10  pr                2811 non-null   float64
 11  wind_speed        2811 non-null   float64
 12  rh                2811 non-null   float64
 13  solar_rad         2811 non-null   float64
 14  uv_rad            2811 non-null   float64
 15  clinic_nearest    1686 non-null   float64
 16  dentist_nearest   1686 non-null   float64


In [4]:
# --- Settings used by this cleaning step ------------------------------------
# Integer codes for the three city codes handled by this notebook.
adm3_pcode_mapping = {
    'PH137401000': 1,
    'PH137503000': 2,
    'PH137603000': 3
}
# Climate variables for which lagged copies are generated.
climate_columns = ['tave','tmin','tmax','heat_index','pr','wind_speed','rh','solar_rad','uv_rad']
# Number of monthly lags (t-1 ... t-n_shifts) built for each climate column.
n_shifts = 3
# Identifier / unused columns removed from the final feature table.
columns_to_drop = ['year', 'week', 'month', 'adm3_pcode', 'clinic_count', 'clinic_nearest']

# --- Row-level cleaning -----------------------------------------------------
# Keep 2013 onward (2013 only serves as history for the lags), drop the saved
# index column and the city name, replace missing values with 0, round to two
# decimals, encode the city code as a number and make every column float.
# .round(2) replaces the former format-to-string / parse-back round trip;
# .astype(float) replaces the deprecated DataFrame.applymap(float).
# NOTE(review): fillna(0) also turns a missing case_total / *_nearest value
# into 0 — confirm that 0 is the intended stand-in for "unknown".
dengue_cleaned = (
    dengue[dengue['year'] >= 2013]
    .drop(columns=['Unnamed: 0', 'adm3_en'])
    .fillna(0)
    .round(2)
    .assign(adm3_pcode=lambda frame: frame['adm3_pcode'].map(adm3_pcode_mapping))
    .astype(float)
)

# --- Monthly aggregation ----------------------------------------------------
# One row per (year, month, city); the result is sorted by those keys.
dengue_cleaned = dengue_cleaned.groupby(['year','month','adm3_pcode']).mean()

# --- Lagged climate features ------------------------------------------------
# Shift inside each city's own series so "lag k" always refers to the same
# city k rows (months) earlier. A plain positional shift over the interleaved
# rows would pull values from a different city as soon as one (month, city)
# row is missing. Column names keep the existing '<col>-1.0' style.
# Caveat: a missing month for a city makes the lag point to that city's
# previous available month rather than the previous calendar month.
for col in climate_columns:
    for lag in range(1, n_shifts + 1):
        dengue_cleaned[f'{col}-{float(lag)}'] = (
            dengue_cleaned.groupby(level='adm3_pcode')[col].shift(lag)
        )

# --- Final feature table ----------------------------------------------------
# Rows without a full lag history are dropped, then only years after 2013
# are kept and the identifier columns are removed.
dengue_cleaned = dengue_cleaned.reset_index().dropna()
dengue_cleaned = dengue_cleaned[dengue_cleaned['year'] > 2013]
dengue_cleaned = dengue_cleaned.drop(columns=columns_to_drop)

In [5]:
# info() prints its own report and returns None; calling it bare avoids the
# extra "None" line that print(...) would add.
dengue_cleaned.info()

# Last expression of the cell: rendered as the preview table.
dengue_cleaned.head()

<class 'pandas.core.frame.DataFrame'>
Index: 324 entries, 36 to 359
Data columns (total 49 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   tave              324 non-null    float64
 1   tmin              324 non-null    float64
 2   tmax              324 non-null    float64
 3   heat_index        324 non-null    float64
 4   pr                324 non-null    float64
 5   wind_speed        324 non-null    float64
 6   rh                324 non-null    float64
 7   solar_rad         324 non-null    float64
 8   uv_rad            324 non-null    float64
 9   dentist_nearest   324 non-null    float64
 10  doctors_nearest   324 non-null    float64
 11  hospital_nearest  324 non-null    float64
 12  optician_nearest  324 non-null    float64
 13  pharmacy_nearest  324 non-null    float64
 14  pop_count_total   324 non-null    float64
 15  pop_density_mean  324 non-null    float64
 16  dentist_count     324 non-null    float64
 17  d

Unnamed: 0,tave,tmin,tmax,heat_index,pr,wind_speed,rh,solar_rad,uv_rad,dentist_nearest,...,wind_speed-3.0,rh-1.0,rh-2.0,rh-3.0,solar_rad-1.0,solar_rad-2.0,solar_rad-3.0,uv_rad-1.0,uv_rad-2.0,uv_rad-3.0
36,25.556,23.066,28.834,26.434,0.48,3.714,68.282,190.046,22.378,10000.0,...,2.064,78.245,78.82,78.188,153.038333,175.394,192.85,18.603333,21.47,23.406
37,24.922,22.712,27.774,25.506,0.286,3.138,67.308,197.286,23.014,10000.0,...,1.524,77.398333,78.062,78.89,153.863333,174.8,193.374,18.56,21.1,23.496
38,25.18,22.716,28.438,25.976,0.498,3.692,68.996,189.62,22.312,10000.0,...,2.07,78.933333,79.504,78.668,154.22,176.642,194.846,18.708333,21.58,23.554
39,26.608,23.82,30.206,28.102,0.516,2.912,72.24,196.138,23.314,10000.0,...,3.084,68.282,78.245,78.82,190.046,153.038333,175.394,22.378,18.603333,21.47
40,26.208,23.546,29.464,27.344,0.468,2.182,70.792,197.318,23.3,10000.0,...,2.536,67.308,77.398333,78.062,197.286,153.863333,174.8,23.014,18.56,21.1


In [6]:
# Persist the cleaned dengue features; index=False keeps the row index out of
# the file so it does not come back as an extra unnamed column on reload.
dengue_cleaned.to_csv('Cleaned_Dengue_Data.csv', index=False)

In [7]:
# Load the raw leptospirosis table from the working directory.
leptospirosis = pd.read_csv('LEPTOSPIROSIS.csv')
# info() prints its own report and returns None, so it is not wrapped in print().
leptospirosis.info()
# Kept as the last expression so the preview is actually rendered; a bare
# head() in the middle of a cell is evaluated and silently discarded.
leptospirosis.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2811 entries, 0 to 2810
Data columns (total 30 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   Unnamed: 0        2811 non-null   int64  
 1   year              2811 non-null   int64  
 2   week              2811 non-null   int64  
 3   month             2811 non-null   int64  
 4   adm3_pcode        2811 non-null   object 
 5   adm3_en           2811 non-null   object 
 6   tave              2811 non-null   float64
 7   tmin              2811 non-null   float64
 8   tmax              2811 non-null   float64
 9   heat_index        2811 non-null   float64
 10  pr                2811 non-null   float64
 11  wind_speed        2811 non-null   float64
 12  rh                2811 non-null   float64
 13  solar_rad         2811 non-null   float64
 14  uv_rad            2811 non-null   float64
 15  clinic_nearest    1686 non-null   float64
 16  dentist_nearest   1686 non-null   float64


In [8]:
# --- Settings used by this cleaning step ------------------------------------
# Integer codes for the three city codes handled by this notebook.
adm3_pcode_mapping = {
    'PH137401000': 1,
    'PH137503000': 2,
    'PH137603000': 3
}
# Climate variables for which lagged copies are generated.
climate_columns = ['tave','tmin','tmax','heat_index','pr','wind_speed','rh','solar_rad','uv_rad']
# Number of monthly lags (t-1 ... t-n_shifts) built for each climate column.
n_shifts = 3
# Identifier / unused columns removed from the final feature table.
columns_to_drop = ['year', 'week', 'month', 'adm3_pcode', 'clinic_count', 'clinic_nearest']

# --- Row-level cleaning -----------------------------------------------------
# Keep 2013 onward (2013 only serves as history for the lags), drop the saved
# index column and the city name, replace missing values with 0, round to two
# decimals, encode the city code as a number and make every column float.
# .round(2) replaces the former format-to-string / parse-back round trip;
# .astype(float) replaces the deprecated DataFrame.applymap(float).
# NOTE(review): fillna(0) also turns a missing case_total / *_nearest value
# into 0 — confirm that 0 is the intended stand-in for "unknown".
leptospirosis_cleaned = (
    leptospirosis[leptospirosis['year'] >= 2013]
    .drop(columns=['Unnamed: 0', 'adm3_en'])
    .fillna(0)
    .round(2)
    .assign(adm3_pcode=lambda frame: frame['adm3_pcode'].map(adm3_pcode_mapping))
    .astype(float)
)

# --- Monthly aggregation ----------------------------------------------------
# One row per (year, month, city); the result is sorted by those keys.
leptospirosis_cleaned = leptospirosis_cleaned.groupby(['year','month','adm3_pcode']).mean()

# --- Lagged climate features ------------------------------------------------
# Shift inside each city's own series so "lag k" always refers to the same
# city k rows (months) earlier. A plain positional shift over the interleaved
# rows would pull values from a different city as soon as one (month, city)
# row is missing. Column names keep the existing '<col>-1.0' style.
# Caveat: a missing month for a city makes the lag point to that city's
# previous available month rather than the previous calendar month.
for col in climate_columns:
    for lag in range(1, n_shifts + 1):
        leptospirosis_cleaned[f'{col}-{float(lag)}'] = (
            leptospirosis_cleaned.groupby(level='adm3_pcode')[col].shift(lag)
        )

# --- Final feature table ----------------------------------------------------
# Rows without a full lag history are dropped, then only years after 2013
# are kept and the identifier columns are removed.
leptospirosis_cleaned = leptospirosis_cleaned.reset_index().dropna()
leptospirosis_cleaned = leptospirosis_cleaned[leptospirosis_cleaned['year'] > 2013]
leptospirosis_cleaned = leptospirosis_cleaned.drop(columns=columns_to_drop)

In [9]:
# Print the column / dtype / non-null summary of the cleaned leptospirosis table.
leptospirosis_cleaned.info()

<class 'pandas.core.frame.DataFrame'>
Index: 324 entries, 36 to 359
Data columns (total 49 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   tave              324 non-null    float64
 1   tmin              324 non-null    float64
 2   tmax              324 non-null    float64
 3   heat_index        324 non-null    float64
 4   pr                324 non-null    float64
 5   wind_speed        324 non-null    float64
 6   rh                324 non-null    float64
 7   solar_rad         324 non-null    float64
 8   uv_rad            324 non-null    float64
 9   dentist_nearest   324 non-null    float64
 10  doctors_nearest   324 non-null    float64
 11  hospital_nearest  324 non-null    float64
 12  optician_nearest  324 non-null    float64
 13  pharmacy_nearest  324 non-null    float64
 14  pop_count_total   324 non-null    float64
 15  pop_density_mean  324 non-null    float64
 16  dentist_count     324 non-null    float64
 17  d

In [10]:
# Persist the cleaned leptospirosis features. index=False matches the dengue
# export: without it the leftover row index (36, 37, ...) is written as an
# extra unnamed column that reappears as 'Unnamed: 0' when the file is reloaded.
leptospirosis_cleaned.to_csv('Cleaned_Leptospirosis_Data.csv', index=False)

In [11]:
# Load the raw typhoid fever table from the working directory.
typhoid = pd.read_csv('TYPHOID FEVER.csv')
# info() prints its own report and returns None, so it is not wrapped in print().
typhoid.info()
# Kept as the last expression so the preview is actually rendered; a bare
# head() in the middle of a cell is evaluated and silently discarded.
typhoid.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2811 entries, 0 to 2810
Data columns (total 30 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   Unnamed: 0        2811 non-null   int64  
 1   year              2811 non-null   int64  
 2   week              2811 non-null   int64  
 3   month             2811 non-null   int64  
 4   adm3_pcode        2811 non-null   object 
 5   adm3_en           2811 non-null   object 
 6   tave              2811 non-null   float64
 7   tmin              2811 non-null   float64
 8   tmax              2811 non-null   float64
 9   heat_index        2811 non-null   float64
 10  pr                2811 non-null   float64
 11  wind_speed        2811 non-null   float64
 12  rh                2811 non-null   float64
 13  solar_rad         2811 non-null   float64
 14  uv_rad            2811 non-null   float64
 15  clinic_nearest    1686 non-null   float64
 16  dentist_nearest   1686 non-null   float64


In [12]:
# --- Settings used by this cleaning step ------------------------------------
# Integer codes for the three city codes handled by this notebook.
adm3_pcode_mapping = {
    'PH137401000': 1,
    'PH137503000': 2,
    'PH137603000': 3
}
# Climate variables for which lagged copies are generated.
climate_columns = ['tave','tmin','tmax','heat_index','pr','wind_speed','rh','solar_rad','uv_rad']
# Number of monthly lags (t-1 ... t-n_shifts) built for each climate column.
n_shifts = 3
# Identifier / unused columns removed from the final feature table.
columns_to_drop = ['year', 'week', 'month', 'adm3_pcode', 'clinic_count', 'clinic_nearest']

# --- Row-level cleaning -----------------------------------------------------
# Keep 2013 onward (2013 only serves as history for the lags), drop the saved
# index column and the city name, replace missing values with 0, round to two
# decimals, encode the city code as a number and make every column float.
# .round(2) replaces the former format-to-string / parse-back round trip;
# .astype(float) replaces the deprecated DataFrame.applymap(float).
# NOTE(review): fillna(0) also turns a missing case_total / *_nearest value
# into 0 — confirm that 0 is the intended stand-in for "unknown".
typhoid_cleaned = (
    typhoid[typhoid['year'] >= 2013]
    .drop(columns=['Unnamed: 0', 'adm3_en'])
    .fillna(0)
    .round(2)
    .assign(adm3_pcode=lambda frame: frame['adm3_pcode'].map(adm3_pcode_mapping))
    .astype(float)
)

# --- Monthly aggregation ----------------------------------------------------
# One row per (year, month, city); the result is sorted by those keys.
typhoid_cleaned = typhoid_cleaned.groupby(['year','month','adm3_pcode']).mean()

# --- Lagged climate features ------------------------------------------------
# Shift inside each city's own series so "lag k" always refers to the same
# city k rows (months) earlier. A plain positional shift over the interleaved
# rows would pull values from a different city as soon as one (month, city)
# row is missing. Column names keep the existing '<col>-1.0' style.
# Caveat: a missing month for a city makes the lag point to that city's
# previous available month rather than the previous calendar month.
for col in climate_columns:
    for lag in range(1, n_shifts + 1):
        typhoid_cleaned[f'{col}-{float(lag)}'] = (
            typhoid_cleaned.groupby(level='adm3_pcode')[col].shift(lag)
        )

# --- Final feature table ----------------------------------------------------
# Rows without a full lag history are dropped, then only years after 2013
# are kept and the identifier columns are removed.
typhoid_cleaned = typhoid_cleaned.reset_index().dropna()
typhoid_cleaned = typhoid_cleaned[typhoid_cleaned['year'] > 2013]
typhoid_cleaned = typhoid_cleaned.drop(columns=columns_to_drop)

In [13]:
# Print the column / dtype / non-null summary of the cleaned typhoid table.
typhoid_cleaned.info()

<class 'pandas.core.frame.DataFrame'>
Index: 324 entries, 36 to 359
Data columns (total 49 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   tave              324 non-null    float64
 1   tmin              324 non-null    float64
 2   tmax              324 non-null    float64
 3   heat_index        324 non-null    float64
 4   pr                324 non-null    float64
 5   wind_speed        324 non-null    float64
 6   rh                324 non-null    float64
 7   solar_rad         324 non-null    float64
 8   uv_rad            324 non-null    float64
 9   dentist_nearest   324 non-null    float64
 10  doctors_nearest   324 non-null    float64
 11  hospital_nearest  324 non-null    float64
 12  optician_nearest  324 non-null    float64
 13  pharmacy_nearest  324 non-null    float64
 14  pop_count_total   324 non-null    float64
 15  pop_density_mean  324 non-null    float64
 16  dentist_count     324 non-null    float64
 17  d

In [14]:
# Persist the cleaned typhoid features. index=False matches the dengue export:
# without it the leftover row index (36, 37, ...) is written as an extra
# unnamed column that reappears as 'Unnamed: 0' when the file is reloaded.
typhoid_cleaned.to_csv('Cleaned_Typhoid_Data.csv', index=False)