In [1]:
import numpy as np
import pandas as pd 
import seaborn as sns 
import matplotlib.pyplot as plt 
import warnings
from scipy.stats import f_oneway
warnings.filterwarnings('ignore')

In [2]:
dengue = pd.read_csv('dengue.csv')
dengue

Unnamed: 0,Month,Year,Region,Dengue_Cases,Dengue_Deaths
0,January,2016,Region I,705,1
1,February,2016,Region I,374,0
2,March,2016,Region I,276,0
3,April,2016,Region I,240,2
4,May,2016,Region I,243,1
...,...,...,...,...,...
1015,August,2020,BARMM,91,0
1016,September,2020,BARMM,16,8
1017,October,2020,BARMM,13,9
1018,November,2020,BARMM,15,1


In [3]:
dengue.dtypes

Month            object
Year              int64
Region           object
Dengue_Cases      int64
Dengue_Deaths     int64
dtype: object

In [9]:
dengue['Year'] = dengue['Year'].astype('int')
dengue['Month'] = dengue['Month'].astype('str')

In [10]:
dengue.dtypes

Month            object
Year              int64
Region           object
Dengue_Cases      int64
Dengue_Deaths     int64
dtype: object

In [11]:
# Combine Month and Year into a Date column
dengue['Date'] = pd.to_datetime(dengue['Month'] + ' ' + dengue['Year'].astype(str))

# Optional: sort by date
dengue = dengue.sort_values('Date')

In [13]:
region_df = dengue[dengue['Region'] == 'Region IV-A']
monthly_death = region_df.groupby('Date')['Dengue_Deaths'].sum().reset_index()

In [15]:
from sklearn.linear_model import LinearRegression
import numpy as np

# Convert dates to ordinal for regression

monthly_death['Date_Ordinal'] = monthly_cases['Date'].map(pd.Timestamp.toordinal)

# # # Reshape for sklearn
X = monthly_death['Date_Ordinal'].values.reshape(-1, 1) #independent variable 
y = monthly_death['Dengue_Deaths'].values   #dependent variable 

# # # Fit model
model = LinearRegression()
model.fit(X, y)

# # # Predict
monthly_death['Predicted_Cases'] = model.predict(X)

# # # Evaluate: R-squared
r_squared = model.score(X, y)
print(f"The model explains {r_squared:.1%} of the variation in dengue death.")


The model explains 0.2% of the variation in dengue death.


In [16]:
monthly_death

Unnamed: 0,Date,Dengue_Deaths,Date_Ordinal,Predicted_Cases
0,2016-01-01,14,735964,10.035293
1,2016-02-01,5,735995,10.064005
2,2016-03-01,9,736024,10.090864
3,2016-04-01,1,736055,10.119576
4,2016-05-01,1,736085,10.147362
5,2016-06-01,4,736116,10.176073
6,2016-07-01,6,736146,10.203859
7,2016-08-01,2,736177,10.232571
8,2016-09-01,28,736208,10.261282
9,2016-10-01,20,736238,10.289068


In [17]:
pip install scikit-learn

Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 25.1.1 -> 25.3
[notice] To update, run: python.exe -m pip install --upgrade pip


In [20]:
dengue['Outbreak'] = (dengue['Dengue_Cases'] > 5000).astype(int)

In [21]:
dengue

Unnamed: 0,Month,Year,Region,Dengue_Cases,Dengue_Deaths,Date,Outbreak
0,January,2016,Region I,705,1,2016-01-01,0
840,January,2016,NCR,1261,4,2016-01-01,0
780,January,2016,Region XIII,1116,6,2016-01-01,0
240,January,2016,Region IV-B,203,1,2016-01-01,0
120,January,2016,Region III,1976,3,2016-01-01,0
...,...,...,...,...,...,...,...
119,December,2020,Region II,96,0,2020-12-01,0
899,December,2020,NCR,658,4,2020-12-01,0
59,December,2020,Region I,1081,1,2020-12-01,0
659,December,2020,Region X,281,1,2020-12-01,0


In [22]:
# Encode categorical features
df_encoded = pd.get_dummies(dengue[['Region', 'Month']], drop_first=True)

# Combine with numerical features
df_model = pd.concat([df_encoded, dengue[['Year', 'Dengue_Cases', 'Outbreak']]], axis=1)

In [23]:
df_encoded

Unnamed: 0,Region_CAR,Region_NCR,Region_Region I,Region_Region II,Region_Region III,Region_Region IV-A,Region_Region IV-B,Region_Region IX,Region_Region V,Region_Region VI,...,Month_December,Month_February,Month_January,Month_July,Month_June,Month_March,Month_May,Month_November,Month_October,Month_September
0,False,False,True,False,False,False,False,False,False,False,...,False,False,True,False,False,False,False,False,False,False
840,False,True,False,False,False,False,False,False,False,False,...,False,False,True,False,False,False,False,False,False,False
780,False,False,False,False,False,False,False,False,False,False,...,False,False,True,False,False,False,False,False,False,False
240,False,False,False,False,False,False,True,False,False,False,...,False,False,True,False,False,False,False,False,False,False
120,False,False,False,False,True,False,False,False,False,False,...,False,False,True,False,False,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
119,False,False,False,True,False,False,False,False,False,False,...,True,False,False,False,False,False,False,False,False,False
899,False,True,False,False,False,False,False,False,False,False,...,True,False,False,False,False,False,False,False,False,False
59,False,False,True,False,False,False,False,False,False,False,...,True,False,False,False,False,False,False,False,False,False
659,False,False,False,False,False,False,False,False,False,False,...,True,False,False,False,False,False,False,False,False,False


In [24]:
from sklearn.model_selection import train_test_split

X = df_model.drop('Outbreak', axis=1)
y = df_model['Outbreak']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [26]:
from sklearn.linear_model import LogisticRegression

model = LogisticRegression(max_iter=5000)
model.fit(X_train, y_train)


0,1,2
,penalty,'l2'
,dual,False
,tol,0.0001
,C,1.0
,fit_intercept,True
,intercept_scaling,1
,class_weight,
,random_state,
,solver,'lbfgs'
,max_iter,5000


In [27]:
from sklearn.metrics import classification_report, confusion_matrix

y_pred = model.predict(X_test)

print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))

[[197   0]
 [  0   7]]
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       197
           1       1.00      1.00      1.00         7

    accuracy                           1.00       204
   macro avg       1.00      1.00      1.00       204
weighted avg       1.00      1.00      1.00       204



In [36]:
def classify_severity(cases):
    if cases < 500:
        return 0
    elif cases <= 5000:
        return 1
    else:
        return 2

dengue['Severity'] = dengue['Dengue_Cases'].apply(classify_severity)

In [37]:
dengue.dtypes

Month                    object
Year                      int64
Region                   object
Dengue_Cases              int64
Dengue_Deaths             int64
Date             datetime64[ns]
Outbreak                  int64
Severity                  int64
dtype: object

In [38]:
# Encode categorical features
df_encoded = pd.get_dummies(dengue[['Region', 'Month']], drop_first=True)

# Combine with numerical features
df_model = pd.concat([df_encoded, dengue[['Year', 'Dengue_Cases', 'Severity']]], axis=1)
df_model

Unnamed: 0,Region_CAR,Region_NCR,Region_Region I,Region_Region II,Region_Region III,Region_Region IV-A,Region_Region IV-B,Region_Region IX,Region_Region V,Region_Region VI,...,Month_July,Month_June,Month_March,Month_May,Month_November,Month_October,Month_September,Year,Dengue_Cases,Severity
0,False,False,True,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,2016,705,1
840,False,True,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,2016,1261,1
780,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,2016,1116,1
240,False,False,False,False,False,False,True,False,False,False,...,False,False,False,False,False,False,False,2016,203,0
120,False,False,False,False,True,False,False,False,False,False,...,False,False,False,False,False,False,False,2016,1976,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
119,False,False,False,True,False,False,False,False,False,False,...,False,False,False,False,False,False,False,2020,96,0
899,False,True,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,2020,658,1
59,False,False,True,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,2020,1081,1
659,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,2020,281,0


In [39]:
from sklearn.model_selection import train_test_split

X = df_model.drop('Severity', axis=1)
y = df_model['Severity']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [40]:
from sklearn.linear_model import LogisticRegression

# Multinomial logistic regression
model = LogisticRegression(multi_class='multinomial', solver='lbfgs', max_iter=5000)
model.fit(X_train, y_train)


0,1,2
,penalty,'l2'
,dual,False
,tol,0.0001
,C,1.0
,fit_intercept,True
,intercept_scaling,1
,class_weight,
,random_state,
,solver,'lbfgs'
,max_iter,5000


In [42]:
from sklearn.metrics import classification_report, confusion_matrix

y_pred = model.predict(X_test)

print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))


[[97  1  0]
 [ 0 99  0]
 [ 0  0  7]]
              precision    recall  f1-score   support

           0       1.00      0.99      0.99        98
           1       0.99      1.00      0.99        99
           2       1.00      1.00      1.00         7

    accuracy                           1.00       204
   macro avg       1.00      1.00      1.00       204
weighted avg       1.00      1.00      1.00       204

