In [3]:
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
import sklearn.metrics as metrics
from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score
from sklearn.model_selection import train_test_split
import platform

In [4]:
#Reading the pre_processed data
os_type = platform.system()
if os_type.startswith("Darwin"):
    data = pd.read_excel(r"dataset/processed_data/main/final_cleaned_dataset.xlsx")
else: 
    data = pd.read_excel(r"dataset\processed_data\main\final_cleaned_dataset.xlsx")
#Droping the target col
feature_value_temp = data.drop("Laboratory confirmed, since the beginning of the pandemic Hospitalized", axis=1)
#Droping the Distric col as it is string and will be assigned value by clustering
features = feature_value_temp.drop("District",axis=1)
features = features.drop("Unnamed: 0",axis=1)
features = features.drop("Laboratory confirmed, since the beginning of the pandemic TOTAL",axis=1)
features = features.drop("Laboratory confirmed, since the beginning of the pandemic RecoveredA",axis=1)
features = features.drop("Laboratory confirmed, since the beginning of the pandemic Deceased",axis=1)


data['flag'] = 3  # Initialize the 'flag' column with 0
data.loc[data['Laboratory confirmed, since the beginning of the pandemic Hospitalized'] <= 449, 'flag'] = 2
data.loc[data['Laboratory confirmed, since the beginning of the pandemic Hospitalized'] <= 290, 'flag'] = 1

target_col = data["flag"]

In [5]:
from math import sqrt
r2_score = metrics.r2_score

# Splitting the data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(features, target_col, test_size=0.3, random_state=42)

# Instantiating the gradient boosting regressor model
gb_reg = DecisionTreeClassifier()

# Fitting the model to the training data
gb_reg.fit(X_train, y_train)

# Predicting the test set results
y_pred = gb_reg.predict(X_test)

# Evaluating the model performance
mse =  metrics.mean_squared_error(y_test, y_pred)
rmse = sqrt(mse)
print(f"Root Mean Squared Error: {rmse}")
print(f"Mean Squared Error:{mse}")
print("R2 Score:", metrics.r2_score(y_test, y_pred))
# Calculate accuracy, recall, precision, and F1 score
accuracy = accuracy_score(y_test, y_pred)
recall = recall_score(y_test, y_pred,average='weighted')
precision = precision_score(y_test, y_pred,average='weighted')
f1 = f1_score(y_test, y_pred,average='weighted')

print(f"Accuracy: {accuracy:.2f}")
print(f"Recall: {recall:.2f}")
print(f"Precision: {precision:.2f}")
print(f"F1 Score: {f1:.2f}")

Root Mean Squared Error: 0.6837141726337691
Mean Squared Error:0.4674650698602794
R2 Score: 0.3250124085580738
Accuracy: 0.70
Recall: 0.70
Precision: 0.70
F1 Score: 0.70


In [7]:
import os
import csv
csv_file = "model_metrics.csv"
model_name= "Gradient Boosting Classifier"
# Check if the CSV file exists, and create it with a header if it doesn't
if not os.path.exists(csv_file):
    with open(csv_file, mode='w', newline='') as file:
        writer = csv.writer(file)
        writer.writerow(["Model Name", "Accuracy", "Recall", "Precision", "F1 Score","Mse","RMSE"])

# Append the values to the CSV file
with open(csv_file, mode='a', newline='') as file:
    writer = csv.writer(file)
    writer.writerow([model_name, accuracy, recall, precision, f1,mse,rmse])

print(f"Metrics appended to {csv_file} for {model_name}.")

Metrics appended to model_metrics.csv for Gradient Boosting Classifier.
