In [1]:
import random
import platform
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score

In [2]:
def load_and_preprocess(data_path="dataset/processed_data/main/final_cleaned_dataset.xlsx"):
    """Load the cleaned dataset and derive the feature frame and class labels.

    The hospitalized count is binned into three classes that serve as the
    classification target:

    * ``1`` -- hospitalized <= 290
    * ``2`` -- 290 < hospitalized <= 449
    * ``3`` -- everything else (including rows where the count is missing,
      because a NaN comparison is False and the default label is kept)

    Parameters
    ----------
    data_path : str or path-like, optional
        Location of the Excel file. The default is a relative path written
        with forward slashes, which Python accepts on Windows, macOS and
        Linux. (A backslash-separated path only works on Windows; on Linux
        it is treated as one literal file name and the load fails.)

    Returns
    -------
    features : pandas.DataFrame
        The dataset without the target column and the other non-feature
        columns listed in ``non_feature_cols``.
    target : numpy.ndarray
        Integer class label (1, 2 or 3) for every row of the dataset.

    Raises
    ------
    KeyError
        If any of the expected columns is missing from the file.
    """
    hospitalized_col = "Laboratory confirmed, since the beginning of the pandemic Hospitalized"
    non_feature_cols = [
        # Source of the target labels, so it must not be a feature.
        hospitalized_col,
        # String column; not usable as a numeric feature as-is.
        "District",
        # Presumably a leftover index column from an earlier save -- TODO confirm.
        "Unnamed: 0",
        # Other outcome counts; presumably dropped because they are closely
        # tied to the target -- TODO confirm.
        "Laboratory confirmed, since the beginning of the pandemic TOTAL",
        "Laboratory confirmed, since the beginning of the pandemic RecoveredA",
        "Laboratory confirmed, since the beginning of the pandemic Deceased",
    ]

    # Read the pre-processed data.
    data = pd.read_excel(data_path)
    features = data.drop(columns=non_feature_cols)

    # Start every row at class 3, then overwrite from the widest threshold to
    # the narrowest so that the lowest matching class wins.
    hospitalized = data[hospitalized_col]
    flag = pd.Series(3, index=data.index)
    flag.loc[hospitalized <= 449] = 2
    flag.loc[hospitalized <= 290] = 1

    return features, np.array(flag)

In [3]:
def flip_label(current_label):
    """Return a replacement class label that differs from ``current_label``.

    The label is shifted by one and reduced modulo 3; a remainder of 0 is
    bumped to 1. For the class labels 1, 2 and 3 this gives the mapping
    1 -> 2, 2 -> 1 and 3 -> 1.

    NOTE(review): label 3 is never produced by this mapping -- confirm
    whether a full 1 -> 2 -> 3 -> 1 rotation was intended.
    """
    shifted = (current_label + 1) % 3
    # A remainder of 0 is outside the 1..3 label range, so move it up to 1.
    if shifted == 0:
        return shifted + 1
    return shifted

def label_flipping_poisoning(y_train, poison_rate, seed=None):
    """Flip the labels of a randomly chosen ``poison_rate`` percent of ``y_train``.

    The labels are changed **in place** and the same object is returned, so
    pass a copy (for example ``np.copy(y_train)``) when the clean labels are
    still needed afterwards.

    Parameters
    ----------
    y_train : list or 1-D numpy.ndarray
        Labels, read and written by integer position.
    poison_rate : int or float
        Percentage of labels to flip, between 0 and 100. The resulting count
        is truncated toward zero.
    seed : int or None, optional
        When given, the positions are drawn from a private
        ``random.Random(seed)`` so the same positions are chosen on every
        call. When ``None`` (the default) the module-level ``random``
        generator is used, so ``random.seed(...)`` still controls the draw.

    Returns
    -------
    The ``y_train`` object that was passed in, with the selected labels
    replaced by ``flip_label(label)``.

    Raises
    ------
    ValueError
        If ``poison_rate`` is not within 0..100.
    """
    if not 0 <= poison_rate <= 100:
        raise ValueError(f"poison_rate must be between 0 and 100, got {poison_rate!r}")

    rng = random if seed is None else random.Random(seed)

    # Multiply before dividing: len * (rate / 100) can land just below the
    # exact value (100 * (29 / 100) == 28.999999999999996) and int() would
    # then drop one label from the count.
    alteration_count = int(len(y_train) * poison_rate / 100)

    # sample() draws without replacement, so each position is flipped once.
    for index in rng.sample(range(len(y_train)), alteration_count):
        y_train[index] = flip_label(y_train[index])
    return y_train

In [16]:
# Build the features and the 3-class target, then hold out 30% of the rows for testing.
features, target_col = load_and_preprocess()
X_train, X_test, y_train, y_test = train_test_split(features, target_col, test_size=0.3, random_state=29)

# The poisoning step picks its positions with the global `random` module.
# Seed it so the poisoned labels (and the metrics computed from them) are the
# same on every Restart & Run All.
random.seed(29)

# Label-flipping poisoning of 25 percent of the training labels. A copy is
# passed because the labels are flipped in place and y_train must stay clean
# for the baseline model.
poisoned_target_col = label_flipping_poisoning(np.copy(y_train), 25)

In [17]:
# Baseline: a random forest trained on the clean (unpoisoned) training labels.
model = RandomForestClassifier(n_estimators=50, random_state=22)
model.fit(X_train, y_train)

y_pred = model.predict(X_test)

# Score the held-out predictions. Recall, precision and F1 are computed per
# class and combined with average='weighted'.
accuracy, recall, precision, f1 = (
    accuracy_score(y_test, y_pred),
    recall_score(y_test, y_pred, average='weighted'),
    precision_score(y_test, y_pred, average='weighted'),
    f1_score(y_test, y_pred, average='weighted'),
)

print(f"Accuracy: {accuracy:.2f}")
print(f"Recall: {recall:.2f}")
print(f"Precision: {precision:.2f}")
print(f"F1 Score: {f1:.2f}")

Accuracy: 0.77
Recall: 0.77
Precision: 0.76
F1 Score: 0.77


In [18]:
# Poisoned run: refit the same `model` object on the label-flipped training
# targets. The test labels stay clean, so the scores below show the effect of
# the poisoning. The variables from the baseline cell are overwritten.
model.fit(X_train, poisoned_target_col)

y_pred = model.predict(X_test)

# Same metrics as for the baseline; recall, precision and F1 use
# average='weighted'.
accuracy, recall, precision, f1 = (
    accuracy_score(y_test, y_pred),
    recall_score(y_test, y_pred, average='weighted'),
    precision_score(y_test, y_pred, average='weighted'),
    f1_score(y_test, y_pred, average='weighted'),
)

print(f"Accuracy: {accuracy:.2f}")
print(f"Recall: {recall:.2f}")
print(f"Precision: {precision:.2f}")
print(f"F1 Score: {f1:.2f}")

Accuracy: 0.68
Recall: 0.68
Precision: 0.70
F1 Score: 0.68
