In [9]:
import os
import platform
import pandas as pd
import numpy as np
import warnings
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score
from art.estimators.classification import SklearnClassifier
from art.attacks.evasion import ZooAttack

# Silence every warning category from here on so cell output stays uncluttered.
# NOTE(review): this blanket filter also hides deprecation and data-conversion
# warnings raised by the libraries imported above — consider narrowing it.
warnings.filterwarnings('ignore')

In [10]:
# Function to load the dataset from its project-relative location
def load_dataset(dataset_path=None):
    """Read the cleaned dataset from its Excel workbook.

    The default location is assembled with ``os.path.join`` so the correct
    separator is used on every operating system. (Branching on the platform
    name previously produced a backslash path on any non-macOS host, which
    cannot be resolved on Linux.)

    Parameters
    ----------
    dataset_path : str or path-like, optional
        Workbook to read. When omitted, the project-relative default
        ``dataset/processed_data/main/final_cleaned_dataset.xlsx`` is used.

    Returns
    -------
    Whatever ``pandas.read_excel`` returns for the workbook (a DataFrame
    for a single-sheet read).
    """
    if dataset_path is None:
        dataset_path = os.path.join(
            "dataset", "processed_data", "main", "final_cleaned_dataset.xlsx"
        )
    return pd.read_excel(dataset_path)

In [11]:
# Data preprocessing
def preprocess_data(data, low_threshold=290, high_threshold=449):
    """Split the raw frame into model features and a 3-class target.

    The target ("flag") is derived from the hospitalised-count column:

    * ``1`` when the count is ``<= low_threshold``
    * ``2`` when the count is ``<= high_threshold`` (and above ``low_threshold``)
    * ``3`` otherwise (including rows where the comparison is False, e.g. NaN)

    The input frame is left untouched. Writing the ``flag`` column back into
    ``data`` made the function non-idempotent: a second call on the same frame
    would carry the target into the feature matrix.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain the hospitalised column plus the other columns dropped
        below ("District", "Unnamed: 0", TOTAL, RecoveredA, Deceased).
    low_threshold : numeric, default 290
        Upper bound (inclusive) of class 1.
    high_threshold : numeric, default 449
        Upper bound (inclusive) of class 2.

    Returns
    -------
    (features, target_col)
        ``features`` is ``data`` without the target-source and irrelevant
        columns; ``target_col`` is an integer Series named ``"flag"`` sharing
        ``data``'s index.

    Raises
    ------
    KeyError
        If any of the columns to drop is missing from ``data``.
    """
    hospitalized_name = "Laboratory confirmed, since the beginning of the pandemic Hospitalized"

    # Drop target source and irrelevant columns
    features = data.drop(columns=[
        hospitalized_name,
        "District", "Unnamed: 0", "Laboratory confirmed, since the beginning of the pandemic TOTAL",
        "Laboratory confirmed, since the beginning of the pandemic RecoveredA",
        "Laboratory confirmed, since the beginning of the pandemic Deceased"
    ])

    hospitalized = data[hospitalized_name]

    # Build the label on a fresh Series instead of mutating the caller's frame.
    # Order matters: the wider bucket is assigned first, then narrowed.
    target_col = pd.Series(3, index=data.index, name="flag")
    target_col.loc[hospitalized <= high_threshold] = 2
    target_col.loc[hospitalized <= low_threshold] = 1

    return features, target_col

In [12]:
# Load the workbook and derive the feature matrix and class labels
data = load_dataset()
features, target_col = preprocess_data(data)

# Hold out 30% of the rows as the test split
X_train, X_test, y_train, y_test = train_test_split(
    features,
    target_col,
    test_size=0.3,
    random_state=29,
)

# Fit the gradient-boosting classifier on the training split
model = GradientBoostingClassifier(n_estimators=50, random_state=22)
model.fit(X_train, y_train)

# Wrap the fitted scikit-learn model so ART attacks can use it
art_classifier = SklearnClassifier(model=model)

# Configure the ART Zeroth Order Optimization (ZOO) attack, untargeted
zoo = ZooAttack(
    classifier=art_classifier,
    confidence=0.0,
    targeted=False,
    learning_rate=1e-1,
    max_iter=20,
    binary_search_steps=10,
    initial_const=1e-3,
    abort_early=True,
    use_resize=False,
    use_importance=False,
    nb_parallel=1,
    batch_size=1,
    variable_h=0.2,
)

# Craft adversarial counterparts of the training samples (ART expects arrays)
x_train_adv = zoo.generate(X_train.values)

ZOO: 100%|████████████████████████████████| 11690/11690 [01:58<00:00, 98.82it/s]


In [13]:
# Craft adversarial counterparts of the test samples
x_test_adv = zoo.generate(X_test.values)

# Persist each adversarial set under its own file name. The train file
# previously received the test array, so the train adversarials were never saved.
np.save('zoo_gbc_train_adv.npy', x_train_adv)
np.save('zoo_gbc_test_adv.npy', x_test_adv)

ZOO: 100%|██████████████████████████████████| 5010/5010 [00:51<00:00, 97.58it/s]


In [14]:
# Evaluate model scores
def evaluate_model(model, X, y):
    """Return ``model.score(X, y)`` unchanged.

    Parameters
    ----------
    model : object exposing a ``score(X, y)`` method
    X : samples to score
    y : reference labels for ``X``

    Returns
    -------
    The value produced by ``model.score`` (for scikit-learn classifiers
    this is the mean accuracy).
    """
    return model.score(X, y)

In [15]:
# Score the fitted model on each clean / adversarial split
evaluation_sets = [
    ("Normal Training Score", X_train, y_train),
    ("Adversarial Training Score", x_train_adv, y_train),
    ("Normal Test Score", X_test, y_test),
    ("Adversarial Test Score", x_test_adv, y_test),
]
scores = {
    label: evaluate_model(model, samples, labels)
    for label, samples, labels in evaluation_sets
}

for score_name, score_value in scores.items():
    print(f"{score_name}: {score_value:.4f}")

# Class predictions on the clean test split
y_pred = model.predict(X_test)

# Accuracy plus weighted-average precision, recall and F1 for those predictions
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='weighted')
recall = recall_score(y_test, y_pred, average='weighted')
f1 = f1_score(y_test, y_pred, average='weighted')

print(f"Accuracy: {accuracy:.2f}")
print(f"Recall: {recall:.2f}")
print(f"Precision: {precision:.2f}")
print(f"F1 Score: {f1:.2f}")

Normal Training Score: 0.7134
Adversarial Training Score: 0.7084
Normal Test Score: 0.6866
Adversarial Test Score: 0.6836
Accuracy: 0.69
Recall: 0.69
Precision: 0.66
F1 Score: 0.66
