# Data Visualization and Metrics on the CelebA_HQ Dataset

In [53]:
import pandas as pd
import sklearn as sns
import numpy as np
import matplotlib.pyplot as plt

## Populate Dataframes

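Each attack and defense dataframe has 5 data columns: `image`, `image_path`, `actual_label`, `predicted_label`, `confidence`. The defense and detected CSVs additionally carry an unnamed index column (`Unnamed: 0`), and the detected dataframes only identify the flagged images (they have no label or confidence columns).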
* `image` is the image name, for example 10.jpg
* `image_path` is the path to the image
* `actual_label` is the actual label for that person (the original image)
* `predicted_label` is the result of running the original, attacked, or defended image through the classifier. This is the label used to determine whether the attack or defense was effective.
* `confidence` is the classifier's confidence that the image belongs to the predicted class. This is the max value of the prediction.

In [54]:
# Directory holding the labeled CelebA_HQ result CSVs. Every file read below lives
# here, so relocating the data only requires changing this one constant.
DATA_DIR = "/home/grads/hassledw/StyleCLIP_Defense/CelebA_HQ-Labeled"


def load_attack_frames(attack_name, data_dir=DATA_DIR):
    """Load the three result CSVs that belong to one attack configuration.

    Parameters
    ----------
    attack_name : str
        Attack identifier used in the file names, e.g. ``"FGSM05"``.
    data_dir : str, optional
        Directory containing the CSV files. Defaults to ``DATA_DIR``.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(attack_df, defend_df, detected_df)`` read from ``<attack_name>.csv``,
        ``StyleCLIP-<attack_name>.csv`` and ``StyleCLIP-<attack_name>-detected.csv``.
    """
    attack = pd.read_csv(f"{data_dir}/{attack_name}.csv")
    defend = pd.read_csv(f"{data_dir}/StyleCLIP-{attack_name}.csv")
    detected = pd.read_csv(f"{data_dir}/StyleCLIP-{attack_name}-detected.csv")
    return attack, defend, detected


fgsm05_attack_df, fgsm05_defend_df, fgsm05_detected_df = load_attack_frames("FGSM05")

fgsm10_attack_df, fgsm10_defend_df, fgsm10_detected_df = load_attack_frames("FGSM10")

pgd1010_attack_df, pgd1010_defend_df, pgd1010_detected_df = load_attack_frames("PGD1010")

pgd2010_attack_df, pgd2010_defend_df, pgd2010_detected_df = load_attack_frames("PGD2010")

fgsm50_attack_df, fgsm50_defend_df, fgsm50_detected_df = load_attack_frames("FGSM50")


## Defense Metrics

In [82]:
def _percent(numerator, denominator):
    """Return ``numerator / denominator`` as a percentage, or NaN if ``denominator`` is 0."""
    if denominator == 0:
        # The rate is undefined for an empty population; NaN prints as "nan%"
        # instead of aborting the whole report with a ZeroDivisionError.
        return float("nan")
    return numerator / denominator * 100


def print_defense_metrics(attackname, attack_df, defense_df, detected_df):
    """Print attack and StyleCLIP defense metrics for one attack configuration.

    Parameters
    ----------
    attackname : str
        Label shown in the report header.
    attack_df : pandas.DataFrame
        Classifier results on the attacked images. Needs the columns ``image``,
        ``actual_label`` and ``predicted_label``.
    defense_df : pandas.DataFrame
        Classifier results on the defended images. Needs the columns ``image``,
        ``actual_label`` and ``predicted_label``.
    detected_df : pandas.DataFrame
        Images flagged as attacked. Only its ``image`` column is used.

    Returns
    -------
    None
        The metrics are written to stdout. A metric whose denominator is empty
        (e.g. every image was flagged, or no attack succeeded) is printed as ``nan``.
    """
    total_images = len(attack_df)

    # Attacked image detection with StyleCLIP: defended rows whose image was flagged.
    # Matching on the `image` key (instead of hunting for NaNs after an outer merge)
    # keeps a flagged row out of the defendable set even if one of its own columns
    # is missing, and counts each defended row at most once.
    flagged = defense_df["image"].isin(detected_df["image"])
    detected_count = int(flagged.sum())

    # Filter out detected images; what remains relies on the defended prediction.
    defendable_df = defense_df[~flagged]

    # Successful attacks
    attack_result = attack_df[attack_df["actual_label"] != attack_df["predicted_label"]]

    # Successful defenses, inclusive of images whose attack did not cause a misclassification.
    defense_result = defendable_df[defendable_df["actual_label"] == defendable_df["predicted_label"]]

    # Successful attacks joined with ALL defendable images (not only the successful
    # defenses), so the success and too-weak rates share the same denominator.
    # Only the columns needed below are carried through the join.
    attack_defense_join = pd.merge(
        attack_result[["image", "actual_label", "predicted_label"]],
        defendable_df[["image", "predicted_label"]],
        on="image",
        how="inner",
        suffixes=("_attack", "_defense"),
    )

    # Successfully reclassified images after successful attack.
    attack_defense_success = attack_defense_join[
        attack_defense_join["actual_label"] == attack_defense_join["predicted_label_defense"]
    ]

    # Instances when the defense was too weak: after a successful attack, the
    # defended image still received the same (wrong) label as the attacked image.
    attack_defense_weak = attack_defense_join[
        attack_defense_join["predicted_label_attack"] == attack_defense_join["predicted_label_defense"]
    ]

    defense_accuracy = _percent(len(defense_result), len(defendable_df))
    join_size = len(attack_defense_join)

    print("-" * 50)
    print(f"RUNNING {attackname} DEFENSE METRICS...\n")
    print(f"Attack Effectiveness: {_percent(len(attack_result), total_images):.2f}%\n")

    print(f"StyleCLIP Sanitization Effectiveness (on ALL defendable images): {defense_accuracy:.2f}%")
    print(f"StyleCLIP Sanitization Effectiveness (on successful attacks): {_percent(len(attack_defense_success), join_size):.2f}%")
    print(f"Attacked Image Detection w/ StyleCLIP: {_percent(detected_count, total_images):.2f}%")
    print(f"Misclassification on StyleCLIP Images: {100 - defense_accuracy:.2f}%")
    print(f"Successful Attacks Where Defense TOO Weak: {_percent(len(attack_defense_weak), join_size):.2f}%")
    print(f"Total StyleCLIP Dataset Retention: {_percent(detected_count + len(defense_result), total_images):.2f}%")
    print("-" * 50)

In [83]:
# Report the defense metrics for each attack configuration, one report per entry,
# in the listed order.
attack_runs = [
    ("FGSM05", fgsm05_attack_df, fgsm05_defend_df, fgsm05_detected_df),
    ("FGSM10", fgsm10_attack_df, fgsm10_defend_df, fgsm10_detected_df),
    ("PGD1010", pgd1010_attack_df, pgd1010_defend_df, pgd1010_detected_df),
    ("PGD2010", pgd2010_attack_df, pgd2010_defend_df, pgd2010_detected_df),
    ("FGSM50", fgsm50_attack_df, fgsm50_defend_df, fgsm50_detected_df),
]
for run in attack_runs:
    print_defense_metrics(*run)

--------------------------------------------------
RUNNING FGSM50 DEFENSE METRICS...

Attack Effectiveness: 96.48%

StyleCLIP Sanitization Effectiveness (on ALL defendable images): 0.00%
StyleCLIP Sanitization Effectiveness (on successful attacks): 0.00%
Attacked Image Detection w/ StyleCLIP: 54.27%
Misclassification on StyleCLIP Images: 100.00%
Successful Attacks Where Defense TOO Weak: 0.00%
Total StyleCLIP Dataset Retention: 54.27%
--------------------------------------------------
