In [27]:
import pandas as pd

In [44]:
# Preforming a membership inference attack on protest classifiers.

# First I need to import the CSV file for both UCLA Protest and VGKG predictions
# Repeat for classifier trained on generated imagery

ucla_df = pd.read_csv('./membership_csvs/result_ucla_stylvgkg.csv')
vgkg_df = pd.read_csv('./membership_csvs/result_vgkg_stylvgkg.csv')

ucla_df['label'] = 1  # UCLA label is 1
vgkg_df['label'] = 0  # VGKG label is 0

# Then I need to combine them and remove the imgpth label
# We want to add a label for binary classification (0 for VGKG and 1 for UCLA)
# Combine the DataFrames into a single DataFrame
combined_df = pd.concat([ucla_df, vgkg_df], ignore_index=True)

# Remove the 'imgpth' label column
combined_df.drop(columns=['imgpath'], inplace=True)

#print(combined_df)
# Then train sklearn classifiers such as random forest and MLP models

# Then I need to evaluate using accuracy_score, precision_score, recall_score

In [45]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, roc_auc_score, homogeneity_score, completeness_score, rand_score
from sklearn.metrics import roc_auc_score, roc_curve
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans

X_train, X_test, y_train, y_test = train_test_split(combined_df.drop('label', axis=1), combined_df['label'], test_size=0.2, random_state=42)

# Train a Random Forest Classifier
rf_classifier = RandomForestClassifier(n_estimators=100, random_state=42, criterion='log_loss')
rf_classifier.fit(X_train, y_train)

# Train a MLP Classifier
mlp_classifier = MLPClassifier(hidden_layer_sizes=(100,), max_iter=1000, random_state=42)
mlp_classifier.fit(X_train, y_train)

# Predictions
rf_predictions = rf_classifier.predict(X_test)
mlp_predictions = mlp_classifier.predict(X_test)

#Unsupervised
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X_train)

# Apply KMeans clustering
kmeans = KMeans(n_clusters=2, random_state=42)  # Assuming you want 2 clusters (VGKG and UCLA)
kmeans.fit(X_scaled)
kmeans_predicitons = kmeans.predict(X_test)



In [46]:
# Evaluation
rf_accuracy = accuracy_score(y_test, rf_predictions)
mlp_accuracy = accuracy_score(y_test, mlp_predictions)
kmeans_accuracy = accuracy_score(y_test, kmeans_predicitons)

rf_precision = precision_score(y_test, rf_predictions)
mlp_precision = precision_score(y_test, mlp_predictions)

rf_recall = recall_score(y_test, rf_predictions)
mlp_recall = recall_score(y_test, mlp_predictions)

kmeans_randindex = rand_score(y_test, kmeans_predicitons)
kmeans_homogeneity = homogeneity_score(y_test, kmeans_predicitons)
kmeans_completeness = completeness_score(y_test, kmeans_predicitons)

print("Random Forest Classifier:")
print(f"Accuracy: {rf_accuracy:.4f}, Precision: {rf_precision:.4f}, Recall: {rf_recall:.4f}")

print("\nMLP Classifier:")
print(f"Accuracy: {mlp_accuracy:.4f}, Precision: {mlp_precision:.4f}, Recall: {mlp_recall:.4f}")

print("\nKmeans Classifier:")
print(f"Accuracy: {kmeans_accuracy:.4f}, Rand Index: {kmeans_randindex:.4f}, Homogeneity: {kmeans_homogeneity:.4f}, completeness: {kmeans_completeness:.4f}")

# Calculate AUC-ROC for Random Forest Classifier
rf_probs = rf_classifier.predict_proba(X_test)[:, 1]
rf_auc_roc = roc_auc_score(y_test, rf_probs)
print(rf_auc_roc)

# Calculate AUC-ROC for MLP Classifier
mlp_probs = mlp_classifier.predict_proba(X_test)[:, 1]
mlp_auc_roc = roc_auc_score(y_test, mlp_probs)
print(mlp_auc_roc)

Random Forest Classifier:
Accuracy: 0.6145, Precision: 0.6320, Recall: 0.5702

MLP Classifier:
Accuracy: 0.6280, Precision: 0.6492, Recall: 0.5761

Kmeans Classifier:
Accuracy: 0.4940, Rand Index: 0.4998, Homogeneity: 0.0000, completeness: 1.0000
0.6745126298186939
0.6852636779696276


In [48]:
import numpy as np
a = X_test.to_numpy()
maxes = np.amax(a, axis=1)
pred = np.where(maxes >= 0.95, 1, 0)
print(pred)
print(y_test.to_numpy())

print(accuracy_score(y_test, pred))
print(precision_score(y_test, pred))
print(recall_score(y_test, pred))



[1 1 1 ... 1 0 1]
[0 1 1 ... 0 1 0]
0.567
0.5603305785123966
0.6699604743083004
