## Example Solutions for GirlsWhoML Data Interpretability Workshop

In [None]:
# Load data and choose layer (preliminary)
import numpy as np

# 1) Download the .npz file from GitHub into your Colab environment.
!wget https://github.com/aideenfay/girlswhoml/blob/main/clean_reduced.npz?raw=true -O clean_reduced.npz
!wget https://github.com/aideenfay/girlswhoml/blob/main/poisoned_reduced.npz?raw=true -O poisoned_reduced.npz

clean_data = np.load("clean_reduced.npz")['arr_0']
poisoned_data = np.load("poisoned_reduced.npz")['arr_0']

# 2) Choose a layer to interpret
layer = 15
X_clean = clean_data[:, layer, :]   
X_poisoned = poisoned_data[:, layer, :]

1. PCA

In [None]:
import numpy as np
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt

# 1. Combine datasets and create labels (0 = clean, 1 = poisoned).
#    The poisoned activations are stored under the name X_poisoned by the
#    data-loading cell.
X = np.vstack([X_clean, X_poisoned])
y = np.concatenate([np.zeros(len(X_clean)), np.ones(len(X_poisoned))])

# 2. Apply PCA
#    Two components are enough for a 2-D scatter plot; raise n_components to
#    inspect how much variance further directions explain.
pca = PCA(n_components=2)
X_pca = pca.fit_transform(X)

# 3. Check explained variance
print("Explained variance ratio of each component:", pca.explained_variance_ratio_)
print("Cumulative explained variance:", np.cumsum(pca.explained_variance_ratio_))

# 4. Visualize the first two principal components, coloured by label
plt.figure(figsize=(6,4))
plt.scatter(X_pca[:, 0], X_pca[:, 1], c=y, cmap='coolwarm', alpha=0.7)
plt.xlabel('Principal Component 1')
plt.ylabel('Principal Component 2')
plt.title('PCA projection (2 components)')
plt.colorbar(ticks=[0,1], label='Poisoned (0=clean, 1=poisoned)')
plt.show()

2. t-SNE
   

In [None]:
from sklearn.manifold import TSNE

# X and y are the combined activations and 0/1 labels built in the PCA cell;
# plt is the matplotlib.pyplot module imported there.

# 1. Define t-SNE model
#    random_state is fixed so the embedding is reproducible between runs.
tsne = TSNE(n_components=2, perplexity=30, random_state=42)

# 2. Fit-transform the data using t-SNE (this may take a few seconds)
X_tsne = tsne.fit_transform(X)

# 3. Plot t-SNE results, coloured by label
plt.figure(figsize=(6,4))
plt.scatter(X_tsne[:, 0], X_tsne[:, 1], c=y, cmap='coolwarm', alpha=0.7)
plt.title(f"t-SNE projection (perplexity={tsne.perplexity})")
plt.xlabel('t-SNE dimension 1')
plt.ylabel('t-SNE dimension 2')
plt.colorbar(ticks=[0,1], label='Poisoned (0=clean, 1=poisoned)')
plt.show()

3. Persistent Homology and TDA

In [None]:
# The working code is provided in the student version of the notebook.

4. Logistic Regression Probe

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, roc_auc_score, roc_curve

# ----------------------------
# 1) Create combined dataset X and label vector y
# ----------------------------
# The poisoned activations are stored under the name X_poisoned by the
# data-loading cell.
X = np.vstack([X_clean, X_poisoned])  # shape (400, 4096) if each subset is (200, 4096)
y = np.concatenate([
    np.zeros(len(X_clean)),       # 0 = clean
    np.ones(len(X_poisoned))      # 1 = poisoned
])

print("Combined data shape:", X.shape)
print("Labels shape:", y.shape)
print("Number of clean samples:", np.sum(y == 0))
print("Number of poisoned samples:", np.sum(y == 1))

# ----------------------------
# 2) Split into train/test sets
# ----------------------------
# stratify=y keeps the clean/poisoned ratio the same in both splits, so the
# test set always contains both classes (required for ROC/AUC below).
X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)
print("Training set size:", X_train.shape[0], "Test set size:", X_test.shape[0])

# ----------------------------
# 3) Train logistic regression
# ----------------------------
# max_iter is raised above the library default to give the solver more
# iterations to converge on these features.
clf = LogisticRegression(max_iter=1000)
clf.fit(X_train, y_train)

# ----------------------------
# 4) Evaluate on test set
# ----------------------------
y_pred = clf.predict(X_test)
acc = accuracy_score(y_test, y_pred)
print(f"Test Accuracy: {acc:.3f}")

# ----------------------------
# 5) Calculate ROC–AUC and plot
# ----------------------------
# 5a) Predict probability for positive class (poisoned=1)
y_proba = clf.predict_proba(X_test)[:, 1]  # Probability of class=1

# 5b) Compute ROC curve
fpr, tpr, thresholds = roc_curve(y_test, y_proba)
auc_score = roc_auc_score(y_test, y_proba)

print(f"ROC AUC Score: {auc_score:.3f}")

# 5c) Plot the ROC curve against the chance diagonal
plt.figure(figsize=(6, 5))
plt.plot(fpr, tpr, color='blue', label=f"ROC curve (AUC = {auc_score:.3f})")
plt.plot([0,1], [0,1], color='gray', linestyle='--', label="Random Guessing")

plt.title("ROC Curve - Logistic Regression")
plt.xlabel("False Positive Rate")
plt.ylabel("True Positive Rate")
plt.legend(loc="lower right")
plt.show()


5. Graph Laplacian

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.neighbors import NearestNeighbors
import numpy.linalg as la

# For a heatmap, we'll use seaborn  
!pip install seaborn
import seaborn as sns

!pip install sentence_transformers
from sentence_transformers import SentenceTransformer

###############################################################################
# 1) Larger set of ~30 sentences across sports, tech, and cooking
###############################################################################
# The corpus is written one domain at a time and then concatenated, so the
# order is always: sports, then tech, then cooking.
sports_sentences = [
    "The soccer match ended in a dramatic penalty shootout.",
    "Basketball players often exhibit impressive vertical jumps and agility.",
    "She set a new track record in the 100-meter dash.",
    "The tennis tournament drew huge crowds all week.",
    "He scored a touchdown on the final drive of the game.",
    "A well-placed corner kick led to the winning goal.",
    "Rugby is a physically demanding sport that requires endurance.",
    "Mountain biking trails can vary greatly in difficulty.",
    "The swimming relay team broke the national record.",
    "A marathon requires careful pacing and hydration strategies.",
]

tech_sentences = [
    "Cloud computing services offer flexible resource scaling.",
    "Quantum computing promises exponential speedups for specific tasks.",
    "Machine learning algorithms thrive on large volumes of data.",
    "Cybersecurity threats evolve rapidly in modern networks.",
    "Neural networks draw inspiration from the human brain.",
    "Blockchain technology underpins many cryptocurrencies.",
    "Augmented reality applications are gaining popularity in gaming.",
    "Autonomous vehicles rely on sophisticated sensor fusion.",
    "The microchip shortage has impacted global supply chains.",
    "Robotics advancements continuously push the boundaries of automation.",
]

cooking_sentences = [
    "Baking sourdough bread requires patience and a good starter.",
    "A slow cooker can simplify meal preparation for busy families.",
    "He garnished the salad with toasted almonds and feta cheese.",
    "Gluten-free baking often demands special flour blends.",
    "Homemade pasta sauce benefits from a long simmer and fresh herbs.",
    "Marinating the chicken overnight enhances flavor.",
    "She whipped up a batch of chocolate chip cookies from scratch.",
    "Roasted vegetables drizzled with olive oil can be quite addictive.",
    "The casserole combined layers of cheese, meat, and tomato sauce.",
    "Pancakes made with buttermilk tend to be fluffier.",
]

sentences = sports_sentences + tech_sentences + cooking_sentences

# Echo the corpus with its positional index; the same indices are used to
# refer to sentences in later output.
for position, text in enumerate(sentences):
    print(f"[{position}] {text}")
print(f"\nTotal sentences: {len(sentences)}")

# Domain labels for color-coding: 0 = Sports, 1 = Tech, 2 = Cooking.
# Ten consecutive entries per domain, in the same order as `sentences`.
domain_labels = [label for label in (0, 1, 2) for _ in range(10)]
domain_names = dict(enumerate(("Sports", "Tech", "Cooking")))

###############################################################################
# 2) Obtain LLM embeddings
###############################################################################
# Encode every sentence with a pretrained sentence-transformer and hold the
# result as a NumPy array (presumably one row per sentence -- the printed
# shape below confirms it).
model_name = 'sentence-transformers/all-MiniLM-L6-v2'
embedder = SentenceTransformer(model_name)
X_emb = np.array(
    embedder.encode(sentences, show_progress_bar=False)
)
print("Embeddings shape:", X_emb.shape)

###############################################################################
# 3) Build a k-NN graph in embedding space
###############################################################################
def build_knn_graph(X, k=5):
    """
    Construct a binary, symmetric k-NN adjacency matrix.

    Each sample is linked to its k nearest *other* samples under Euclidean
    distance. An edge is kept when either endpoint lists the other as a
    neighbour, so a node can end up with more than k edges. A sample is never
    linked to itself, so the diagonal is all zeros.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
        Points to connect.
    k : int, default=5
        Number of neighbours per sample, not counting the sample itself.
        Clamped to n_samples - 1 when the dataset is smaller than k + 1.
        Adjust 'k' based on dataset size.

    Returns
    -------
    A : numpy.ndarray of shape (n_samples, n_samples)
        Float matrix with A[i, j] == A[j, i] == 1 where i and j are
        connected, and 0 elsewhere.
    """
    X = np.asarray(X)
    n_samples = X.shape[0]
    A = np.zeros((n_samples, n_samples))

    # A point cannot have more neighbours than there are other points.
    k = min(k, n_samples - 1)
    if k < 1:
        return A

    nbrs = NearestNeighbors(n_neighbors=k, metric='euclidean')
    nbrs.fit(X)
    # Calling kneighbors() without a query looks up the fitted points and does
    # not count a point as its own neighbour. Passing X explicitly would
    # return each point as its own closest match, producing self-loops and
    # only k - 1 genuine neighbours.
    _, indices = nbrs.kneighbors()

    # Mark every directed edge i -> j in one step, then symmetrize.
    rows = np.repeat(np.arange(n_samples), k)
    A[rows, indices.ravel()] = 1
    return np.maximum(A, A.T)

# Build the neighbour graph over the sentence embeddings.
k_neighbors = 5
A = build_knn_graph(X_emb, k=k_neighbors)
print("Adjacency matrix shape:", A.shape)

###############################################################################
# 3a) Visualize adjacency matrix as a heatmap
###############################################################################
# Tick labels are switched off; the matrix is read by position only.
fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(
    A,
    cmap='Greens',
    annot=False,
    xticklabels=False,
    yticklabels=False,
    ax=ax,
)
ax.set_title(f"Adjacency Matrix Heatmap (k={k_neighbors} NN)")
plt.show()

# If you want row/col labels, you can pass xticklabels=sentences or domain_labels,
# but it can be cluttered with 30 sentences.

###############################################################################
# 4) Compute the graph Laplacian L = D - A
###############################################################################
# D is the diagonal matrix of row sums of the adjacency matrix.
degrees = np.sum(A, axis=1)
D = np.diag(degrees)
L = D - A

# Eigen-decomposition of the symmetric Laplacian; eigenvalues and the
# matching eigenvector columns are reordered together, smallest first.
eigvals, eigvecs = la.eigh(L)
idx = np.argsort(eigvals)
eigvals, eigvecs = eigvals[idx], eigvecs[:, idx]

print("Eigenvalues (sorted):", eigvals)
print("\nFirst 10 eigenvalues:", eigvals[:10])

# Plot the eigenvalue spectrum
fig, ax = plt.subplots(figsize=(7, 5))
ax.plot(eigvals, marker='o')
ax.set_title(f"Laplacian Eigenvalues (k={k_neighbors} NN)")
ax.set_xlabel("Eigenvalue Index")
ax.set_ylabel("Value")
plt.show()

###############################################################################
# 5) Fiedler Vector Partition
###############################################################################
def _print_group(header, members):
    """Print a group header followed by one line per member sentence."""
    print(header)
    for idx in members:
        print(f"  - [{idx}] ({domain_names[domain_labels[idx]]}) {sentences[idx]}")


fiedler_index = 1  # second smallest eigenvalue
if fiedler_index >= len(eigvals):
    print("Not enough eigenvalues to do a Fiedler partition. Check adjacency or data size.")
else:
    # Split the nodes by the sign of their entry in the chosen eigenvector:
    # non-negative entries form group A, negative entries form group B.
    fiedler_vector = eigvecs[:, fiedler_index]
    groupA = np.flatnonzero(fiedler_vector >= 0)
    groupB = np.flatnonzero(fiedler_vector < 0)

    print("\n=== Partition by Fiedler Vector Sign ===")
    print("Group A indices:", groupA)
    print("Group B indices:", groupB)

    _print_group("\nGroup A Sentences:", groupA)
    _print_group("\nGroup B Sentences:", groupB)

###############################################################################
# 6) 2D Spectral Embedding (Eigenvectors 2 & 3)
###############################################################################
if len(eigvals) <= 2:
    print("Not enough eigenvectors to create a 2D spectral embedding.")
else:
    # Columns 1 and 2 of the sorted eigenvector matrix give each node's
    # x and y position.
    x_coords, y_coords = eigvecs[:, 1], eigvecs[:, 2]

    fig, ax = plt.subplots(figsize=(7, 5))
    # Colour by domain label to see whether sports/tech/cooking separate.
    scatter = ax.scatter(x_coords, y_coords, c=domain_labels, cmap='Accent', alpha=0.8)

    # Tag each point with its sentence index, nudged slightly off the marker.
    label_offset = 0.01
    for i in range(len(sentences)):
        ax.annotate(str(i), (x_coords[i] + label_offset, y_coords[i] + label_offset))

    cbar = fig.colorbar(scatter, ax=ax, ticks=[0, 1, 2])
    cbar.ax.set_yticklabels(['Sports', 'Tech', 'Cooking'])
    ax.set_title("Spectral Embedding (Eigenvectors 2 & 3)\nColored by Domain Label")
    ax.set_xlabel("2nd Eigenvector")
    ax.set_ylabel("3rd Eigenvector")
    plt.show()
