In [None]:
# prompt: mount drive
# Mount Google Drive at /content/drive. The archive and dataset paths used by
# the later cells all live under /content/drive/MyDrive, so this cell has to
# succeed before anything else is run.

from google.colab import drive

drive.mount('/content/drive')


Drive not mounted, so nothing to flush and unmount.


In [None]:
import zipfile
import os

# Path to your uploaded ZIP file in Colab
zip_path = "/content/drive/MyDrive/LinearProject/AAAZ.zip"
extract_dir = "/content/drive/MyDrive/LinearProject"

# Fail fast when the archive is missing (typically: Drive is not mounted yet).
# Creating extract_dir first would build a *local* directory tree under
# /content/drive, after which drive.mount() refuses to mount with
# "Mountpoint must not already contain files".
if not os.path.isfile(zip_path):
    raise FileNotFoundError(
        f"ZIP archive not found: {zip_path}. "
        "Mount Google Drive before running this cell."
    )

# Create the destination directory if it doesn't exist
os.makedirs(extract_dir, exist_ok=True)

# Unzip the file; the context manager closes the archive even if extraction
# is interrupted.
with zipfile.ZipFile(zip_path, 'r') as zip_ref:
    zip_ref.extractall(extract_dir)

print(f"✅ Unzipped files to: {extract_dir}")

KeyboardInterrupt: 

In [None]:
import os
import cv2
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.decomposition import PCA
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report, confusion_matrix
from tqdm import tqdm

# Configuration
vggface2_path = "/content/drive/MyDrive/LinearProject"  # Dataset root (one sub-folder per identity); replace with your dataset path
MAX_PEOPLE = 60  # Maximum number of identity folders to load
MAX_IMAGES_PER_PERSON = 500  # Maximum number of images loaded per identity
IMAGE_SIZE = (128, 128)  # Size every image is resized to before flattening

# ============================
# 📦 Load VGGFace2 Images
# ============================
def load_vggface2_images(path, image_size=(128, 128), max_people=None,
                         max_images_per_person=None):
    """Load face images from a one-folder-per-identity directory tree.

    Every immediate sub-directory of ``path`` is treated as one identity.
    Each readable image is resized to ``image_size``, converted to grayscale,
    flattened and scaled to the range [0, 1].

    Args:
        path: Root directory containing one sub-directory per identity.
        image_size: Target size handed to ``cv2.resize`` (OpenCV interprets it
            as ``(width, height)``).
        max_people: Maximum number of identities to load. ``None`` (default)
            falls back to the module-level ``MAX_PEOPLE``.
        max_images_per_person: Maximum number of images kept per identity.
            ``None`` (default) falls back to the module-level
            ``MAX_IMAGES_PER_PERSON``.

    Returns:
        A tuple ``(images, labels, label_map)``:
        ``images`` is an array with one flattened grayscale image per row,
        ``labels`` holds the integer identity id of each row, and
        ``label_map`` maps each identity id to its folder name. Only
        identities that contributed at least one image receive an id, so the
        ids in ``labels`` and the keys of ``label_map`` always match.
    """
    if max_people is None:
        max_people = MAX_PEOPLE
    if max_images_per_person is None:
        max_images_per_person = MAX_IMAGES_PER_PERSON

    images, labels = [], []
    label_map = {}
    current_label = 0

    for celeb_folder in tqdm(sorted(os.listdir(path)), desc="Loading"):
        if current_label >= max_people:
            break
        celeb_path = os.path.join(path, celeb_folder)
        if not os.path.isdir(celeb_path):
            continue

        count = 0
        # Sorted so that the same images are selected on every run when the
        # per-person cap truncates the folder (os.listdir order is arbitrary).
        for img_file in sorted(os.listdir(celeb_path)):
            if count >= max_images_per_person:
                break
            img = cv2.imread(os.path.join(celeb_path, img_file))
            if img is None:
                # Not an image, or unreadable: skip it.
                continue
            try:
                img = cv2.resize(img, image_size)
                img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
            except cv2.error:
                # Skip images OpenCV cannot process, but let KeyboardInterrupt
                # and genuine programming errors propagate.
                continue
            images.append(img_gray.flatten() / 255.0)  # Normalize to [0, 1]
            labels.append(current_label)
            count += 1

        if count == 0:
            # Do not spend a label id on a folder without usable images;
            # a label with no samples breaks the per-class report later on.
            continue
        label_map[current_label] = celeb_folder
        current_label += 1

    return np.array(images), np.array(labels), label_map

# Load data
X, y, label_map = load_vggface2_images(vggface2_path, IMAGE_SIZE)
# Stratified split keeps the per-identity proportions in both partitions.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, stratify=y, random_state=42)

# PCA
# random_state pins the randomized SVD solver that PCA may select for data of
# this size, so the projection (and every score below) is reproducible.
pca = PCA(n_components=50, random_state=42)
X_train_pca = pca.fit_transform(X_train)
X_test_pca = pca.transform(X_test)

# SVM with grid search
svm = GridSearchCV(SVC(), {'C': [1, 10], 'kernel': ['linear', 'rbf']}, cv=3, n_jobs=-1)
svm.fit(X_train_pca, y_train)
best_model = svm.best_estimator_

# Predict
y_pred = best_model.predict(X_test_pca)

# Evaluation
# zero_division=0 yields the same values as the default but without a warning
# for every class that is never predicted.
print("\n✅ Evaluation:")
print(f"Accuracy:  {accuracy_score(y_test, y_pred):.4f}")
print(f"Precision: {precision_score(y_test, y_pred, average='weighted', zero_division=0):.4f}")
print(f"Recall:    {recall_score(y_test, y_pred, average='weighted', zero_division=0):.4f}")
print(f"F1 Score:  {f1_score(y_test, y_pred, average='weighted', zero_division=0):.4f}")

# Label names
# Keep ids and names side by side and pass the ids explicitly below, so rows
# and columns stay aligned with class_names even when some identity is absent
# from y_test / y_pred.
label_ids = sorted(label_map)
class_names = [label_map[i] for i in label_ids]

# Report
print("\n📋 Classification Report:")
print(classification_report(y_test, y_pred, labels=label_ids,
                            target_names=class_names, zero_division=0))

# Confusion Matrix
plt.figure(figsize=(10, 8))
sns.heatmap(confusion_matrix(y_test, y_pred, labels=label_ids), annot=True, fmt='d', cmap='Blues',
            xticklabels=class_names, yticklabels=class_names)
plt.title("Confusion Matrix")
plt.xlabel("Predicted")
plt.ylabel("Actual")
plt.show()

# ============================
# 🧠 Predict & Visualize a Test Image (PCA reconstruction)
# ============================
def visualize_prediction(index=0):
    """Plot one test sample together with its predicted and true identity.

    The picture shown is the PCA reconstruction of the sample (projection
    mapped back to pixel space and reshaped to ``IMAGE_SIZE``), not the
    original image file.

    Args:
        index: Row of ``X_test_pca`` / ``y_test`` to display.
    """
    sample = X_test_pca[index].reshape(1, -1)

    # Resolve both label ids to folder names for the title.
    pred_name = label_map[best_model.predict(sample)[0]]
    true_name = label_map[y_test[index]]

    # Map the principal-component coordinates back to a pixel grid.
    reconstruction = pca.inverse_transform(sample).reshape(IMAGE_SIZE)

    plt.figure(figsize=(6, 6))
    plt.imshow(reconstruction, cmap='gray')
    plt.title(f"Predicted: {pred_name} | True: {true_name}")
    plt.axis('off')
    plt.show()

# Show the prediction and the PCA reconstruction for the first test sample (index 0)
visualize_prediction(index=0)

Loading: 100%|██████████| 61/61 [13:49<00:00, 13.59s/it]


In [None]:
import os
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm
import cv2

from sklearn.decomposition import PCA
from sklearn.linear_model import LogisticRegression  # You can swap with SVC
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report, confusion_matrix

from tensorflow.keras.applications import ResNet50
from tensorflow.keras.applications.resnet50 import preprocess_input
from tensorflow.keras.preprocessing import image

# Configuration
vggface2_path = "/content/drive/MyDrive/LinearProject"  # Dataset root (one sub-folder per identity); replace with your dataset path
MAX_PEOPLE = 60  # Maximum number of identity folders to load
MAX_IMAGES_PER_PERSON = 500  # Maximum number of images loaded per identity
IMAGE_SIZE = (224, 224)  # Size images are loaded at before being fed to ResNet50 (224x224 is the Keras ResNet50 default)

# Load ResNet50 model (pretrained on ImageNet). include_top=False drops the
# classification head and pooling='avg' averages the final feature maps, so
# the network is used purely as a fixed feature extractor.
resnet = ResNet50(weights='imagenet', include_top=False, pooling='avg')  # Output shape: (2048,) per image

# =======================================
# 📆 Load Images and Extract ResNet Features
# =======================================
def extract_resnet_features(path, image_size=(224, 224), max_people=None,
                            max_images_per_person=None, batch_size=32):
    """Extract one ResNet feature vector per image of a per-identity dataset.

    Every immediate sub-directory of ``path`` is treated as one identity. The
    readable images of a folder are loaded at ``image_size``, preprocessed
    with ``preprocess_input`` and pushed through the module-level ``resnet``
    model in a single batched ``predict`` call per folder, instead of one
    call per image.

    Args:
        path: Root directory containing one sub-directory per identity.
        image_size: ``target_size`` handed to ``image.load_img``.
        max_people: Maximum number of identities to load. ``None`` (default)
            falls back to the module-level ``MAX_PEOPLE``.
        max_images_per_person: Maximum number of images kept per identity.
            ``None`` (default) falls back to the module-level
            ``MAX_IMAGES_PER_PERSON``.
        batch_size: Batch size forwarded to ``resnet.predict``.

    Returns:
        A tuple ``(features, labels, label_map)``:
        ``features`` has one flattened feature vector per row, ``labels``
        holds the integer identity id of each row, and ``label_map`` maps
        each identity id to its folder name. Only identities that contributed
        at least one image receive an id, so the ids in ``labels`` and the
        keys of ``label_map`` always match.
    """
    if max_people is None:
        max_people = MAX_PEOPLE
    if max_images_per_person is None:
        max_images_per_person = MAX_IMAGES_PER_PERSON

    features, labels = [], []
    label_map = {}
    current_label = 0

    for celeb_folder in tqdm(sorted(os.listdir(path)), desc="Extracting"):
        if current_label >= max_people:
            break
        celeb_path = os.path.join(path, celeb_folder)
        if not os.path.isdir(celeb_path):
            continue

        batch = []
        # Sorted so that the same images are selected on every run when the
        # per-person cap truncates the folder (os.listdir order is arbitrary).
        for img_file in sorted(os.listdir(celeb_path)):
            if len(batch) >= max_images_per_person:
                break
            img_path = os.path.join(celeb_path, img_file)
            try:
                img = image.load_img(img_path, target_size=image_size)
                img_array = image.img_to_array(img)
            except Exception:
                # Best effort: skip files that cannot be read as an image.
                # Only loading is guarded, so KeyboardInterrupt and model
                # failures are no longer swallowed.
                continue
            batch.append(img_array)

        if not batch:
            # Do not spend a label id on a folder without usable images;
            # a label with no samples breaks the per-class report later on.
            continue

        # One forward pass per folder. The whole folder is held in memory as
        # a single array, which is bounded by max_images_per_person.
        batch_array = preprocess_input(np.stack(batch))
        feats = resnet.predict(batch_array, batch_size=batch_size, verbose=0)
        features.extend(feats.reshape(len(batch), -1))
        labels.extend([current_label] * len(batch))

        label_map[current_label] = celeb_folder
        current_label += 1

    return np.array(features), np.array(labels), label_map

# Load data
X, y, label_map = extract_resnet_features(vggface2_path, IMAGE_SIZE)
# Stratified split keeps the per-identity proportions in both partitions.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, stratify=y, random_state=42)

# PCA
# random_state pins the randomized SVD solver that PCA may select for data of
# this size, so the projection (and every score below) is reproducible.
pca = PCA(n_components=100, random_state=42)
X_train_pca = pca.fit_transform(X_train)
X_test_pca = pca.transform(X_test)

# Classifier
clf = LogisticRegression(max_iter=1000)  # Replace with SVC if needed
clf.fit(X_train_pca, y_train)
y_pred = clf.predict(X_test_pca)

# Evaluation
# zero_division=0 yields the same values as the default but without a warning
# for every class that is never predicted.
print("\n✅ Evaluation:")
print(f"Accuracy:  {accuracy_score(y_test, y_pred):.4f}")
print(f"Precision: {precision_score(y_test, y_pred, average='weighted', zero_division=0):.4f}")
print(f"Recall:    {recall_score(y_test, y_pred, average='weighted', zero_division=0):.4f}")
print(f"F1 Score:  {f1_score(y_test, y_pred, average='weighted', zero_division=0):.4f}")

# Label names
# Keep ids and names side by side and pass the ids explicitly below, so rows
# and columns stay aligned with class_names even when some identity is absent
# from y_test / y_pred.
label_ids = sorted(label_map)
class_names = [label_map[i] for i in label_ids]

# Report
print("\n📋 Classification Report:")
print(classification_report(y_test, y_pred, labels=label_ids,
                            target_names=class_names, zero_division=0))

# Confusion Matrix
plt.figure(figsize=(10, 8))
sns.heatmap(confusion_matrix(y_test, y_pred, labels=label_ids), annot=True, fmt='d', cmap='Blues',
            xticklabels=class_names, yticklabels=class_names)
plt.title("Confusion Matrix")
plt.xlabel("Predicted")
plt.ylabel("Actual")
plt.show()

# ============================
# 🧠 Predict & Visualize a Test Sample (reconstructed feature vector)
# ============================
def visualize_prediction(index=0):
    """Plot one test sample's reconstructed feature vector with its labels.

    The strip that is drawn is the inverse-PCA reconstruction of the sample,
    laid out as a single row of values. It is not a picture of the original
    image.

    Args:
        index: Row of ``X_test_pca`` / ``y_test`` to display.
    """
    sample = X_test_pca[index].reshape(1, -1)

    # Resolve both label ids to folder names for the title.
    pred_name = label_map[clf.predict(sample)[0]]
    true_name = label_map[y_test[index]]

    # Undo the PCA projection and keep the result as one row for imshow.
    reconstruction = pca.inverse_transform(sample).reshape((1, -1))

    plt.figure(figsize=(6, 1))
    plt.imshow(reconstruction, aspect='auto', cmap='gray')
    plt.title(f"Predicted: {pred_name} | True: {true_name}")
    plt.axis('off')
    plt.show()

# Show the prediction and the reconstructed feature strip for the first test sample (index 0)
visualize_prediction(index=0)

Extracting:  39%|███▉      | 24/61 [1:17:54<2:04:53, 202.52s/it]

In [None]:
# Mount Google Drive at /content/drive (same call as the first cell).
# NOTE(review): the recorded run of this cell raised
# "ValueError: Mountpoint must not already contain files". Presumably
# /content/drive already held local files at that point (e.g. created by an
# earlier cell while Drive was not mounted); confirm and clear them first.
from google.colab import drive
drive.mount('/content/drive')

ValueError: Mountpoint must not already contain files