Today, we will be trying to classify photos by which photographer took them, using CNN models! We have Alex, Kelly, and Hunter as our photographers, and our goal is to be able to predict which photographer is responsible for each picture.

We'll start by loading in our image data. We have 2 classes in our training data, Alex's photos and Kelly's photos. Hunter's photos are mixed into the testing data, to see if our model is able to distinguish a "Neither" category as well as Alex and Kelly.

In [6]:
import os
import pandas as pd
import glob
import numpy as np
from sklearn.model_selection import StratifiedKFold
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras import layers, models
from tensorflow.keras.callbacks import EarlyStopping

In [None]:
import os
import pandas as pd
import numpy as np
import glob
from sklearn.model_selection import StratifiedKFold
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras import layers, models
from tensorflow.keras.callbacks import EarlyStopping

# --- Load image paths ---
def load_image_paths(base_dir):
    data = []
    for label in ['Alex', 'Kelly']:
        path = os.path.join(base_dir, label)
        if not os.path.exists(path):
            print(f"⚠️ Folder not found: {path}")
            continue
        for ext in ('*.png', '*.jpg', '*.jpeg'):
            for img_file in glob.glob(os.path.join(path, ext)):
                data.append({'filepath': img_file, 'label': label})
    df = pd.DataFrame(data)
    print(f"📄 Loaded {len(df)} images.")
    return df

# --- Prepare dataframe ---
base_dir = '/Users/anhuynh/Downloads/Alex_Kelly_Pics'
df = load_image_paths(base_dir)

# Encode labels for stratification
df['label_idx'] = df['label'].astype('category').cat.codes  # Alex=0, Kelly=1

# --- Data generator ---
datagen = ImageDataGenerator(rescale=1./255)

In [None]:

print(df.head())


📄 Loaded 485 images total.
                                            filepath label
0  /Users/anhuynh/Downloads/Alex_Kelly_Pics/Alex/...  Alex
1  /Users/anhuynh/Downloads/Alex_Kelly_Pics/Alex/...  Alex
2  /Users/anhuynh/Downloads/Alex_Kelly_Pics/Alex/...  Alex
3  /Users/anhuynh/Downloads/Alex_Kelly_Pics/Alex/...  Alex
4  /Users/anhuynh/Downloads/Alex_Kelly_Pics/Alex/...  Alex


Let's start building our CNN model!

We will now train our CNN model on all of Kelly and Alex's pictures.

In [None]:


# --- K-Fold Cross Validation ---
kf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
accuracies = []

for fold, (train_idx, val_idx) in enumerate(kf.split(df['filepath'], df['label_idx'])):
    print(f"\n📂 Fold {fold+1}")

    train_df = df.iloc[train_idx]
    val_df = df.iloc[val_idx]

    train_gen = datagen.flow_from_dataframe(
        train_df,
        x_col='filepath',
        y_col='label',
        target_size=(224, 224),
        batch_size=32,
        class_mode='binary',
        shuffle=True,
        seed=42
    )

    val_gen = datagen.flow_from_dataframe(
        val_df,
        x_col='filepath',
        y_col='label',
        target_size=(224, 224),
        batch_size=32,
        class_mode='binary',
        shuffle=False
    )

    # --- CNN Model builder ---
    def create_model():
        model = models.Sequential([
            layers.Conv2D(32, (3, 3), activation='relu', input_shape=(224, 224, 3)),
            layers.MaxPooling2D(2, 2),
            layers.Conv2D(64, (3, 3), activation='relu'),
            layers.MaxPooling2D(2, 2),
            layers.Conv2D(128, (3, 3), activation='relu'),
            layers.MaxPooling2D(2, 2),
            layers.Dropout(0.5),
            layers.Flatten(),
            layers.Dense(128, activation='relu'),
            layers.Dense(1, activation='sigmoid')
        ])
        model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
        return model

    model = create_model()

    # --- Early stopping ---
    early_stop = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)

    # --- Train model ---
    history = model.fit(
        train_gen,
        validation_data=val_gen,
        epochs=10,
        callbacks=[early_stop],
        verbose=1
    )

    # --- Evaluate model ---
    loss, acc = model.evaluate(val_gen, verbose=0)
    print(f"✅ Fold {fold+1} Accuracy: {acc:.4f}")
    accuracies.append(acc)

# --- Cross-validation results summary ---
print(f"\n📊 Average CV Accuracy: {np.mean(accuracies):.4f} ± {np.std(accuracies):.4f}")


📄 Loaded 485 images.

📂 Fold 1
Found 388 validated image filenames belonging to 2 classes.
Found 97 validated image filenames belonging to 2 classes.


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  self._warn_if_super_not_called()


Epoch 1/10
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 1s/step - accuracy: 0.5246 - loss: 1.5100 - val_accuracy: 0.5464 - val_loss: 0.6899
Epoch 2/10
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 971ms/step - accuracy: 0.5581 - loss: 0.6860 - val_accuracy: 0.5670 - val_loss: 0.6683
Epoch 3/10
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 981ms/step - accuracy: 0.5784 - loss: 0.6721 - val_accuracy: 0.5773 - val_loss: 0.6694
Epoch 4/10
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 971ms/step - accuracy: 0.6063 - loss: 0.6706 - val_accuracy: 0.6907 - val_loss: 0.6387
Epoch 5/10
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 1s/step - accuracy: 0.5562 - loss: 0.6893 - val_accuracy: 0.5567 - val_loss: 0.6525
Epoch 6/10
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 971ms/step - accuracy: 0.6346 - loss: 0.6324 - val_accuracy: 0.7010 - val_loss: 0.5877
Epoch 7/10
[1m13/13[0m [3

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  self._warn_if_super_not_called()


Epoch 1/10
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 1s/step - accuracy: 0.5207 - loss: 0.8405 - val_accuracy: 0.4742 - val_loss: 0.6911
Epoch 2/10
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 975ms/step - accuracy: 0.5013 - loss: 0.6873 - val_accuracy: 0.5464 - val_loss: 0.6865
Epoch 3/10
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 991ms/step - accuracy: 0.6027 - loss: 0.6468 - val_accuracy: 0.6598 - val_loss: 0.6848
Epoch 4/10
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 987ms/step - accuracy: 0.7023 - loss: 0.6192 - val_accuracy: 0.6082 - val_loss: 0.6858
Epoch 5/10
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 988ms/step - accuracy: 0.6869 - loss: 0.5897 - val_accuracy: 0.7010 - val_loss: 0.6380
Epoch 6/10
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 972ms/step - accuracy: 0.7966 - loss: 0.5089 - val_accuracy: 0.6598 - val_loss: 0.6502
Epoch 7/10
[1m13/13[0m 

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  self._warn_if_super_not_called()


Epoch 1/10
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 1s/step - accuracy: 0.4879 - loss: 1.4176 - val_accuracy: 0.5464 - val_loss: 0.6916
Epoch 2/10
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 1s/step - accuracy: 0.5213 - loss: 0.6912 - val_accuracy: 0.5670 - val_loss: 0.6869
Epoch 3/10
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 1s/step - accuracy: 0.6264 - loss: 0.6829 - val_accuracy: 0.5361 - val_loss: 0.6786
Epoch 4/10
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 992ms/step - accuracy: 0.5304 - loss: 0.6776 - val_accuracy: 0.6598 - val_loss: 0.6520
Epoch 5/10
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 981ms/step - accuracy: 0.6461 - loss: 0.6388 - val_accuracy: 0.6186 - val_loss: 0.6881
Epoch 6/10
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 982ms/step - accuracy: 0.6498 - loss: 0.6130 - val_accuracy: 0.6186 - val_loss: 0.7224
Epoch 7/10
[1m13/13[0m [32m━

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  self._warn_if_super_not_called()


Epoch 1/10
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 1s/step - accuracy: 0.5225 - loss: 1.3664 - val_accuracy: 0.5876 - val_loss: 0.6922
Epoch 2/10
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 1s/step - accuracy: 0.4847 - loss: 0.6931 - val_accuracy: 0.4845 - val_loss: 0.6970
Epoch 3/10
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m160s[0m 13s/step - accuracy: 0.5573 - loss: 0.6768 - val_accuracy: 0.5567 - val_loss: 0.7231
Epoch 4/10
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 1s/step - accuracy: 0.5649 - loss: 0.6887 - val_accuracy: 0.6186 - val_loss: 0.6460
Epoch 5/10
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 984ms/step - accuracy: 0.7295 - loss: 0.5874 - val_accuracy: 0.6186 - val_loss: 0.6927
Epoch 6/10
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 991ms/step - accuracy: 0.7228 - loss: 0.5708 - val_accuracy: 0.5979 - val_loss: 0.6655
Epoch 7/10
[1m13/13[0m [32m━━

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  self._warn_if_super_not_called()


Epoch 1/10
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 1s/step - accuracy: 0.4919 - loss: 1.1474 - val_accuracy: 0.5258 - val_loss: 0.7055
Epoch 2/10
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 979ms/step - accuracy: 0.5939 - loss: 0.6771 - val_accuracy: 0.5464 - val_loss: 0.6780
Epoch 3/10
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 963ms/step - accuracy: 0.5756 - loss: 0.6624 - val_accuracy: 0.6598 - val_loss: 0.6616
Epoch 4/10
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 984ms/step - accuracy: 0.7006 - loss: 0.6046 - val_accuracy: 0.5876 - val_loss: 0.7025
Epoch 5/10
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 959ms/step - accuracy: 0.6867 - loss: 0.6391 - val_accuracy: 0.5464 - val_loss: 0.7136
Epoch 6/10
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 976ms/step - accuracy: 0.7205 - loss: 0.5281 - val_accuracy: 0.6186 - val_loss: 0.7559
✅ Fold 5 Accuracy: 0.6598

We cross validated on our training data to check the metrics, and we have a pretty decent average accuracy of 0.7052!
Let's now find which fold did the best and then use it to predict on the test data!

In [21]:
import tempfile

# Before the loop
temp_dir = tempfile.gettempdir()
model_paths = []

# Inside the loop, after training
model_path = os.path.join(temp_dir, f"best_model_fold_{fold+1}.h5")
model.save(model_path)
model_paths.append(model_path)




In [22]:
best_fold = np.argmax(accuracies)
best_model_path = model_paths[best_fold]

print(f"\n🏆 Best fold: {best_fold+1} with accuracy {accuracies[best_fold]:.4f}")
print(f"📁 Loading best model from: {best_model_path}")



🏆 Best fold: 1 with accuracy 0.7629
📁 Loading best model from: /var/folders/v9/hcdjkhvd5nq6llrn61tfl3qr0000gn/T/best_model_fold_5.h5


As we can see, Fold 1 presented us with the best accuracy! - this is the model we will use to predict our test data witt. 

In [24]:
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Load best model
model = load_model(best_model_path)

# Load test images
test_dir = '/Users/anhuynh/Downloads/Alex_Kelly_Pics/TestSet'  # Folder with test images
test_files = [os.path.join(test_dir, fname) for fname in os.listdir(test_dir) if fname.endswith(('.png', '.jpg', '.jpeg'))]

# Create DataFrame for test set
test_df = pd.DataFrame({'filepath': test_files})

# Create test generator
test_datagen = ImageDataGenerator(rescale=1./255)
test_gen = test_datagen.flow_from_dataframe(
    test_df,
    x_col='filepath',
    y_col=None,
    target_size=(224, 224),
    batch_size=32,
    class_mode=None,
    shuffle=False
)

# Predict
preds = model.predict(test_gen)
predicted_labels = (preds > 0.5).astype(int).flatten()
label_map = {0: 'Alex', 1: 'Kelly'}
predicted_names = [label_map[p] for p in predicted_labels]

# Display predictions
results = pd.DataFrame({
    'filename': test_df['filepath'].apply(os.path.basename),
    'prediction': predicted_names
})
print(results)

# Optional: Save to CSV
# results.to_csv("test_predictions.csv", index=False)




Found 20 validated image filenames.
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 325ms/step
              filename prediction
0   TestSetImage01.png      Kelly
1   TestSetImage15.png       Alex
2   TestSetImage14.png       Alex
3   TestSetImage16.png      Kelly
4   TestSetImage02.png       Alex
5   TestSetImage03.png      Kelly
6   TestSetImage17.png      Kelly
7   TestSetImage13.png      Kelly
8   TestSetImage07.png      Kelly
9   TestSetImage06.png      Kelly
10  TestSetImage12.png      Kelly
11  TestSetImage04.png      Kelly
12  TestSetImage10.png      Kelly
13  TestSetImage11.png       Alex
14  TestSetImage05.png       Alex
15  TestSetImage08.png      Kelly
16  TestSetImage20.png      Kelly
17  TestSetImage09.png      Kelly
18  TestSetImage19.png      Kelly
19  TestSetImage18.png      Kelly


Using our best CNN model and fold, we went ahead and predicted on the Test data to get these results!