**Libraries**

In [235]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPool2D, Flatten, Dense 
import matplotlib.pyplot as plt
from tensorflow import keras
from keras.utils import to_categorical
import tensorflow as tf
from sklearn.metrics import accuracy_score
from keras import callbacks
from keras import layers, models
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

In [236]:
alex = pd.read_excel('data/lab6_train_data.xlsx', sheet_name='Alex')
kelly = pd.read_excel('data/lab6_train_data.xlsx', sheet_name='Kelly')
test = pd.read_excel('data/lab6_train_data.xlsx', sheet_name='Test')

In [237]:
import glob
from PIL import Image
import numpy as np
import pandas as pd

def load_and_flatten_images(root_dir,
                            exts=(".jpg", ".jpeg", ".png", ".bmp"),
                            target_size=(64, 64)):
    """
    1) Walks through `root_dir`, finds all files ending in `exts` (recursively).
    2) Opens each image, converts to RGB, resizes to `target_size`.
    3) Flattens the H×W×C array into a single vector of length (H*W*C).
    4) Returns: a pandas DataFrame where each row is one image, with:
         • column "filename" (the basename of the image file), and
         • columns "pixel_0", "pixel_1", …, "pixel_{(H*W*C)-1}".
    """
    all_files = []
    for ext in exts:
        all_files.extend(glob.glob(f"{root_dir}/**/*{ext}", recursive=True))

    rows = []
    for fp in all_files:
        try:
            img = Image.open(fp).convert("RGB")
        except Exception as e:
            print(f"✗ Could not open {fp}: {e}")
            continue

        if target_size is not None:
            img = img.resize(target_size, resample=Image.BILINEAR)

        arr = np.array(img)  / 255.0 # now each pixel ∈ [0,1]
        flat = arr.reshape(-1)  # shape = (H*W*3,)

        # build a single row: [filename, pixel_0, pixel_1, ..., pixel_n]
        basename = fp.split("/")[-1]  # or os.path.basename(fp)
        row = [basename] + flat.tolist()
        rows.append(row)

    if len(rows) == 0:
        return pd.DataFrame()  # empty

    # build column names: first is "filename", then "pixel_0", …, "pixel_{num_pixels-1}"
    num_pixels = target_size[0] * target_size[1] * 3
    col_names = ["Image ID"] + [f"pixel_{i}" for i in range(num_pixels)]
    df = pd.DataFrame(rows, columns=col_names)
    return df


In [238]:
#Alex Images
folder_path = "Alex_Kelly_Pics/Alex"
alex_images = load_and_flatten_images(folder_path, target_size=(64, 64))
print(alex_images.shape)         # e.g., (N_images, 1 + 224*224*3)
print(alex_images.columns[:5])  # ['filename', 'pixel_0', 'pixel_1', 'pixel_2', 'pixel_3']
print(alex_images.iloc[0, :10]) # show first image's name + first few pixel values

# If you really need a NumPy array for “everything but the filename”:
X = alex_images.drop(columns=["Image ID"]).to_numpy()   # shape = (N_images, 224*224*3)
filenames = alex_images["Image ID"].tolist()

(256, 12289)
Index(['Image ID', 'pixel_0', 'pixel_1', 'pixel_2', 'pixel_3'], dtype='object')
Image ID    Alex-Image119.png
pixel_0              0.345098
pixel_1              0.403922
pixel_2              0.462745
pixel_3              0.356863
pixel_4              0.423529
pixel_5              0.478431
pixel_6              0.368627
pixel_7              0.435294
pixel_8              0.494118
Name: 0, dtype: object


In [239]:
#Kelly Images
folder_path = "Alex_Kelly_Pics/Kelly"
kelly_images = load_and_flatten_images(folder_path, target_size=(64, 64))
print(kelly_images.shape)         # e.g., (N_images, 1 + 224*224*3)
print(kelly_images.columns[:5])  # ['filename', 'pixel_0', 'pixel_1', 'pixel_2', 'pixel_3']
print(kelly_images.iloc[0, :10]) # show first image's name + first few pixel values

# If you really need a NumPy array for “everything but the filename”:
X = kelly_images.drop(columns=["Image ID"]).to_numpy()   # shape = (N_images, 224*224*3)
filenames = kelly_images["Image ID"].tolist()

(229, 12289)
Index(['Image ID', 'pixel_0', 'pixel_1', 'pixel_2', 'pixel_3'], dtype='object')
Image ID    Kelly-Image124.png
pixel_0               0.356863
pixel_1               0.341176
pixel_2               0.317647
pixel_3               0.360784
pixel_4               0.345098
pixel_5               0.317647
pixel_6                0.34902
pixel_7               0.333333
pixel_8               0.301961
Name: 0, dtype: object


In [240]:
#TestSet Images
folder_path = "Alex_Kelly_Pics/TestSet"
test_images = load_and_flatten_images(folder_path, target_size=(64, 64))
print(test_images.shape)         # e.g., (N_images, 1 + 224*224*3)
print(test_images.columns[:5])  # ['filename', 'pixel_0', 'pixel_1', 'pixel_2', 'pixel_3']
print(test_images.iloc[0, :10]) # show first image's name + first few pixel values

# If you really need a NumPy array for “everything but the filename”:
X = test_images.drop(columns=["Image ID"]).to_numpy()   # shape = (N_images, 224*224*3)
filenames = test_images["Image ID"].tolist()

(20, 12289)
Index(['Image ID', 'pixel_0', 'pixel_1', 'pixel_2', 'pixel_3'], dtype='object')
Image ID    TestSetImage01.png
pixel_0               0.364706
pixel_1               0.345098
pixel_2               0.298039
pixel_3               0.231373
pixel_4               0.227451
pixel_5               0.219608
pixel_6               0.243137
pixel_7               0.247059
pixel_8               0.270588
Name: 0, dtype: object


In [241]:
#combine into single image
#df["filename"].str.removesuffix(".png")


In [242]:
#stack cat df
train = pd.concat([alex, kelly], ignore_index=True)

#stack images df
train_images = pd.concat([alex_images, kelly_images], ignore_index=True)

#remove .png to merge datasets
train_images["Image ID"] = train_images["Image ID"].str.removesuffix(".png")
test_images["Image ID"] = test_images["Image ID"].str.removesuffix(".png")

#merge data
train = pd.merge(train_images, train, on="Image ID")
test = pd.merge(test_images, test, on="Image ID")

**Modeling**

Approach 1: 

* Train a (convolutional) neural network to identify whether or not there is a human in the image.
* Train a (convolutional) neural network to identify whether or not there is a castle in the image.
* Train a (convolutional) neural network to identify whether the image is taken indoors or outdoors.
* Train a (convolutional) neural network to identify the landscape of the image (e.g. city, suburb, or nature/rural)
* Choose at least 10 other features (or feature categories) that you suspect might be useful for differentiating Alex and Kelly's photos, and train individual (convolutional) neural networks to identify those.

In [243]:
model = models.Sequential()

#input layer
model.add(layers.Conv2D(32, (3, 3), activation='relu', padding='same', input_shape=(64, 64, 3)))

#hidden layers
model.add(layers.Conv2D(64, (3, 3), activation='relu', padding='same'))
model.add(layers.Flatten())
model.add(layers.Dense(128, activation='relu'))
model.add(layers.Dropout(0.25))

#output layer
model.add(layers.Dense(1, activation='sigmoid'))

#summarize
model.summary()

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [244]:
#compile model
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy']
)


**Train a (convolutional) neural network to identify whether or not there is a human in the image.**

In [245]:
#variable selection
X = train.drop(columns=['Image ID', 'Human', 'Castle', 'Indoors or Outdoors', 'Landscape (City, Suburb, or Nature/Rural)', 'Child/Baby', 'Animal', 'Cat', 'Dog', 'Body_of_Water', 'Car', 'Bridge', 'Food', 'Tree', 'Mountain', 'Instrument', 'Drink'])
y = train['Human']

X = X.to_numpy().reshape(-1, 64, 64, 3)

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [246]:
# early call back
early_stopping = callbacks.EarlyStopping(monitor='val_loss', patience=10, verbose=1, mode='min')

#fit model
history = model.fit(
    X_train, y_train,
    validation_data=(X_test, y_test),
    epochs=50,
    batch_size=64,
    callbacks=[early_stopping],
    verbose=0
)


Epoch 17: early stopping


In [247]:
human_pred = (model.predict(X_test) > 0.5).astype(int).ravel()

# get accuracy score
accuracy_score(y_test, human_pred)

[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step


0.5567010309278351

**Train a (convolutional) neural network to identify whether or not there is a castle in the image.**

In [248]:
#variable selection
X = train.drop(columns=['Image ID', 'Human', 'Castle', 'Indoors or Outdoors', 'Landscape (City, Suburb, or Nature/Rural)', 'Child/Baby', 'Animal', 'Cat', 'Dog', 'Body_of_Water', 'Car', 'Bridge', 'Food', 'Tree', 'Mountain', 'Instrument', 'Drink'])
y = train['Castle']

X = X.to_numpy().reshape(-1, 64, 64, 3)

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [249]:
# early call back
early_stopping = callbacks.EarlyStopping(monitor='val_loss', patience=10, verbose=1, mode='min')

#fit model
history = model.fit(
    X_train, y_train,
    validation_data=(X_test, y_test),
    epochs=50,
    batch_size=64,
    callbacks=[early_stopping],
    verbose=0
)


Epoch 24: early stopping


In [250]:
castle_pred = (model.predict(X_test) > 0.5).astype(int).ravel()

# get accuracy score
accuracy_score(y_test, castle_pred)

[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step


0.9381443298969072

**Train a (convolutional) neural network to identify whether the image is taken indoors or outdoors.**

In [251]:
#variable selection
X = train.drop(columns=['Image ID', 'Human', 'Castle', 'Indoors or Outdoors', 'Landscape (City, Suburb, or Nature/Rural)', 'Child/Baby', 'Animal', 'Cat', 'Dog', 'Body_of_Water', 'Car', 'Bridge', 'Food', 'Tree', 'Mountain', 'Instrument', 'Drink'])
y = train['Indoors or Outdoors']

X = X.to_numpy().reshape(-1, 64, 64, 3)

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [252]:
# early call back
early_stopping = callbacks.EarlyStopping(monitor='val_loss', patience=10, verbose=1, mode='min')

#fit model
history = model.fit(
    X_train, y_train,
    validation_data=(X_test, y_test),
    epochs=50,
    batch_size=64,
    callbacks=[early_stopping],
    verbose=0
)


Epoch 18: early stopping


In [253]:
indoor_outdoor_pred = (model.predict(X_test) > 0.5).astype(int).ravel()

# get accuracy score
accuracy_score(y_test, indoor_outdoor_pred)

[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step


0.8144329896907216

**Choose at least 10 other features (or feature categories) that you suspect might be useful for differentiating Alex and Kelly's photos, and train individual (convolutional) neural networks to identify those.**

**Child/Baby**

In [254]:
#variable selection
X = train.drop(columns=['Image ID', 'Human', 'Castle', 'Indoors or Outdoors', 'Landscape (City, Suburb, or Nature/Rural)', 'Child/Baby', 'Animal', 'Cat', 'Dog', 'Body_of_Water', 'Car', 'Bridge', 'Food', 'Tree', 'Mountain', 'Instrument', 'Drink'])
y = train['Child/Baby']

X = X.to_numpy().reshape(-1, 64, 64, 3)

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [255]:
# early call back
early_stopping = callbacks.EarlyStopping(monitor='val_loss', patience=10, verbose=1, mode='min')

#fit model
history = model.fit(
    X_train, y_train,
    validation_data=(X_test, y_test),
    epochs=50,
    batch_size=64,
    callbacks=[early_stopping],
    verbose=0
)


Epoch 13: early stopping


In [256]:
child_baby_pred = (model.predict(X_test) > 0.5).astype(int).ravel()

# get accuracy score
accuracy_score(y_test, child_baby_pred)

[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step


0.9278350515463918

**Animal**

In [257]:
#variable selection
X = train.drop(columns=['Image ID', 'Human', 'Castle', 'Indoors or Outdoors', 'Landscape (City, Suburb, or Nature/Rural)', 'Child/Baby', 'Animal', 'Cat', 'Dog', 'Body_of_Water', 'Car', 'Bridge', 'Food', 'Tree', 'Mountain', 'Instrument', 'Drink'])
y = train['Animal']

X = X.to_numpy().reshape(-1, 64, 64, 3)

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [258]:
# early call back
early_stopping = callbacks.EarlyStopping(monitor='val_loss', patience=10, verbose=1, mode='min')

#fit model
history = model.fit(
    X_train, y_train,
    validation_data=(X_test, y_test),
    epochs=50,
    batch_size=64,
    callbacks=[early_stopping],
    verbose=0
)

Epoch 11: early stopping


In [259]:
animal_pred = (model.predict(X_test) > 0.5).astype(int).ravel()

# get accuracy score
accuracy_score(y_test, animal_pred)

[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step


0.9484536082474226

**Cat**

In [260]:
#variable selection
X = train.drop(columns=['Image ID', 'Human', 'Castle', 'Indoors or Outdoors', 'Landscape (City, Suburb, or Nature/Rural)', 'Child/Baby', 'Animal', 'Cat', 'Dog', 'Body_of_Water', 'Car', 'Bridge', 'Food', 'Tree', 'Mountain', 'Instrument', 'Drink'])
y = train['Cat']

X = X.to_numpy().reshape(-1, 64, 64, 3)

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [261]:
# early call back
early_stopping = callbacks.EarlyStopping(monitor='val_loss', patience=10, verbose=1, mode='min')

#fit model
history = model.fit(
    X_train, y_train,
    validation_data=(X_test, y_test),
    epochs=50,
    batch_size=64,
    callbacks=[early_stopping],
    verbose=0
)

Epoch 12: early stopping


In [262]:
cat_pred = (model.predict(X_test) > 0.5).astype(int).ravel()

# get accuracy score
accuracy_score(y_test, cat_pred)

[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step


0.979381443298969

**Dog**

In [263]:
#variable selection
X = train.drop(columns=['Image ID', 'Human', 'Castle', 'Indoors or Outdoors', 'Landscape (City, Suburb, or Nature/Rural)', 'Child/Baby', 'Animal', 'Cat', 'Dog', 'Body_of_Water', 'Car', 'Bridge', 'Food', 'Tree', 'Mountain', 'Instrument', 'Drink'])
y = train['Dog']

X = X.to_numpy().reshape(-1, 64, 64, 3)

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [264]:
# early call back
early_stopping = callbacks.EarlyStopping(monitor='val_loss', patience=10, verbose=1, mode='min')

#fit model
history = model.fit(
    X_train, y_train,
    validation_data=(X_test, y_test),
    epochs=50,
    batch_size=64,
    callbacks=[early_stopping],
    verbose=0
)

Epoch 13: early stopping


In [265]:
dog_pred = (model.predict(X_test) > 0.5).astype(int).ravel()

# get accuracy score
accuracy_score(y_test, dog_pred)

[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step


0.9896907216494846

**Body_of_water**

In [266]:
#variable selection
X = train.drop(columns=['Image ID', 'Human', 'Castle', 'Indoors or Outdoors', 'Landscape (City, Suburb, or Nature/Rural)', 'Child/Baby', 'Animal', 'Cat', 'Dog', 'Body_of_Water', 'Car', 'Bridge', 'Food', 'Tree', 'Mountain', 'Instrument', 'Drink'])
y = train['Body_of_Water']

X = X.to_numpy().reshape(-1, 64, 64, 3)

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [267]:
# early call back
early_stopping = callbacks.EarlyStopping(monitor='val_loss', patience=10, verbose=1, mode='min')

#fit model
history = model.fit(
    X_train, y_train,
    validation_data=(X_test, y_test),
    epochs=50,
    batch_size=64,
    callbacks=[early_stopping],
    verbose=0
)

Epoch 18: early stopping


In [268]:
body_of_water_pred = (model.predict(X_test) > 0.5).astype(int).ravel()

# get accuracy score
accuracy_score(y_test, body_of_water_pred)

[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step


0.7938144329896907

**Car**

In [269]:
#variable selection
X = train.drop(columns=['Image ID', 'Human', 'Castle', 'Indoors or Outdoors', 'Landscape (City, Suburb, or Nature/Rural)', 'Child/Baby', 'Animal', 'Cat', 'Dog', 'Body_of_Water', 'Car', 'Bridge', 'Food', 'Tree', 'Mountain', 'Instrument', 'Drink'])
y = train['Car']

X = X.to_numpy().reshape(-1, 64, 64, 3)

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [270]:
# early call back
early_stopping = callbacks.EarlyStopping(monitor='val_loss', patience=10, verbose=1, mode='min')

#fit model
history = model.fit(
    X_train, y_train,
    validation_data=(X_test, y_test),
    epochs=50,
    batch_size=64,
    callbacks=[early_stopping],
    verbose=0
)

Epoch 16: early stopping


In [271]:
car_pred = (model.predict(X_test) > 0.5).astype(int).ravel()

# get accuracy score
accuracy_score(y_test, car_pred)

[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step


0.865979381443299

**Bridge**

In [272]:
#variable selection
X = train.drop(columns=['Image ID', 'Human', 'Castle', 'Indoors or Outdoors', 'Landscape (City, Suburb, or Nature/Rural)', 'Child/Baby', 'Animal', 'Cat', 'Dog', 'Body_of_Water', 'Car', 'Bridge', 'Food', 'Tree', 'Mountain', 'Instrument', 'Drink'])
y = train['Bridge']

X = X.to_numpy().reshape(-1, 64, 64, 3)

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [273]:
# early call back
early_stopping = callbacks.EarlyStopping(monitor='val_loss', patience=10, verbose=1, mode='min')

#fit model
history = model.fit(
    X_train, y_train,
    validation_data=(X_test, y_test),
    epochs=50,
    batch_size=64,
    callbacks=[early_stopping],
    verbose=0
)

Epoch 14: early stopping


In [274]:
bridge_pred = (model.predict(X_test) > 0.5).astype(int).ravel()

# get accuracy score
accuracy_score(y_test, bridge_pred)

[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step


0.9896907216494846

**Food**

In [275]:
#variable selection
X = train.drop(columns=['Image ID', 'Human', 'Castle', 'Indoors or Outdoors', 'Landscape (City, Suburb, or Nature/Rural)', 'Child/Baby', 'Animal', 'Cat', 'Dog', 'Body_of_Water', 'Car', 'Bridge', 'Food', 'Tree', 'Mountain', 'Instrument', 'Drink'])
y = train['Food']

X = X.to_numpy().reshape(-1, 64, 64, 3)

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [276]:
# early call back
early_stopping = callbacks.EarlyStopping(monitor='val_loss', patience=10, verbose=1, mode='min')

#fit model
history = model.fit(
    X_train, y_train,
    validation_data=(X_test, y_test),
    epochs=50,
    batch_size=64,
    callbacks=[early_stopping],
    verbose=0
)

Epoch 15: early stopping


In [277]:
food_pred = (model.predict(X_test) > 0.5).astype(int).ravel()

# get accuracy score
accuracy_score(y_test, food_pred)

[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step


0.9484536082474226

**Tree**

In [278]:
#variable selection
X = train.drop(columns=['Image ID', 'Human', 'Castle', 'Indoors or Outdoors', 'Landscape (City, Suburb, or Nature/Rural)', 'Child/Baby', 'Animal', 'Cat', 'Dog', 'Body_of_Water', 'Car', 'Bridge', 'Food', 'Tree', 'Mountain', 'Instrument', 'Drink'])
y = train['Tree']

X = X.to_numpy().reshape(-1, 64, 64, 3)

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [279]:
# early call back
early_stopping = callbacks.EarlyStopping(monitor='val_loss', patience=10, verbose=1, mode='min')

#fit model
history = model.fit(
    X_train, y_train,
    validation_data=(X_test, y_test),
    epochs=50,
    batch_size=64,
    callbacks=[early_stopping],
    verbose=0
)

Epoch 20: early stopping


In [280]:
tree_pred = (model.predict(X_test) > 0.5).astype(int).ravel()

# get accuracy score
accuracy_score(y_test, tree_pred)

[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step


0.6597938144329897

**Mountain**

In [281]:
#variable selection
X = train.drop(columns=['Image ID', 'Human', 'Castle', 'Indoors or Outdoors', 'Landscape (City, Suburb, or Nature/Rural)', 'Child/Baby', 'Animal', 'Cat', 'Dog', 'Body_of_Water', 'Car', 'Bridge', 'Food', 'Tree', 'Mountain', 'Instrument', 'Drink'])
y = train['Mountain']

X = X.to_numpy().reshape(-1, 64, 64, 3)

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [282]:
# early call back
early_stopping = callbacks.EarlyStopping(monitor='val_loss', patience=10, verbose=1, mode='min')

#fit model
history = model.fit(
    X_train, y_train,
    validation_data=(X_test, y_test),
    epochs=50,
    batch_size=64,
    callbacks=[early_stopping],
    verbose=0
)

Epoch 22: early stopping


In [283]:
mountain_pred = (model.predict(X_test) > 0.5).astype(int).ravel()

# get accuracy score
accuracy_score(y_test, mountain_pred)

[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step


0.7525773195876289

**Instrument**

In [284]:
#variable selection
X = train.drop(columns=['Image ID', 'Human', 'Castle', 'Indoors or Outdoors', 'Landscape (City, Suburb, or Nature/Rural)', 'Child/Baby', 'Animal', 'Cat', 'Dog', 'Body_of_Water', 'Car', 'Bridge', 'Food', 'Tree', 'Mountain', 'Instrument', 'Drink'])
y = train['Instrument']

X = X.to_numpy().reshape(-1, 64, 64, 3)

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [285]:
# early call back
early_stopping = callbacks.EarlyStopping(monitor='val_loss', patience=10, verbose=1, mode='min')

#fit model
history = model.fit(
    X_train, y_train,
    validation_data=(X_test, y_test),
    epochs=50,
    batch_size=64,
    callbacks=[early_stopping],
    verbose=0
)

Epoch 18: early stopping


In [286]:
instrument_pred = (model.predict(X_test) > 0.5).astype(int).ravel()

# get accuracy score
accuracy_score(y_test, instrument_pred)

[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step


0.9690721649484536

**Drink**

In [287]:
#variable selection
X = train.drop(columns=['Image ID', 'Human', 'Castle', 'Indoors or Outdoors', 'Landscape (City, Suburb, or Nature/Rural)', 'Child/Baby', 'Animal', 'Cat', 'Dog', 'Body_of_Water', 'Car', 'Bridge', 'Food', 'Tree', 'Mountain', 'Instrument', 'Drink'])
y = train['Drink']

X = X.to_numpy().reshape(-1, 64, 64, 3)

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [288]:
# early call back
early_stopping = callbacks.EarlyStopping(monitor='val_loss', patience=10, verbose=1, mode='min')

#fit model
history = model.fit(
    X_train, y_train,
    validation_data=(X_test, y_test),
    epochs=50,
    batch_size=64,
    callbacks=[early_stopping],
    verbose=0
)

Epoch 12: early stopping


In [289]:
drink_pred = (model.predict(X_test) > 0.5).astype(int).ravel()

# get accuracy score
accuracy_score(y_test, drink_pred)

[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step


0.9175257731958762

**Train a (convolutional) neural network to identify the landscape of the image (e.g. city, suburb, or nature/rural)**

In [334]:
#variable selection
X = train.drop(columns=['Image ID', 'Human', 'Castle', 'Indoors or Outdoors', 'Landscape (City, Suburb, or Nature/Rural)', 'Child/Baby', 'Animal', 'Cat', 'Dog', 'Body_of_Water', 'Car', 'Bridge', 'Food', 'Tree', 'Mountain', 'Instrument', 'Drink'])
y = train['Landscape (City, Suburb, or Nature/Rural)']

#reshape X
X = X.to_numpy().reshape(-1, 64, 64, 3)

In [335]:
#encode y categories and one-hot encode

encoder = LabelEncoder()
y_int = encoder.fit_transform(y)

In [336]:
from tensorflow.keras import layers, models, callbacks
from tensorflow.keras.utils import to_categorical
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
import numpy as np

# ─── Dummy data setup (replace this with your real X and y_int) ────────────────────────
# Suppose you already did:
#    X = ...             # shape (485, 64, 64, 3)
#    y_int = ...         # shape (485,) with values in {0,1,2}

# For the sake of demonstration, here is an example of how to generate dummy data:
# (You do NOT need to run these lines if you already have X and y_int from your preprocess.)
#
# N = 485
# X = np.random.rand(N, 64, 64, 3).astype('float32')
# y_int = np.random.randint(0, 3, size=(N,))

# ─── Split the data ─────────────────────────────────────────────────────────────────
# Make sure this matches exactly what you did before:
X_train, X_test, y_train_int, y_test_int = train_test_split(
    X, y_int, test_size=0.2, random_state=42, stratify=y_int
)

# Convert integer labels → one-hot
num_classes = 3
y_train_cat = to_categorical(y_train_int, num_classes=num_classes)
y_test_cat  = to_categorical(y_test_int,  num_classes=num_classes)

# Verify shapes
print("Shapes before model.fit:")
print("  X_train       =", X_train.shape)     # expecting (388, 64, 64, 3)
print("  y_train_cat   =", y_train_cat.shape)  # expecting (388, 3)
print("  X_test        =", X_test.shape)      # expecting ( 97, 64, 64, 3)
print("  y_test_cat    =", y_test_cat.shape)   # expecting ( 97, 3)

# ─── Build & compile the 3-class model ───────────────────────────────────────────────
model = models.Sequential([
    layers.Conv2D(32, (3, 3), activation='relu', padding='same',
                  input_shape=(64, 64, 3)),
    layers.Conv2D(64, (3, 3), activation='relu', padding='same'),
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dropout(0.25),
    layers.Dense(num_classes, activation='softmax')
])

model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

model.summary()

# ─── EarlyStopping callback ─────────────────────────────────────────────────────────
early_stopping = callbacks.EarlyStopping(
    monitor='val_loss',
    patience=10,
    verbose=1,
    mode='min',
    restore_best_weights=True
)

# ─── Train the model ─────────────────────────────────────────────────────────────────
history = model.fit(
    X_train, y_train_cat,                    # must pass one-hot here
    validation_data=(X_test, y_test_cat),    # must pass one-hot here
    epochs=50,
    batch_size=64,
    callbacks=[early_stopping],
    verbose=1
)

# If you see training progress without error, everything is correct.


Shapes before model.fit:
  X_train       = (388, 64, 64, 3)
  y_train_cat   = (388, 3)
  X_test        = (97, 64, 64, 3)
  y_test_cat    = (97, 3)


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/50
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 294ms/step - accuracy: 0.3602 - loss: 11.0591 - val_accuracy: 0.4639 - val_loss: 1.9222
Epoch 2/50
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 289ms/step - accuracy: 0.4470 - loss: 1.5552 - val_accuracy: 0.4433 - val_loss: 1.0950
Epoch 3/50
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 285ms/step - accuracy: 0.4852 - loss: 1.0870 - val_accuracy: 0.5258 - val_loss: 1.0867
Epoch 4/50
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 262ms/step - accuracy: 0.5842 - loss: 1.0374 - val_accuracy: 0.4536 - val_loss: 1.0896
Epoch 5/50
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 301ms/step - accuracy: 0.5597 - loss: 0.9211 - val_accuracy: 0.4845 - val_loss: 1.0275
Epoch 6/50
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 283ms/step - accuracy: 0.6559 - loss: 0.8023 - val_accuracy: 0.4845 - val_loss: 0.9768
Epoch 7/50
[1m7/7[0m [32m━━━━━━━━━━━

In [340]:
y_proba = model.predict(X_test)

landscape_pred  = np.argmax(y_proba, axis=1)

acc = accuracy_score(y_test_int, landscape_pred)
print("Test accuracy (3-way):", acc)

[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step
Test accuracy (3-way): 0.4845360824742268


**Now, consider the 14+ outputs of these "feature" models to be inputs for a classification model, to classify Alex's photos from Kelly's. This classification model does not need to be a neural network, but it can be.**

In [None]:
predictions_dict = {
    "Human": human_pred,
    "Castle": castle_pred,
    "Indoors or Outdoors": indoor_outdoor_pred,
    "Landscape": landscape_pred,
    "Child/Baby": child_baby_pred,
    "Animal": animal_pred,
    "Cat": cat_pred,
    "Dog": dog_pred,
    "Body_of_Water": body_of_water_pred,
    "Car": car_pred,
    "Bridge": bridge_pred,
    "Food": food_pred,
    "Tree": tree_pred,
    "Mountain": mountain_pred,
    "Instrument": instrument_pred,
    "Drink": drink_pred
}

In [342]:
predictions_df = pd.DataFrame(predictions_dict)