In [16]:
import os
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import mean_squared_error
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense, Dropout
from tensorflow.keras.applications import ResNet50
from sklearn.neighbors import KNeighborsRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import GradientBoostingRegressor

# Step 1: Load the data
df = pd.read_csv('augmented_train.csv')

In [3]:
df.head()

Unnamed: 0,Id,Subject Focus,Eyes,Face,Near,Action,Accessory,Group,Collage,Human,Occlusion,Info,Blur,Pawpularity,Image
0,0007de18844b0dbbb5e1f607da0606e0,0,1,1,1,0,0,1,0,0,0,0,0,0.63,0007de18844b0dbbb5e1f607da0606e0_aug_1
1,0007de18844b0dbbb5e1f607da0606e0,0,1,1,1,0,0,1,0,0,0,0,0,0.63,0007de18844b0dbbb5e1f607da0606e0_aug_2
2,0007de18844b0dbbb5e1f607da0606e0,0,1,1,1,0,0,1,0,0,0,0,0,0.63,0007de18844b0dbbb5e1f607da0606e0_aug_3
3,0007de18844b0dbbb5e1f607da0606e0,0,1,1,1,0,0,1,0,0,0,0,0,0.63,0007de18844b0dbbb5e1f607da0606e0_aug_4
4,0007de18844b0dbbb5e1f607da0606e0,0,1,1,1,0,0,1,0,0,0,0,0,0.63,0007de18844b0dbbb5e1f607da0606e0_aug_5


In [11]:
# Step 2: Preprocess the data
# Separate features and target
imagedata = df['Id']
features = df.drop(columns=['Pawpularity','Id'])
target = df['Pawpularity']

features = features.select_dtypes(include=[np.number])

features = features.astype(np.float32)
target = target.astype(np.float32)

# Split into training and validation sets (80% train, 20% validation)
X_train, X_val, y_train, y_val = train_test_split(features, target, test_size=0.2, random_state=42)

# Ensure the labels are in the correct shape
y_train = y_train.values.reshape(-1, 1)
y_val = y_val.values.reshape(-1, 1)



Regression Model

In [28]:
model = Sequential([
    Dense(128, input_shape=(X_train.shape[1],), activation='relu'),
    BatchNormalization(),
    Dropout(0.4),
    
    Dense(64, activation='relu'),
    BatchNormalization(),
    Dropout(0.3),
    
    Dense(32, activation='relu'),
    BatchNormalization(),
    Dropout(0.2),
    
    Dense(1, activation='linear')  # Output layer for regression
])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [29]:
# Compile the model
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.001), 
              loss='mse', 
              metrics=[tf.keras.metrics.RootMeanSquaredError()])

# Step 4: Train the model
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5, min_lr=1e-6)

history = model.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    epochs=100,
    batch_size=32,
    callbacks=[early_stopping, reduce_lr],
    verbose=1
)

# Step 5: Evaluate the model
val_predictions = model.predict(X_val)
rmse = np.sqrt(mean_squared_error(y_val, val_predictions))
print(f"Validation RMSE: {rmse:.4f}")

Epoch 1/100
[1m1239/1239[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 910us/step - loss: 0.5910 - root_mean_squared_error: 0.7374 - val_loss: 0.0428 - val_root_mean_squared_error: 0.2068 - learning_rate: 0.0010
Epoch 2/100
[1m1239/1239[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 739us/step - loss: 0.0470 - root_mean_squared_error: 0.2167 - val_loss: 0.0423 - val_root_mean_squared_error: 0.2057 - learning_rate: 0.0010
Epoch 3/100
[1m1239/1239[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 710us/step - loss: 0.0429 - root_mean_squared_error: 0.2072 - val_loss: 0.0424 - val_root_mean_squared_error: 0.2058 - learning_rate: 0.0010
Epoch 4/100
[1m1239/1239[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 713us/step - loss: 0.0432 - root_mean_squared_error: 0.2078 - val_loss: 0.0425 - val_root_mean_squared_error: 0.2060 - learning_rate: 0.0010
Epoch 5/100
[1m1239/1239[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 671us/step - loss: 0.0433 - root_mea

KNN

In [14]:
# Standardize features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_val_scaled = scaler.transform(X_val)

# Initialize the KNN regressor
knn = KNeighborsRegressor()

# Hyperparameter tuning using GridSearchCV
param_grid = {'n_neighbors': [3, 5, 7, 10], 'weights': ['uniform', 'distance']}
grid_search = GridSearchCV(knn, param_grid, cv=5, scoring='neg_mean_squared_error')
grid_search.fit(X_train_scaled, y_train)

# Best parameters from GridSearch
best_knn = grid_search.best_estimator_

# Make predictions
y_pred = best_knn.predict(X_val_scaled)

# Evaluate the model
mse = mean_squared_error(y_val, y_pred)
rmse = np.sqrt(mse)
print(f"RMSE: {rmse:.4f}")


RMSE: 0.2225


Gradient Boosting Regressor

In [17]:
# Initialize the GradientBoostingRegressor with default parameters
model = GradientBoostingRegressor()

# Hyperparameter tuning using GridSearchCV
param_grid = {
    'n_estimators': [50, 100, 200],
    'learning_rate': [0.01, 0.1, 0.2],
    'max_depth': [3, 4, 5],
}
grid_search = GridSearchCV(estimator=model, param_grid=param_grid, cv=5, n_jobs=-1, scoring='neg_mean_squared_error')

# Fit the model with the best parameters from GridSearchCV
grid_search.fit(X_train, y_train)

# Best parameters and model
print(f"Best parameters: {grid_search.best_params_}")

# Evaluate the best model
best_model = grid_search.best_estimator_
y_pred = best_model.predict(X_val)

# Evaluate performance using RMSE
mse = mean_squared_error(y_val, y_pred)
rmse = np.sqrt(mse)
print(f"RMSE: {rmse:.4f}")

  y = column_or_1d(y, warn=True)  # TODO: Is this still required?
  y = column_or_1d(y, warn=True)  # TODO: Is this still required?
  y = column_or_1d(y, warn=True)  # TODO: Is this still required?
  y = column_or_1d(y, warn=True)  # TODO: Is this still required?
  y = column_or_1d(y, warn=True)  # TODO: Is this still required?
  y = column_or_1d(y, warn=True)  # TODO: Is this still required?
  y = column_or_1d(y, warn=True)  # TODO: Is this still required?
  y = column_or_1d(y, warn=True)  # TODO: Is this still required?
  y = column_or_1d(y, warn=True)  # TODO: Is this still required?
  y = column_or_1d(y, warn=True)  # TODO: Is this still required?
  y = column_or_1d(y, warn=True)  # TODO: Is this still required?
  y = column_or_1d(y, warn=True)  # TODO: Is this still required?
  y = column_or_1d(y, warn=True)  # TODO: Is this still required?
  y = column_or_1d(y, warn=True)  # TODO: Is this still required?
  y = column_or_1d(y, warn=True)  # TODO: Is this still required?
  y = colu

Best parameters: {'learning_rate': 0.2, 'max_depth': 5, 'n_estimators': 200}
RMSE: 0.2035


ResNet

In [30]:
# Split into training and validation sets (80% train, 20% validation)
X_train2, X_val2, y_train2, y_val2 = train_test_split(imagedata, target, test_size=0.2, random_state=42)

# Ensure the labels are in the correct shape
y_train2 = y_train2.values.reshape(-1, 1)
y_val2 = y_val2.values.reshape(-1, 1)

In [32]:
# Load ResNet50 model with pre-trained ImageNet weights
base_model = ResNet50(weights='imagenet', include_top=False, input_shape=(224, 224, 3))

# Freeze the layers of ResNet
base_model.trainable = False

# Build the model
model = Sequential([
    base_model,
    GlobalAveragePooling2D(),
    Dense(1024, activation='relu'),
    Dropout(0.5),
    Dense(1, activation='linear')  # Use linear for regression output
])

# Compile the model
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.001), 
              loss='mse', 
              metrics=[tf.keras.metrics.RootMeanSquaredError()])

# Define callbacks
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5, min_lr=1e-6)

# Train the model
history = model.fit(
    X_train2, y_train2, 
    validation_data=(X_val2, y_val2), 
    epochs=100,
    batch_size=32,
    callbacks=[early_stopping, reduce_lr],
    verbose=1
)

# Evaluate the model
val_predictions = model.predict(X_val2)
rmse = np.sqrt(mean_squared_error(y_val2, val_predictions))
print(f"Validation RMSE: {rmse:.4f}")

Epoch 1/100


ValueError: Exception encountered when calling Sequential.call().

[1mInvalid input shape for input Tensor("sequential_4_1/Cast:0", shape=(32, 1), dtype=float32). Expected shape (None, 224, 224, 3), but input has incompatible shape (32, 1)[0m

Arguments received by Sequential.call():
  • inputs=tf.Tensor(shape=(32, 1), dtype=string)
  • training=True
  • mask=None