In [1]:
from tensorflow.keras.regularizers import l2
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import (Conv2D, BatchNormalization, Activation, 
                                     MaxPooling2D, Dropout, Flatten, Dense)
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.metrics import classification_report, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import pandas as pd 
import os


csv_path = "/kaggle/input/datathon-ai-qualification-round/train_data.csv"


labels_df = pd.read_csv(csv_path)


train_dir = "/kaggle/input/datathon-ai-qualification-round/train/train"
test_dir = "/kaggle/input/datathon-ai-qualification-round/test/test"
labels_df['path'] = labels_df['filename'].apply(lambda x: os.path.join(train_dir, x))
datagen = ImageDataGenerator(
    rescale=1./255,
    validation_split=0.2  
)


train_generator = datagen.flow_from_dataframe(
    dataframe=labels_df,
    directory=train_dir,
    x_col="filename",
    y_col="city",  
    target_size=(64, 64),
    batch_size=32,
    class_mode="categorical",
    color_mode='rgb',
    subset="training"
)

val_generator = datagen.flow_from_dataframe(
    dataframe=labels_df,
    directory=train_dir,
    x_col="filename",
    y_col="city", 
    target_size=(64, 64),
    batch_size=32,
    class_mode="categorical",
    color_mode='rgb',
    subset="validation"
)

model = Sequential([
    Conv2D(64, (3, 3), padding='same', input_shape=(64, 64, 3), kernel_regularizer=l2(0.01)),
    BatchNormalization(),
    Activation('relu'),
    MaxPooling2D(pool_size=(2, 2)),

    Conv2D(128, (3, 3), padding='same', kernel_regularizer=l2(0.01)),
    BatchNormalization(),
    Activation('relu'),
    MaxPooling2D(pool_size=(2, 2)),

    Conv2D(256, (3, 3), padding='same', kernel_regularizer=l2(0.01)),
    BatchNormalization(),
    Activation('relu'),
    MaxPooling2D(pool_size=(2, 2)),

    Flatten(),
    Dense(512, kernel_regularizer=l2(0.01)),
    BatchNormalization(),
    Activation('relu'),
    Dropout(0.5),

    Dense(len(train_generator.class_indices), activation='softmax')
])

model.compile(
    loss='categorical_crossentropy',
    optimizer=Adam(learning_rate=0.001),
    metrics=['accuracy']
)

callbacks = [
    EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True),
    ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=3, min_lr=1e-6)
]


history = model.fit(
    train_generator,
    validation_data=val_generator,
    epochs=50,
    callbacks=callbacks
)
test_datagen = ImageDataGenerator(rescale=1./255)


test_generator = test_datagen.flow_from_dataframe(
    dataframe=pd.DataFrame({'filename': os.listdir(test_dir)}), 
    directory=test_dir,
    x_col="filename",
    class_mode=None,  
    target_size=(64, 64),
    batch_size=1,
    shuffle=False,
    color_mode='rgb'
)

predictions = model.predict(test_generator)
predicted_classes = np.argmax(predictions, axis=-1)


class_labels = list(train_generator.class_indices.keys())
predicted_labels = [class_labels[idx] for idx in predicted_classes]


output = pd.DataFrame({
    "filename": test_generator.filenames,
    "predicted_city": predicted_labels
})
output.to_csv("submission.csv", index=False)



Found 5600 validated image filenames belonging to 3 classes.
Found 1400 validated image filenames belonging to 3 classes.


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/50


  self._warn_if_super_not_called()


[1m175/175[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m140s[0m 762ms/step - accuracy: 0.5212 - loss: 9.8989 - val_accuracy: 0.3150 - val_loss: 3.2938 - learning_rate: 0.0010
Epoch 2/50
[1m175/175[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m124s[0m 703ms/step - accuracy: 0.6187 - loss: 2.1723 - val_accuracy: 0.3150 - val_loss: 2.1167 - learning_rate: 0.0010
Epoch 3/50
[1m175/175[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m124s[0m 702ms/step - accuracy: 0.6212 - loss: 1.5872 - val_accuracy: 0.4879 - val_loss: 1.5486 - learning_rate: 0.0010
Epoch 4/50
[1m175/175[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m126s[0m 713ms/step - accuracy: 0.6369 - loss: 1.4079 - val_accuracy: 0.4879 - val_loss: 1.5128 - learning_rate: 0.0010
Epoch 5/50
[1m175/175[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m124s[0m 705ms/step - accuracy: 0.6534 - loss: 1.3249 - val_accuracy: 0.5907 - val_loss: 1.5073 - learning_rate: 0.0010
Epoch 6/50
[1m175/175[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m