<a href="https://colab.research.google.com/github/hsevras/Stamatics-deepflow/blob/main/week6/major_assignment2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Install Kaggle API
!pip install -q kaggle

# Upload kaggle.json from your Kaggle account
from google.colab import files
files.upload()  # Upload kaggle.json when prompted

# Move kaggle.json to the correct directory
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json

# Download dataset
!kaggle competitions download -c dog-breed-identification
!unzip -q dog-breed-identification.zip
!unzip -q train.zip -d train
!unzip -q test.zip -d test


Saving kaggle.json to kaggle.json
Downloading dog-breed-identification.zip to /content
 99% 683M/691M [00:03<00:00, 97.8MB/s]
100% 691M/691M [00:03<00:00, 194MB/s] 
unzip:  cannot find or open train.zip, train.zip.zip or train.zip.ZIP.
unzip:  cannot find or open test.zip, test.zip.zip or test.zip.ZIP.


In [2]:
import pandas as pd
import numpy as np
import os
import tensorflow as tf
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import EfficientNetB0
from tensorflow.keras.layers import GlobalAveragePooling2D, Dropout, Dense
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam

# Load labels
labels_df = pd.read_csv('labels.csv')
labels_df['file_path'] = labels_df['id'].apply(lambda x: f"train/{x}.jpg")

print(f"Total Images: {len(labels_df)}")
print(f"Total Classes: {labels_df['breed'].nunique()}")

# Train-validation split
train_df, val_df = train_test_split(
    labels_df, test_size=0.2, stratify=labels_df['breed'], random_state=42
)


Total Images: 10222
Total Classes: 120


In [3]:
img_size = 224
batch_size = 32

train_gen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=15,
    zoom_range=0.2,
    horizontal_flip=True
)

val_gen = ImageDataGenerator(rescale=1./255)

train_data = train_gen.flow_from_dataframe(
    train_df,
    x_col='file_path',
    y_col='breed',
    target_size=(img_size, img_size),
    batch_size=batch_size,
    class_mode='categorical'
)

val_data = val_gen.flow_from_dataframe(
    val_df,
    x_col='file_path',
    y_col='breed',
    target_size=(img_size, img_size),
    batch_size=batch_size,
    class_mode='categorical'
)


Found 8177 validated image filenames belonging to 120 classes.
Found 2045 validated image filenames belonging to 120 classes.


In [None]:
base_model = EfficientNetB0(
    input_shape=(img_size, img_size, 3),
    include_top=False,
    weights='imagenet'
)

x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dropout(0.3)(x)
output = Dense(len(train_data.class_indices), activation='softmax')(x)

model = Model(inputs=base_model.input, outputs=output)

# Freeze base model for fast training
base_model.trainable = False

model.compile(
    optimizer=Adam(learning_rate=0.001),
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

model.summary()

# Train
history = model.fit(
    train_data,
    validation_data=val_data,
    epochs=5
)


Downloading data from https://storage.googleapis.com/keras-applications/efficientnetb0_notop.h5
[1m16705208/16705208[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


  self._warn_if_super_not_called()


Epoch 1/5
[1m256/256[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3s/step - accuracy: 0.0095 - loss: 4.9041

  self._warn_if_super_not_called()


[1m256/256[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m940s[0m 4s/step - accuracy: 0.0095 - loss: 4.9040 - val_accuracy: 0.0083 - val_loss: 4.8467
Epoch 2/5
[1m233/256[0m [32m━━━━━━━━━━━━━━━━━━[0m[37m━━[0m [1m1:07[0m 3s/step - accuracy: 0.0104 - loss: 4.8709

In [None]:
# Unfreeze base model for fine-tuning
base_model.trainable = True
model.compile(
    optimizer=Adam(learning_rate=1e-5),
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

history_finetune = model.fit(
    train_data,
    validation_data=val_data,
    epochs=3
)
