# Petals to the Metal

<img src = "https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcSpc3PttUQPFJLEvOnv1QEt_50uTTMba6XKVA&s">

 
## Problemin Tanımı
Bu proje, **Petals to the Metal** yarışması kapsamında 104 farklı çiçek türünü sınıflandırmayı amaçlar.
## Veri Formatı
Veriler **TFRecord** formatındadır. Bu format, büyük veri setlerini verimli bir şekilde okumak için idealdir.


# Kütüphaneler


In [1]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import matplotlib.pyplot as plt
import numpy as np
import os
import glob

print(f'TensorFlow Version: {tf.__version__}')


  if not hasattr(np, "object"):


TensorFlow Version: 2.20.0


# TFRecord Okuma Fonksiyonları


In [2]:
# Veri Yolları
DATA_DIR = '../data/tfrecords-jpeg-224x224'
TRAIN_FILENAMES = tf.io.gfile.glob(DATA_DIR + '/train/*.tfrec')
VAL_FILENAMES = tf.io.gfile.glob(DATA_DIR + '/val/*.tfrec')
TEST_FILENAMES = tf.io.gfile.glob(DATA_DIR + '/test/*.tfrec')

print(f'Train TFRecords: {len(TRAIN_FILENAMES)}')
print(f'Val TFRecords: {len(VAL_FILENAMES)}')

# TFRecord Yapısı
IMAGE_SIZE = [224, 224]
AUTO = tf.data.AUTOTUNE

def decode_image(image_data):
    image = tf.image.decode_jpeg(image_data, channels=3)
    image = tf.cast(image, tf.float32) / 255.0  # Normalize
    image = tf.reshape(image, [*IMAGE_SIZE, 3])
    return image

def read_labeled_tfrecord(example):
    LABELED_TFREC_FORMAT = {
        "image": tf.io.FixedLenFeature([], tf.string), # tf.string means bytestring
        "class": tf.io.FixedLenFeature([], tf.int64),  # shape [] means scalar
    }
    example = tf.io.parse_single_example(example, LABELED_TFREC_FORMAT)
    image = decode_image(example['image'])
    label = example['class']
    return image, label

def load_dataset(filenames, labeled=True, ordered=False):
    # Read from TFRecords. For optimal performance, read from multiple files at once.
    ignore_order = tf.data.Options()
    if not ordered:
        ignore_order.experimental_deterministic = False # disable order, increase speed

    dataset = tf.data.TFRecordDataset(filenames, num_parallel_reads=AUTO)
    dataset = dataset.with_options(ignore_order)
    dataset = dataset.map(read_labeled_tfrecord, num_parallel_calls=AUTO)
    return dataset


Train TFRecords: 16
Val TFRecords: 16


# Dataset Oluşturma


In [3]:
def get_training_dataset():
    dataset = load_dataset(TRAIN_FILENAMES, labeled=True)
    dataset = dataset.repeat() # Sonsuz döngü (steps_per_epoch ile kontrol edilir)
    dataset = dataset.shuffle(2048)
    dataset = dataset.batch(32)
    dataset = dataset.prefetch(AUTO)
    return dataset

def get_validation_dataset():
    dataset = load_dataset(VAL_FILENAMES, labeled=True, ordered=True)
    dataset = dataset.batch(32)
    dataset = dataset.cache()
    dataset = dataset.prefetch(AUTO)
    return dataset

if len(TRAIN_FILENAMES) > 0:
    train_ds = get_training_dataset()
    val_ds = get_validation_dataset()
    print('Datasetler hazır.')
else:
    print('HATA: TFRecord dosyaları bulunamadı.')


Datasetler hazır.


# Modelleme (MobileNetV2)


In [4]:
with tf.device('/CPU:0'): # GPU yoksa CPU zorla (veya GPU varsa otomatik kullanır)
    base_model = tf.keras.applications.MobileNetV2(
        input_shape=(*IMAGE_SIZE, 3),
        include_top=False,
        weights='imagenet'
    )
    base_model.trainable = False
    
    model = tf.keras.Sequential([
        base_model,
        layers.GlobalAveragePooling2D(),
        layers.Dense(104, activation='softmax')
    ])
    
    model.compile(
        optimizer='adam',
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy']
    )
    model.summary()


# Eğitim


In [5]:
if len(TRAIN_FILENAMES) > 0:
    # TFRecord infinite loop olduğu için steps_per_epoch belirtilmeli
    # Tahmini: Dosya sayısı * Dosya başı örnek / Batch size
    # Örnek: 12753 training images / 32 = ~398 steps
    
    steps_per_epoch = 12753 // 32
    
    history = model.fit(
        train_ds,
        validation_data=val_ds,
        epochs=5,
        steps_per_epoch=steps_per_epoch
    )


Epoch 1/5
[1m398/398[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m151s[0m 360ms/step - accuracy: 0.5813 - loss: 1.8161 - val_accuracy: 0.7123 - val_loss: 1.1217
Epoch 2/5




[1m398/398[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m135s[0m 340ms/step - accuracy: 0.7962 - loss: 0.8161 - val_accuracy: 0.7602 - val_loss: 0.9388
Epoch 3/5
[1m398/398[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m139s[0m 349ms/step - accuracy: 0.8554 - loss: 0.5759 - val_accuracy: 0.7799 - val_loss: 0.8515
Epoch 4/5
[1m398/398[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m136s[0m 343ms/step - accuracy: 0.8904 - loss: 0.4420 - val_accuracy: 0.7888 - val_loss: 0.8143
Epoch 5/5
[1m398/398[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m139s[0m 349ms/step - accuracy: 0.9179 - loss: 0.3520 - val_accuracy: 0.7953 - val_loss: 0.8043


# Kaydetme


In [6]:
if 'model' in locals():
    os.makedirs('../models', exist_ok=True)
    model.save('../models/best_model.keras')
    print('Model Saved.')


Model Saved.


In [7]:
# --- DÜZELTİLMİŞ SUBMISSION KODU ---
import pandas as pd
import numpy as np
import os
import tensorflow as tf

# Test TFRecord formatını tanımlayan ve okuyan fonksiyon
def read_test_tfrecord(example):
    TEST_TFREC_FORMAT = {
        "image": tf.io.FixedLenFeature([], tf.string),
        "id": tf.io.FixedLenFeature([], tf.string),
    }
    example = tf.io.parse_single_example(example, TEST_TFREC_FORMAT)
    image = decode_image(example['image'])
    idnum = example['id']
    return image, idnum

# Test Datasetini Yükleme Fonksiyonu
def get_test_dataset(filenames):
    dataset = tf.data.TFRecordDataset(filenames, num_parallel_reads=AUTO)
    dataset = dataset.map(read_test_tfrecord, num_parallel_calls=AUTO)
    dataset = dataset.batch(32) # Batch size
    dataset = dataset.prefetch(AUTO)
    return dataset

# Dosyaları Bul
TEST_FILENAMES = tf.io.gfile.glob(DATA_DIR + '/test/*.tfrec')

if len(TEST_FILENAMES) > 0:
    print(f'{len(TEST_FILENAMES)} adet Test dosyası bulundu. Tahmin yapılıyor...')

    # 1. Test Datasetini Yükle
    test_ds = get_test_dataset(TEST_FILENAMES)

    # 2. Sadece görüntüleri model için ayır
    test_images_ds = test_ds.map(lambda image, idnum: image)

    # 3. Model ile Tahmin (Bu işlem biraz sürebilir)
    print("Model tahmin üretiyor...")
    probabilities = model.predict(test_images_ds)
    predictions = np.argmax(probabilities, axis=-1)

    # 4. ID'leri al (numpy string olarak)
    print("ID'ler çıkarılıyor...")
    test_ids_ds = test_ds.map(lambda image, idnum: idnum).unbatch()

    # ID'leri güvenli bir şekilde listeye çevirelim
    test_ids = list(test_ids_ds.as_numpy_iterator())
    test_ids = [x.decode('utf-8') for x in test_ids] # Byte string'i normal stringe çevir

    # Boyut Kontrolü
    if len(test_ids) != len(predictions):
        print(f"UYARI: ID sayısı ({len(test_ids)}) ile tahmin sayısı ({len(predictions)}) uyuşmuyor!")
        # Genellikle dataset.batch() son batch'i eksik bırakabilir veya repeat sorunu olabilir
        # Ancak yukarıdaki kodda repeat yok, düzgün çalışmalı.

    # 5. DataFrame
    submission = pd.DataFrame({
        'id': test_ids[:len(predictions)], # Güvenlik için slice
        'label': predictions
    })

    # 6. Kaydet
    os.makedirs('../outputs', exist_ok=True)
    submission_path = '../outputs/submission.csv'
    submission.to_csv(submission_path, index=False)

    print(f'Submission başarıyla kaydedildi: {submission_path}')
    display(submission.head())

else:
    print("HATA: Test dosyaları bulunamadı.")

16 adet Test dosyası bulundu. Tahmin yapılıyor...
Model tahmin üretiyor...
[1m231/231[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m65s[0m 273ms/step
ID'ler çıkarılıyor...
Submission başarıyla kaydedildi: ../outputs/submission.csv


Unnamed: 0,id,label
0,252d840db,67
1,1c4736dea,28
2,c37a6f3e9,67
3,00e4f514e,103
4,59d1b6146,70


In [8]:
# --- Export model artifacts for Local + HF (recommended: weights) ---
from pathlib import Path

models_dir = Path("models")
models_dir.mkdir(parents=True, exist_ok=True)

keras_path = models_dir / "best_model.keras"
weights_path = models_dir / "best_model.weights.h5"

# Save full model (optional on HF; may fail to deserialize)
model.save(keras_path)

# Save weights-only (recommended; used first in HF)
model.save_weights(weights_path)

print("Saved:")
print(" -", keras_path.resolve())
print(" -", weights_path.resolve())

# Copy to src/ for HF layout (model in src/)
src_dir = Path("src")
if src_dir.exists():
    (src_dir / "best_model.weights.h5").write_bytes(weights_path.read_bytes())
    # copy .keras too if you want, but not required for HF
    try:
        (src_dir / "best_model.keras").write_bytes(keras_path.read_bytes())
    except Exception as e:
        print("Copy .keras to src failed:", e)
    print("Copied weights to src/:", (src_dir / "best_model.weights.h5").resolve())


Saved:
 - C:\Users\Erhan\Documents\0.YapayZekaKursu\Projects\PBL Level2\Hw.15.BecomeAPro\8.CV_PetalsToTheMetal\notebooks\models\best_model.keras
 - C:\Users\Erhan\Documents\0.YapayZekaKursu\Projects\PBL Level2\Hw.15.BecomeAPro\8.CV_PetalsToTheMetal\notebooks\models\best_model.weights.h5
