In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Only the first entry yielded by os.walk (the top of /kaggle/input) is
# inspected; files inside nested folders are not listed.
top_level = next(os.walk('/kaggle/input'), None)
if top_level is not None:
    dirname, _, filenames = top_level
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

# **Veri Ön İşleme**

**Eksik Veri Kontrolü**

In [None]:
import os
from pathlib import Path
from PIL import Image, UnidentifiedImageError

# Scan the training set for image files that Pillow cannot open/verify.
# The folder name must match the dataset used by every other cell
# ("sports-classification"); a misspelled path makes os.walk yield nothing
# and the scan would silently report zero bad files.
train_dir = "/kaggle/input/sports-classification/train"

if not os.path.isdir(train_dir):
    # Fail loudly instead of silently scanning nothing.
    raise FileNotFoundError(f"Eğitim klasörü bulunamadı: {train_dir}")

bad_files = []       # every path that failed verification
removed_count = 0    # how many of those were actually deleted

for root, _, files in os.walk(train_dir):
    for fname in files:
        fpath = Path(root) / fname
        try:
            # The context manager closes the file handle even when
            # verification fails, so handles do not pile up during the scan.
            with Image.open(fpath) as img:
                img.verify()   # integrity check without decoding pixel data
        except (UnidentifiedImageError, OSError, SyntaxError, ValueError):
            # NOTE(review): some Pillow format plugins appear to signal
            # corruption from verify() with SyntaxError/ValueError rather
            # than OSError — confirm against the installed Pillow version.
            bad_files.append(str(fpath))
            try:
                os.remove(fpath)  # try to delete the corrupt file
            except OSError as e:
                # Deletion can fail, e.g. when the dataset directory is
                # mounted read-only.
                print("Silinemedi:", fpath, e)
            else:
                removed_count += 1
                print("Silindi:", fpath)

# Report found and deleted counts separately: deletion may have failed.
print(f"\nToplam {len(bad_files)} bozuk/eksik dosya bulundu, {removed_count} tanesi silindi.")

**Veri Dağılımı**

Veri dağılımını kontrol ediyorum. Hangi sınıftan ne kadar örneğe sahibim? Veri dağılımım dengesiz mi? Eğer veri dağılımımda büyük bir dengesizlik söz konusu ise undersampling yapabilirim. Eğer büyük bir dengesizlik söz konusu değilse class weights ekleyebilirim.

In [None]:
import os
import pandas as pd

train_dir = "/kaggle/input/sports-classification/train"

# Each sub-directory of train_dir is treated as one sport class; map its name
# to the number of regular files directly inside it.
class_counts = {
    entry: sum(
        os.path.isfile(os.path.join(train_dir, entry, item))
        for item in os.listdir(os.path.join(train_dir, entry))
    )
    for entry in os.listdir(train_dir)
    if os.path.isdir(os.path.join(train_dir, entry))
}

# Tabulate the counts, ordered from the smallest class to the largest.
df_counts = pd.DataFrame(
    {"class": list(class_counts.keys()), "count": list(class_counts.values())}
).sort_values("count")

print("Her sınıftaki görsel sayısı:")
display(df_counts)

# After the ascending sort the first row is the least-populated class.
min_class = df_counts.iloc[0]
print(f"\nEn az örneğe sahip sınıf: '{min_class['class']}' ({min_class['count']} resim)")

Veri dağılımımda büyük bir dengesizlik söz konusu değil; bu yüzden undersampling yapmak yerine class weights ekleyeceğim.

**Veri Görselleştirmesi**

In [None]:
import os
import random
import matplotlib.pyplot as plt
from PIL import Image

train_dir = "/kaggle/input/sports-classification/train"

classes = os.listdir(train_dir)
sample_classes = random.sample(classes, 9)  # draw 9 distinct classes

# Show one randomly chosen image per sampled class on a 3x3 grid.
fig, axes = plt.subplots(3, 3, figsize=(12, 12))
for ax, cls in zip(axes.flat, sample_classes):
    cls_dir = os.path.join(train_dir, cls)
    img_path = os.path.join(cls_dir, random.choice(os.listdir(cls_dir)))
    img = Image.open(img_path)
    ax.imshow(img)
    ax.set_title(cls)
    ax.axis("off")
plt.show()


In [None]:
import pandas as pd

# Relies on `classes`, `train_dir`, `os` and `plt` defined by the previous cell.
# Counts every directory entry of each class folder (not only regular files).
class_counts = {}
for cls in classes:
    class_counts[cls] = len(os.listdir(os.path.join(train_dir, cls)))

df_counts = pd.DataFrame(
    {"class": list(class_counts.keys()), "count": list(class_counts.values())}
).sort_values("count")

# Horizontal bars, one per class, in ascending order of image count.
fig, ax = plt.subplots(figsize=(12, 6))
ax.barh(df_counts["class"], df_counts["count"])
ax.set_xlabel("Görsel sayısı")
ax.set_ylabel("Spor sınıfı")
ax.set_title("Sınıf dağılımı")
plt.show()


# Data Augmentation and Modeling

Önce ImageDataGenerator kullanarak eğitim, doğrulama ve test veri setlerini hazırladım. Eğitim verisine çeşitli data augmentation teknikleri (zoom, kaydırma, döndürme, yatay çevirme) uyguladım.

Ardından sıfırdan bir CNN mimarisi tanımladım. Katmanlarda Conv2D, BatchNormalization ve ReLU kullanarak özellik çıkarımı yaptım, son kısımda ise GlobalAveragePooling2D ve yoğun (Dense) katmanlarla sınıflandırma gerçekleştirdim.

Modeli derlerken Adam optimizer ve categorical_crossentropy loss fonksiyonunu seçtim. Eğitim sürecinde EarlyStopping ve ReduceLROnPlateau callback’lerini ekleyerek aşırı öğrenmeyi (overfitting) engellemeye çalıştım.

Eğitimin ardından eğitim/doğrulama doğruluklarını görselleştirdim, test seti üzerinde modeli değerlendirdim ve en sonunda modeli .h5 formatında kaydettim.

In [None]:
import numpy as np
import pandas as pd
import os
import random
from PIL import Image
import cv2
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.optimizers import Adam
import matplotlib.pyplot as plt

# ----------------------
# Directories
# ----------------------
train_dir = "/kaggle/input/sports-classification/train"
val_dir   = "/kaggle/input/sports-classification/valid"
test_dir  = "/kaggle/input/sports-classification/test"

# ----------------------
# Data generators
# ----------------------
target_size = (224, 224)
batch_size = 32

# Training images are scaled by 1/255 and randomly augmented
# (zoom, horizontal/vertical shift, rotation, horizontal flip).
train_datagen = ImageDataGenerator(
    rescale=1./255,
    zoom_range=0.2,
    width_shift_range=0.2,
    height_shift_range=0.2,
    rotation_range=15,
    horizontal_flip=True
)

# Validation and test images are only scaled, never augmented.
val_datagen = ImageDataGenerator(rescale=1./255)

# Arguments shared by all three directory iterators.
_flow_kwargs = dict(
    class_mode="categorical",
    target_size=target_size,
    batch_size=batch_size,
)

train_dg = train_datagen.flow_from_directory(
    train_dir, shuffle=True, seed=42, **_flow_kwargs
)

validation_dg = val_datagen.flow_from_directory(
    val_dir, shuffle=False, seed=42, **_flow_kwargs
)

testing_dg = val_datagen.flow_from_directory(
    test_dir, shuffle=False, **_flow_kwargs
)

# Number of output units = number of classes found in the training folder.
num_classes = len(train_dg.class_indices)

# ----------------------
# CNN architecture
# ----------------------
def conv_block(x, filters, kernel_size=3, strides=1):
    """Apply Conv2D -> BatchNormalization -> ReLU to a tensor.

    Args:
        x: Input tensor of the block.
        filters: Number of convolution filters.
        kernel_size: Convolution kernel size (default 3).
        strides: Convolution stride (default 1).

    Returns:
        The tensor produced by the ReLU activation.
    """
    stages = (
        # The convolution has no bias term; it is directly followed by
        # batch normalization.
        layers.Conv2D(filters, kernel_size, strides=strides, padding="same", use_bias=False),
        layers.BatchNormalization(),
        layers.ReLU(),
    )
    for stage in stages:
        x = stage(x)
    return x

inputs = layers.Input(shape=(*target_size, 3))

# Four stages, each made of two conv blocks followed by max pooling.
x = inputs
for stage_filters in (32, 64, 128, 256):
    x = conv_block(x, stage_filters)
    x = conv_block(x, stage_filters)
    x = layers.MaxPooling2D()(x)

# Final 512-filter Conv -> BatchNorm -> ReLU block, without pooling.
x = conv_block(x, 512)

# Classification head: global average pooling, dense layer, dropout, softmax.
x = layers.GlobalAveragePooling2D()(x)
x = layers.Dense(512, activation="relu")(x)
x = layers.Dropout(0.5)(x)
outputs = layers.Dense(num_classes, activation="softmax")(x)

cnn_model = models.Model(inputs, outputs)

# ----------------------
# Compile
# ----------------------
cnn_model.compile(
    optimizer=Adam(learning_rate=3e-4),   # a slightly higher LR generally works well for a CNN
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

# ----------------------
# Callbacks
# ----------------------
# Both callbacks watch validation accuracy, which should be maximised; the
# direction is stated explicitly instead of relying on name-based inference.
callbacks = [
    EarlyStopping(monitor="val_accuracy", mode="max", patience=5, restore_best_weights=True),
    ReduceLROnPlateau(monitor="val_accuracy", mode="max", factor=0.5, patience=3)
]

# ----------------------
# Class weights
# ----------------------
# The notebook's analysis section states that class weights will be used to
# compensate for the mild class imbalance, so they are computed here and
# passed to fit(). "Balanced" heuristic: n_samples / (n_classes * class_count).
class_sample_counts = np.bincount(train_dg.classes, minlength=num_classes)
total_train_samples = int(class_sample_counts.sum())
class_weight = {
    # max(..., 1) guards against a class folder that contains no images.
    class_idx: total_train_samples / (num_classes * max(int(count), 1))
    for class_idx, count in enumerate(class_sample_counts)
}

# ----------------------
# Training
# ----------------------
history = cnn_model.fit(
    train_dg,
    validation_data=validation_dg,
    epochs=50,
    callbacks=callbacks,
    class_weight=class_weight
)

# ----------------------
# Plot
# ----------------------
# Training vs. validation accuracy per epoch, taken from the fit history.
fig, ax = plt.subplots()
for metric_key, curve_label in (
    ("accuracy", "Train Accuracy"),
    ("val_accuracy", "Validation Accuracy"),
):
    ax.plot(history.history[metric_key], label=curve_label)
ax.set_title('Training and Validation Accuracy')
ax.legend()
plt.show()

# ----------------------
# Test
# ----------------------
# evaluate() returns the loss followed by the compiled metrics (accuracy).
test_results = cnn_model.evaluate(testing_dg)
test_loss = test_results[0]
test_accuracy = test_results[1]
print(f"Test Loss: {test_loss:.4f}")
print(f"Test Accuracy: {test_accuracy:.4f}")

# ----------------------
# Save
# ----------------------
# Persist the trained model to the working directory in HDF5 (.h5) format.
cnn_model.save("cnn_custom_classifier.h5")
