In [None]:
import os
import shutil
import random
import pandas as pd

In [None]:
# Work from the competition's input folder so that relative paths used
# afterwards are resolved against it
competition_dir = '/kaggle/input/datathon-ai-qualification-round'
os.chdir(competition_dir)

# Echo the working directory to confirm the switch took effect
print(os.getcwd())

In [None]:
# Output folder left behind by a previous run, if any
folder_path = "/kaggle/working/new_dataset"

# Best-effort cleanup: a missing folder or any other failure is reported,
# never raised, so the notebook keeps running either way
try:
    shutil.rmtree(folder_path)
except FileNotFoundError:
    print(f"Folder '{folder_path}' does not exist.")
except Exception as exc:
    print(f"Error removing folder: {exc}")
else:
    print(f"Folder '{folder_path}' and its contents have been removed.")

In [None]:
# Define paths for the original dataset and the new dataset.
# Absolute paths are used on purpose: a later cell changes the working
# directory to /kaggle/working/new_dataset, after which the former relative
# paths ("train/train", "train_data.csv") would no longer resolve if this
# cell were re-run.
original_dataset_path = "/kaggle/input/datathon-ai-qualification-round/train/train"
new_dataset_path = "/kaggle/working/new_dataset"
csv_file_path = "/kaggle/input/datathon-ai-qualification-round/train_data.csv"

# Load the CSV file; its "city" column is read here and split_dataset
# additionally reads its "filename" column
train_data = pd.read_csv(csv_file_path)

# Categories to split into: one entry per distinct value of the "city" column
categories = train_data["city"].unique()

# Create <new_dataset_path>/<split>/<city> for every split and city.
# exist_ok=True makes the cell safe to re-run.
for split in ["train", "valid", "test"]:
    for category in categories:
        os.makedirs(os.path.join(new_dataset_path, split, category), exist_ok=True)

# Function to split dataset
def split_dataset(data, train_ratio=0.79, valid_ratio=0.2, test_ratio=0.01,
                  seed=None, source_dir=None, output_dir=None):
    """Copy every listed image into a per-city train/valid/test folder tree.

    Rows of ``data`` are grouped by their ``city`` value. Each group's
    ``filename`` list is shuffled and cut into three consecutive slices,
    which are copied to ``<output_dir>/<split>/<city>/<filename>``.

    Args:
        data: DataFrame with ``filename`` and ``city`` columns.
        train_ratio: Fraction of each city's images sent to ``train``
            (the resulting count is truncated to an integer).
        valid_ratio: Fraction of each city's images sent to ``valid``
            (the resulting count is truncated to an integer).
        test_ratio: Kept for backward compatibility; it does not take part
            in the arithmetic. ``test`` always receives whatever remains
            after the train and valid slices, so rounding never drops an
            image.
        seed: Optional seed for a private random generator, which makes the
            shuffle reproducible. ``None`` keeps using the global ``random``
            module state.
        source_dir: Folder holding the original images. Defaults to the
            module-level ``original_dataset_path``.
        output_dir: Root of the new dataset tree. Defaults to the
            module-level ``new_dataset_path``.

    Returns:
        None. Files are copied (the originals stay in place) and one summary
        line per city is printed.
    """
    if source_dir is None:
        source_dir = original_dataset_path
    if output_dir is None:
        output_dir = new_dataset_path

    # A private generator keeps a seeded split from disturbing (or being
    # disturbed by) other users of the global random state.
    rng = random if seed is None else random.Random(seed)

    for category, group in data.groupby("city"):
        images = group["filename"].tolist()
        rng.shuffle(images)

        train_end = int(len(images) * train_ratio)
        valid_end = train_end + int(len(images) * valid_ratio)
        splits = {
            "train": images[:train_end],
            "valid": images[train_end:valid_end],
            "test": images[valid_end:],  # remainder, see test_ratio above
        }

        print(
            f"{category}: {len(images)} images -> "
            f"train={len(splits['train'])}, "
            f"valid={len(splits['valid'])}, "
            f"test={len(splits['test'])}"
        )

        for split_name, split_images in splits.items():
            missing = _copy_images(
                split_images,
                source_dir,
                os.path.join(output_dir, split_name, category),
            )
            if missing:
                # Previously these were skipped without any notice.
                print(
                    f"  {len(missing)} file(s) listed for '{split_name}' "
                    f"were not found in '{source_dir}' and were skipped"
                )


def _copy_images(filenames, source_dir, dest_dir):
    """Copy the named files from source_dir into dest_dir; return the names that were not found."""
    os.makedirs(dest_dir, exist_ok=True)
    missing = []
    for name in filenames:
        src = os.path.join(source_dir, name)
        if os.path.exists(src):
            shutil.copy(src, os.path.join(dest_dir, name))
        else:
            missing.append(name)
    return missing

# Perform the split. The global random generator is seeded first so that
# the shuffle inside split_dataset yields the same train/valid/test
# assignment on every run of the notebook.
random.seed(42)
split_dataset(train_data)


In [None]:
# Folder whose contents are counted as a quick sanity check of the split
folder_path = "/kaggle/working/new_dataset/test/Ankara"

# os.walk yields one (dirpath, dirnames, filenames) triple per directory;
# add up the filenames of the folder and of everything below it
file_count = sum(len(filenames) for _root, _dirs, filenames in os.walk(folder_path))

print(f"The number of files in the folder '{folder_path}' is: {file_count}")


In [None]:
!pip install ultralytics

In [None]:
# Switch to the freshly built dataset folder; the cells below load the
# trained weights from a runs/ directory located under this folder
dataset_root = '/kaggle/working/new_dataset'
os.chdir(dataset_root)

# Echo the working directory to confirm the switch took effect
print(os.getcwd())

In [None]:
# The validation split was written to "valid"; the cells below read it from "val"
current_folder = "/kaggle/working/new_dataset/valid"
new_folder = "/kaggle/working/new_dataset/val"

# Nothing to rename when the source folder is absent (e.g. the cell already ran)
if not os.path.exists(current_folder):
    print(f"Folder '{current_folder}' does not exist")
else:
    os.rename(current_folder, new_folder)
    print(f"Renamed folder '{current_folder}' to '{new_folder}'")


In [None]:
from ultralytics import YOLO

# Build the network from its YAML definition and transfer the pretrained
# weights into it. The model used to be constructed three times in a row,
# with the first two instances discarded immediately; only this last
# variant was ever trained, so it is the one that is kept.
model = YOLO("yolo11x-cls.yaml").load("yolo11x-cls.pt")

# Train the model. The dataset root is passed explicitly instead of an empty
# string, so training no longer depends on the current working directory.
# The root holds the train/, val/ and test/ folders prepared by the earlier cells.
results = model.train(data="/kaggle/working/new_dataset", epochs=150, imgsz=640)

In [None]:
from ultralytics import YOLO
from sklearn.metrics import accuracy_score, f1_score, classification_report
import numpy as np

# Load the trained model
model = YOLO("/kaggle/working/new_dataset/runs/classify/train/weights/best.pt")

# Folder that is evaluated, one subfolder per class.
# NOTE(review): despite the variable name this is the "val" split, not the
# "test" split created earlier — confirm this is the intended evaluation set.
test_path = "/kaggle/working/new_dataset/val"

# Take the index <-> name mapping from the checkpoint itself rather than
# hard-coding a list that merely has to happen to match the model's order.
idx_to_class = dict(model.names)
class_indices = sorted(idx_to_class)
class_names = [idx_to_class[i] for i in class_indices]
class_to_idx = {name: i for i, name in idx_to_class.items()}

# Lists to store ground truth and predictions
y_true = []
y_pred = []

# Iterate through each folder (class) in the evaluated split
for class_name in sorted(os.listdir(test_path)):
    class_folder = os.path.join(test_path, class_name)
    if not os.path.isdir(class_folder):
        continue
    if class_name not in class_to_idx:
        # Previously an unexpected folder aborted the cell with a KeyError.
        print(f"Skipping '{class_folder}': not a class known to the model")
        continue

    for image_name in sorted(os.listdir(class_folder)):
        image_path = os.path.join(class_folder, image_name)

        # Predict the class of the image; verbose=False stops the
        # per-image log line from flooding the cell output
        result = model.predict(source=image_path, imgsz=640, save=False, conf=0.25, verbose=False)

        # Append ground truth and prediction (top1 is the index of the
        # highest-probability class)
        y_true.append(class_to_idx[class_name])
        y_pred.append(result[0].probs.top1)

if not y_true:
    raise RuntimeError(f"No images were found under '{test_path}'")

# Calculate accuracy
accuracy = accuracy_score(y_true, y_pred)

# Calculate F1 score (macro-average for multiple classes)
f1 = f1_score(y_true, y_pred, average="macro")

# Detailed classification report. `labels` is passed so the report still
# works when one of the classes never occurs in y_true or y_pred.
report = classification_report(
    y_true, y_pred, labels=class_indices, target_names=class_names
)

print(f"Accuracy: {accuracy:.4f}")
print(f"F1 Score (Macro): {f1:.4f}")
print("\nClassification Report:\n", report)


In [None]:
# Load the trained model
model = YOLO("/kaggle/working/new_dataset/runs/classify/train/weights/best.pt")

# Load the test.csv file; its "filename" column lists the images to classify
csv_file_path = "/kaggle/input/datathon-ai-qualification-round/test.csv"
test_df = pd.read_csv(csv_file_path)

# Path to the test images folder
test_images_folder = "/kaggle/input/datathon-ai-qualification-round/test/test"

# Class names in index order, read from the checkpoint instead of a
# hard-coded list that would have to match the model's order by convention
class_names = [model.names[i] for i in sorted(model.names)]

# Predict one city per row. Predictions are collected in a list and written
# to the frame in a single assignment, instead of cell-by-cell writes that
# depend on the dtype of any pre-existing "city" column.
predicted_cities = []
for filename in test_df["filename"]:
    image_path = os.path.join(test_images_folder, filename)
    # verbose=False stops the per-image log line from flooding the cell output
    result = model.predict(source=image_path, imgsz=640, save=False, conf=0.25, verbose=False)
    # top1 is the index of the highest-probability class
    predicted_cities.append(model.names[result[0].probs.top1])

# Update the city column with the predicted city names
test_df["city"] = predicted_cities

# Save the updated DataFrame back to a CSV file
output_csv_path = "/kaggle/working/submission.csv"
test_df.to_csv(output_csv_path, index=False)

print(f"Predictions saved to {output_csv_path}")
