In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# for dirname, _, filenames in os.walk('/kaggle/input'):
#     for filename in filenames:
#         print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
%%capture
# Install the ultralytics package that the notebook imports YOLO from;
# the %%capture cell magic above captures this cell's output so the installer log is not shown.
!pip install ultralytics

In [None]:
import random
import shutil
from collections import defaultdict
import matplotlib.pyplot as plt

import torch

from ultralytics import YOLO

In [None]:
# making writable dir for YOLO
# (-p: no error when the directory already exists, so this cell can be re-run)
!mkdir -p /kaggle/working/data

# copying the data over
# NOTE(review): re-running this after the train/val split cell copies the full
# train set back in, including files that were already moved to val.
!cp -r /kaggle/input/dlp-object-detection-week-10/final_dlp_data/final_dlp_data /kaggle/working/data

# making appropriate dirs for val data (-p also creates the parent val/ directory)
!mkdir -p /kaggle/working/data/final_dlp_data/val/images
!mkdir -p /kaggle/working/data/final_dlp_data/val/labels

In [None]:
# figuring class distribution
# Counts, per class, how many label files start with that class
# (only the first line of each label file is inspected).

train_dir = '/kaggle/input/dlp-object-detection-week-10/final_dlp_data/final_dlp_data/train/labels'
label_dict = defaultdict(int)

for file in sorted(os.listdir(train_dir)):
    # the context manager closes every label file, not just the last one opened
    with open(os.path.join(train_dir, file), 'r') as f:
        fields = f.readline().split()
    if not fields:
        continue  # empty label file: there is no class to count
    label_dict[fields[0]] += 1  # first element is the class label

# dict to store count of instances per class, sorted by label so the
# bar order does not depend on the directory listing order
label_dict = dict(sorted(label_dict.items()))

# bar plot
plt.bar(list(label_dict.keys()), list(label_dict.values()))
plt.xlabel('class label')
plt.ylabel('number of label files')
plt.title('Training set class distribution (first label per file)');

In [None]:
# Carve a validation set out of the training data by moving image/label pairs
# from train/ to val/. The class of an image is taken from the first line of
# its label file, and the chance of being moved depends on that class.

# defining paths
train_images_path = '/kaggle/working/data/final_dlp_data/train/images'
train_labels_path = '/kaggle/working/data/final_dlp_data/train/labels'
val_images_path = '/kaggle/working/data/final_dlp_data/val/images'
val_labels_path = '/kaggle/working/data/final_dlp_data/val/labels'

# ensuring validation dirs exist
os.makedirs(val_images_path, exist_ok=True)
os.makedirs(val_labels_path, exist_ok=True)

# getting list of training images; sorted because os.listdir order is arbitrary
# and the seeded draws below are only reproducible for a fixed file order
train_images = sorted(os.listdir(train_images_path))

# dedicated seeded generator so the same split is produced on every fresh run
split_rng = random.Random(42)

count = 0
skipped = 0
for file in train_images:
    source_image_file = os.path.join(train_images_path, file)
    source_label_file = os.path.join(train_labels_path, os.path.splitext(file)[0] + '.txt')

    # without a label file the image has no class to decide on: leave it in train
    if not os.path.exists(source_label_file):
        skipped += 1
        continue

    with open(source_label_file, 'r') as f:
        fields = f.readline().split()
    if not fields:
        # empty label file: same situation, leave the pair in train
        skipped += 1
        continue
    label = fields[0]  # first element

    rand_num = split_rng.randint(1, 10)
    # NOTE(review): '0' and '2' are moved unconditionally, so no image whose first
    # label is one of these classes stays in train - confirm this is intended.
    # '4' and '5' are moved 2 times out of 10; every other class 1 time out of 10.
    move_probability = {'0': True, '2': True, '4': rand_num <= 2, '5': rand_num <= 2}

    if move_probability.get(label, rand_num <= 1):
        shutil.move(source_image_file, val_images_path)
        shutil.move(source_label_file, val_labels_path)
        count += 1

print(f'Total images moved: {count}')
if skipped:
    print(f'Images left in train because their label file is missing or empty: {skipped}')

In [None]:
# Dataset description handed to model.train(): dataset root, the image folder
# of each split (relative to the root) and the class-index -> name table.
yaml = """
path: /kaggle/working/data/final_dlp_data
train: train/images
val: val/images
test: test/images
names:
    0: aegypti
    1: albopictus
    2: anopheles
    3: culex
    4: culiseta
    5: japonicus/koreicu
"""

# persisting the config text to disk
with open("/kaggle/working/mosquitoes.yaml", mode="w") as config_file:
    config_file.write(yaml)

### Training

In [None]:
# start from the yolov8n.pt checkpoint
model = YOLO("yolov8n.pt")

# training the model on the dataset described by the YAML config
train_settings = {
    "data": "/kaggle/working/mosquitoes.yaml",
    "epochs": 50,
    "imgsz": 640,
}
results = model.train(**train_settings)

In [None]:
# storing trained model parameters
# Ask the trainer where this run was written instead of assuming ".../train":
# Ultralytics numbers repeated runs (train2, train3, ...), so the fixed path can
# point at an older run when the training cell has been executed more than once.
# The original hard-coded directory is kept as the fallback.
default_run_dir = "/kaggle/working/runs/detect/train"
trainer = getattr(model, "trainer", None)
run_dir = getattr(trainer, "save_dir", None) or default_run_dir

# make_archive returns the full path of the archive it created
archive_path = shutil.make_archive("/kaggle/working/trained_model", "zip", str(run_dir))
print(f"Zipped model saved at {archive_path}")

### Prediction

In [None]:
# run the trained model over every image in the test folder
test_images_dir = '/kaggle/working/data/final_dlp_data/test/images'
predict_options = dict(conf=0.1, iou=0.5, verbose=False)
results = model(test_images_dir, **predict_options)

In [None]:
# print(len(results))
# print(results[0])

In [None]:
# Write one CSV row per test image: the single highest-confidence detection,
# or an all-zero placeholder labelled with class 0 when nothing was detected.
# The context manager guarantees the file is closed even if a row fails.
with open('/kaggle/working/submission.csv', 'w') as predictions:
    predictions.write("id,ImageID,LabelName,Conf,xcenter,ycenter,bbx_width,bbx_height\n")

    # enumerate supplies the running row id
    for counter, result in enumerate(results):
        boxes = result.boxes
        if boxes.cls.numel() == 0:
            # no detections for this image: emit the placeholder row
            cls = result.names[0]
            conf = x = y = w = h = 0.0
        else:
            # keep only the most confident box
            best_idx = int(torch.argmax(boxes.conf))
            cls = result.names[int(boxes.cls[best_idx].item())]
            conf = boxes.conf[best_idx].item()
            # xywhn row -> x center, y center, width, height as Python floats
            x, y, w, h = boxes.xywhn[best_idx].tolist()

        row = [counter, os.path.basename(result.path), cls, conf, x, y, w, h]
        predictions.write(','.join(map(str, row)) + '\n')

In [None]:
# reordering the submission according to sample_submission.csv
pred_csv_path = "/kaggle/working/submission.csv"
submission_csv_path = "/kaggle/input/dlp-object-detection-week-10/sample_submission.csv"

pred_df = pd.read_csv(pred_csv_path)
submission_df = pd.read_csv(submission_csv_path)

# merging based on 'ImageID' to reorder pred_df
# (a left join keeps the row order of the sample submission)
reordered_pred_df = submission_df[['ImageID']].merge(pred_df, on='ImageID', how='left')

# a left join leaves NaN wherever an ImageID has no prediction row; report it
# instead of silently writing incomplete rows
missing_count = int(reordered_pred_df['LabelName'].isna().sum())
if missing_count:
    print(f"Warning: {missing_count} ImageID(s) from the sample submission have no prediction row")

# reordering columns to have 'id' as the first column
column_order = ['id', 'ImageID', 'LabelName', 'Conf', 'xcenter', 'ycenter', 'bbx_width', 'bbx_height']
reordered_pred_df = reordered_pred_df[column_order]

# resetting the 'id' column to start from 0
reordered_pred_df['id'] = range(len(reordered_pred_df))

# write to the same absolute path that was read above, so the result does not
# depend on the notebook's current working directory
reordered_pred_df.to_csv(pred_csv_path, index=False)
print(reordered_pred_df.head())


In [None]:
# load the final submission back from disk and show it as the cell output
final_submission_path = "/kaggle/working/submission.csv"
df = pd.read_csv(final_submission_path)
df