# Model Training
This notebook will eventually be dedicated to training models: we specify the target directories to train from, and it reports model performance.

In [None]:
from ultralytics import YOLO
import os
import shutil
import sys
from sklearn.model_selection import train_test_split
sys.path.append(os.path.abspath('../src'))
import utils

In [None]:
# Relative paths to the processed images and their formatted label files.
# NOTE(review): these point at the same folders as img_source_dir / label_source_dir
# (defined in a later cell) and are not referenced by any other visible cell —
# confirm whether they are still needed.
image_dir = '../data/images/processed/'
label_dir = '../data/labels/formatted/'


In [None]:
# Helpers that move image/label pairs into train and validation directories.
def _move_split_files(files, img_source_dir, label_source_dir, img_dest_dir, label_dest_dir, split_name):
    """Move each image in *files* and its same-stem ``.txt`` label into one split.

    Args:
        files: Image file names (not full paths) found in ``img_source_dir``.
        img_source_dir: Directory the images are moved from.
        label_source_dir: Directory the label files are moved from.
        img_dest_dir: Directory the images are moved to (created if missing).
        label_dest_dir: Directory the labels are moved to (created if missing).
        split_name: Short split tag (``'train'`` or ``'val'``) used in the
            progress and error messages.
    """
    # shutil.move fails if the target directory is missing, so create it up front.
    os.makedirs(img_dest_dir, exist_ok=True)
    os.makedirs(label_dest_dir, exist_ok=True)

    for file in files:
        label_file = os.path.splitext(file)[0] + '.txt'
        img_source_path = os.path.join(img_source_dir, file)
        label_source_path = os.path.join(label_source_dir, label_file)

        # Check the label first: moving the image and then failing on the label
        # would leave an image in the split without its annotation.
        if not os.path.isfile(label_source_path):
            print(f"Error moving {file} to image/{split_name}/ and {label_file} to label/{split_name}/: "
                  f"label file not found: {label_source_path}")
            continue

        try:
            shutil.move(img_source_path, os.path.join(img_dest_dir, file))
            print(f"Moved {file} to image/{split_name}/")
            shutil.move(label_source_path, os.path.join(label_dest_dir, label_file))
            print(f"Moved {label_file} to label/{split_name}/")
        except OSError as e:
            # Best effort: report the failure and carry on with the remaining files.
            print(f"Error moving {file} to image/{split_name}/ and {label_file} to label/{split_name}/: {e}")


def train_val_split(img_source_dir, label_source_dir, img_train_dir, label_train_dir, img_val_dir, label_val_dir, train_percent=0.8):
    """Split the images in a directory into train/validation sets and move them.

    Every regular file in ``img_source_dir`` is treated as an image. Its label is
    the file with the same stem and a ``.txt`` extension in ``label_source_dir``.
    Files are *moved*, not copied, so the source directories shrink as the split
    proceeds. Pairs whose label file is missing are reported and left in place.

    Args:
        img_source_dir: Directory containing the images to split.
        label_source_dir: Directory containing the matching ``.txt`` labels.
        img_train_dir: Destination for training images.
        label_train_dir: Destination for training labels.
        img_val_dir: Destination for validation images.
        label_val_dir: Destination for validation labels.
        train_percent: Fraction of the images assigned to the training split;
            the remainder goes to validation.

    Returns:
        None. Progress and errors are printed.
    """
    # os.listdir returns entries in arbitrary order, so sort them to make the
    # fixed random_state produce the same split on every machine. Sub-directories
    # are skipped because they are not images.
    img_files = sorted(
        name for name in os.listdir(img_source_dir)
        if os.path.isfile(os.path.join(img_source_dir, name))
    )

    # Nothing to do, e.g. when the cell is re-run after all files were moved.
    if not img_files:
        print(f"No files found in {img_source_dir}; nothing to split.")
        return

    train_files, val_files = train_test_split(img_files, test_size=1 - train_percent, random_state=42)

    _move_split_files(train_files, img_source_dir, label_source_dir, img_train_dir, label_train_dir, 'train')
    _move_split_files(val_files, img_source_dir, label_source_dir, img_val_dir, label_val_dir, 'val')
        


In [None]:
# All paths hang off a shared data root; split targets live under model_data/.
_data_root = '../data'
_model_data_root = f'{_data_root}/model_data'

# Where the not-yet-split images and label files are read from.
img_source_dir = f'{_data_root}/images/processed'
label_source_dir = f'{_data_root}/labels/formatted'

# Training split destinations.
img_train_dir = f'{_model_data_root}/images/train'
label_train_dir = f'{_model_data_root}/labels/train'

# Validation split destinations.
img_val_dir = f'{_model_data_root}/images/validation'
label_val_dir = f'{_model_data_root}/labels/validation'

In [None]:
# Run the split: 80% of the image/label pairs go to train, the rest to validation.
# Keyword arguments keep the six directory paths from being mixed up.
train_val_split(
    img_source_dir=img_source_dir,
    label_source_dir=label_source_dir,
    img_train_dir=img_train_dir,
    label_train_dir=label_train_dir,
    img_val_dir=img_val_dir,
    label_val_dir=label_val_dir,
    train_percent=0.8,
)

In [None]:
# Create a YOLO model from the "yolov8n.yaml" definition and train it for
# 10 epochs at image size 768 on the dataset described by 'data.yaml'.
# NOTE(review): presumably a .yaml model spec (rather than .pt weights) means
# training from scratch — confirm against the Ultralytics docs.
# NOTE(review): no visible cell writes 'data.yaml' to disk — confirm the file exists
# where this notebook runs.
model = YOLO("yolov8n.yaml")
results = model.train(data='data.yaml', epochs=10, imgsz=768)


In [None]:
# NOTE(review): dump_data() is defined in the project's utils module, which is not
# shown here — confirm what it does (and whether it is destructive) before running.
utils.dump_data()

In [None]:
# define data.yaml file
# NOTE(review): this only builds the YAML text in memory; no visible cell writes it
# to the 'data.yaml' file that the training cell passes to model.train().
# NOTE(review): Ultralytics dataset YAML files normally also declare the class
# `names` — TODO confirm and add them.
data_yaml = """
train: ../data/model_data/images/train/ # Path to images directory
val: ../data/model_data/images/validation/ # Path 
"""

In [None]:
# Note to self: write a Python script that records which images end up in the training and validation splits each time the model is trained, so model performance can be traced back to the data used.

In [None]:
# Strip every comma from each label file in label_source_dir, in place.
# NOTE(review): this assumes values are separated by ", " (comma + space); if they are
# separated by a bare comma, removing it would fuse adjacent numbers — confirm the
# label format before running.
for label in os.listdir(label_source_dir):
    file_path = os.path.join(label_source_dir, label)

    # Skip sub-directories (e.g. notebook checkpoint folders); open() would fail on them.
    if not os.path.isfile(file_path):
        continue

    with open(file_path, 'r') as file:
        original = file.read()

    content = original.replace(',', '')

    # Only rewrite files that actually contained commas.
    if content != original:
        with open(file_path, 'w') as file:
            file.write(content)

In [None]:
# C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.1

In [1]:
import torch

# Query CUDA availability once and reuse the answer for both report lines.
has_cuda = torch.cuda.is_available()
gpu_name = torch.cuda.get_device_name(0) if has_cuda else "No CUDA device found"

print("CUDA available:", has_cuda)
print("GPU Name:", gpu_name)

CUDA available: False
GPU Name: No CUDA device found
