<a href="https://www.kaggle.com/code/comeozanne/wastetide-yolov7?scriptVersionId=212704914" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

## Data preprocessing

We start by sorting our data and make it compatible with yolov7 model


In [1]:
# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    print(len(filenames))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

0
0
989


In [2]:
os.mkdir('dataset')

In [3]:
import shutil
destination = 'dataset'

for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        shutil.copy(os.path.join(dirname, filename),destination)
        

In [4]:
os.mkdir('dataset/train')
os.mkdir('dataset/test')
os.mkdir('dataset/val')

In [5]:
for dir in ['train','test','val']:
    folder = os.path.join(destination,dir)
    for name in ['images', 'labels']:
        os.mkdir(os.path.join(folder,name))
    

In [6]:
# not very useful but not very time consuming either
def delete(filepath):
    try:
        os.remove(filepath)
        print(f"file {filepath} was removed")
    except FileNotFoundError:
        print("file does not exist")
    except PermissionError:
        print("permission denied : cannot delete file.")
    except Exception as e:
        print(f"An error occured : {e}")

In [7]:
import os
import shutil
import random

def random_split(root, destination, a=0.7, b=0.2):
    """
    Divise les fichiers dans les répertoires 'train', 'val', et 'test' selon les proportions données.
    
    Args:
        root (str): Chemin vers le répertoire contenant les fichiers.
        destination (str): Chemin où les fichiers seront divisés en 'train', 'val', et 'test'.
        a (float): Proportion des fichiers pour le dossier 'train' (par défaut 0.7).
        b (float): Proportion des fichiers pour le dossier 'val' (par défaut 0.2).
    """
    # Vérifie que les proportions sont correctes
    if a + b >= 1.0:
        raise ValueError("Les proportions a et b doivent être inférieures à 1.")

    # Création des sous-dossiers si nécessaires
    train_dir = os.path.join(destination, 'train')
    val_dir = os.path.join(destination, 'val')
    test_dir = os.path.join(destination, 'test')
    
    os.makedirs(train_dir, exist_ok=True)
    os.makedirs(val_dir, exist_ok=True)
    os.makedirs(test_dir, exist_ok=True)
    
    # Liste des fichiers dans le répertoire source
    files = [f for f in os.listdir(root) if f.endswith('.txt')]
    
    # Mélange aléatoire des fichiers
    random.shuffle(files)
    
    # Répartition des fichiers
    num_train = int(len(files) * a)
    num_val = int(len(files) * b)
    
    train_files = files[:num_train]
    val_files = files[num_train:num_train + num_val]
    test_files = files[num_train + num_val:]
    
    # Déplacement des fichiers
    for file_list, target_dir in [(train_files, train_dir), (val_files, val_dir), (test_files, test_dir)]:
        for filename in file_list:
            # Déplace le fichier texte
            shutil.move(os.path.join(root, filename), os.path.join(os.path.join(target_dir, 'labels'),filename))
            
            # Vérifie et déplace l'image associée si elle existe
            image_file = filename.replace('.txt', '.jpg')
            if os.path.exists(os.path.join(root, image_file)):
                shutil.move(os.path.join(root, image_file), os.path.join(os.path.join(target_dir, 'images'),image_file))
    
    print(f"Fichiers répartis : {len(train_files)} dans 'train', {len(val_files)} dans 'val', {len(test_files)} dans 'test'.")

    
random_split(destination, destination, a=0.7, b=0.2)


Fichiers répartis : 344 dans 'train', 98 dans 'val', 50 dans 'test'.


In [8]:
# we remove the few images without annotations 
for file in os.listdir('dataset'):
    if file.endswith('.jpg'):
        print(file)
        delete(os.path.join('dataset',file))

aa688c30-2bed-4dd6-a1b5-b16412d39a7d.jpg
file dataset/aa688c30-2bed-4dd6-a1b5-b16412d39a7d.jpg was removed
a9d72b8a-58a2-4598-82db-6a72a0e7cffe.jpg
file dataset/a9d72b8a-58a2-4598-82db-6a72a0e7cffe.jpg was removed
dda74d75-af5c-4683-ad11-7497058bba06.jpg
file dataset/dda74d75-af5c-4683-ad11-7497058bba06.jpg was removed
24fee5fa-399c-40ec-bf12-f8b61ded6321.jpg
file dataset/24fee5fa-399c-40ec-bf12-f8b61ded6321.jpg was removed
855be8b3-2b4e-4df1-af07-c6c08208738f.jpg
file dataset/855be8b3-2b4e-4df1-af07-c6c08208738f.jpg was removed


In [9]:
# we create the yaml file necessary for yolov7
data_yaml = f"""
train: {os.path.join(destination, 'train', 'images')}
val: {os.path.join(destination, 'val', 'images')}
test: {os.path.join(destination, 'test', 'images')}

names:
- biowaste
- cardboard
- electronic
- glass
- hazardous
- metal
- other
- paper
- plastic
- textile
- wood
- PET
- PS

nc: 13
"""

with open(os.path.join(destination, 'data.yaml'), 'w') as f:
    f.write(data_yaml)

In [10]:
print(os.path.exists('dataset/data.yaml'))

True


## Model
We download yolov7

In [11]:
# don't forget to activate internet connection

!git clone https://github.com/WongKinYiu/yolov7.git


Cloning into 'yolov7'...
remote: Enumerating objects: 1197, done.[K
remote: Total 1197 (delta 0), reused 0 (delta 0), pack-reused 1197 (from 1)[K
Receiving objects: 100% (1197/1197), 74.23 MiB | 10.83 MiB/s, done.
Resolving deltas: 100% (519/519), done.


In [12]:
cd yolov7

/kaggle/working/yolov7


In [13]:
!# Download trained weights
!wget https://github.com/WongKinYiu/yolov7/releases/download/v0.1/yolov7_training.pt

--2024-12-12 14:28:11--  https://github.com/WongKinYiu/yolov7/releases/download/v0.1/yolov7_training.pt
Resolving github.com (github.com)... 140.82.116.3
Connecting to github.com (github.com)|140.82.116.3|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://objects.githubusercontent.com/github-production-release-asset-2e65be/511187726/13e046d1-f7f0-43ab-910b-480613181b1f?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=releaseassetproduction%2F20241212%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20241212T142811Z&X-Amz-Expires=300&X-Amz-Signature=84ba8e2faa8794a6dc57052c6113c5143e917026c22cdb3cd1d871e3e2acf822&X-Amz-SignedHeaders=host&response-content-disposition=attachment%3B%20filename%3Dyolov7_training.pt&response-content-type=application%2Foctet-stream [following]
--2024-12-12 14:28:11--  https://objects.githubusercontent.com/github-production-release-asset-2e65be/511187726/13e046d1-f7f0-43ab-910b-480613181b1f?X-Amz-Algorithm=AWS4-HMAC-SHA256&X

In [14]:
!pip uninstall wandb -y

Found existing installation: wandb 0.18.7
Uninstalling wandb-0.18.7:
  Successfully uninstalled wandb-0.18.7


In [15]:
cd ..

/kaggle/working


In [16]:
# Train
#don't forget to activate accelerator (GPU) on the right panel, you have 30h free / week on kaggle (0,1 means I'm using 2 GPUs, write only 0 if only 1 GPU)
!python yolov7/train.py --workers 8 --device 0,1 --batch-size 16 --epochs 90 --data dataset/data.yaml  --cfg yolov7/cfg/training/yolov7.yaml --weights yolov7/yolov7_training.pt --name yolov7_wastetide_1 --hyp yolov7/data/hyp.scratch.p5.yaml

  run_id = torch.load(weights, map_location=device).get('wandb_id') if weights.endswith('.pt') and os.path.isfile(weights) else None
[34m[1mwandb: [0mInstall Weights & Biases for YOLOR logging with 'pip install wandb' (recommended)
  ckpt = torch.load(weights, map_location=device)  # load checkpoint
[34m[1mtrain: [0mScanning 'dataset/train/labels' images and labels... 344 found, 0 missing[0m
  self.pid = os.fork()
[34m[1mval: [0mScanning 'dataset/val/labels' images and labels... 98 found, 0 missing, 0 e[0m

[34m[1mautoanchor: [0mAnalyzing anchors... anchors/target = 5.77, Best Possible Recall (BPR) = 1.0000
  scaler = amp.GradScaler(enabled=cuda)
  with amp.autocast(enabled=cuda):
  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):
      0/89     1.29G    0.0799   0.03817   0.04159    0.1597       118       640
  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]
               Class      Images  

In [17]:
!python yolov7/test.py --data dataset/data.yaml  --weights runs/train/yolov7_wastetide_1/weights/best.pt

Namespace(weights=['runs/train/yolov7_wastetide_1/weights/best.pt'], data='dataset/data.yaml', batch_size=32, img_size=640, conf_thres=0.001, iou_thres=0.65, task='val', device='', single_cls=False, augment=False, verbose=False, save_txt=False, save_hybrid=False, save_conf=False, save_json=False, project='runs/test', name='exp', exist_ok=False, no_trace=False, v5_metric=False)
  ckpt = torch.load(w, map_location=map_location)  # load
Fusing layers... 
RepConv.fuse_repvgg_block
RepConv.fuse_repvgg_block
RepConv.fuse_repvgg_block
IDetect.fuse
 Convert model to Traced-model... 
 traced_script_module saved! 
 model is traced! 

  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]
  cache, exists = torch.load(cache_path), True  # load
[34m[1mval: [0mScanning 'dataset/val/labels.cache' images and labels... 98 found, 0 missin[0m
               Class      Images      Labels           P           R      mAP@.5
                 all          98        1123    

All results for training metrics are in runs/train