In [None]:
!sudo apt-get install -y rclone
# 自动配置rclone
config_content = """
[gdrive]
type = drive
scope = drive
team_drive =
token = nothinghere"""
!mkdir -p ~/.config/rclone
with open("/root/.config/rclone/rclone.conf", "w") as f:
  f.write(config_content)

print("Rclone configured successfully!")

In [None]:
# 挂载谷歌盘
# from google.colab import drive
# drive.mount('/content/drive')
!rclone lsd gdrive:
!rclone sync gdrive:VisDrone /content/VisDrone -P
!ls /content/VisDrone
!unzip /content/VisDrone/VisDrone-YOLO.zip -d /content/
print("Unzipping hard negative samples...")
!tar -xzf /content/VisDrone/negative_samples/hard_negatives.tar.gz -C /content/VisDrone-YOLO/
!pip install ultralytics ruamel.yaml

In [None]:
import os
from ruamel.yaml import YAML

# 配置文件路径
yaml_path = '/content/VisDrone-YOLO/data.yaml'

new_train_paths = [
    'train/images',
    'hard_negatives/images'   # 新增的困难负样本
]

yaml = YAML()
yaml.preserve_quotes = True
data = None

print(f"Updating {yaml_path} for training with negative samples...")

with open(yaml_path, 'r') as f:
    data = yaml.load(f)

# 修改 train 字段
# 同时，我们要确保 data.yaml 里的 'path' 字段被正确设置或移除
# YOLOv8 会自动将 data.yaml 的目录作为根路径
if 'path' in data:
    del data['path']
    print("Removed 'path' key to rely on implicit path discovery.")

data['train'] = new_train_paths

# 打印出来确认一下
print("Updated 'train' paths:", data.get('train'))
print("Current 'val' path:", data.get('val')) # 确认 val 路径没被动过

# 写回文件
with open(yaml_path, 'w') as f:
    yaml.dump(data, f)

print("\n--- Content of updated data.yaml ---")
!cat {yaml_path}
print("------------------------------------")
print("data.yaml has been updated successfully!")

In [None]:
# --- Periodic automatic backup ---
# Starts a detached background loop that syncs the training output directory to
# Google Drive every `sync_interval_seconds` (1800 s = 30 minutes), so a recent
# backup exists on Drive even if the Colab session crashes or disconnects.

import os
import shlex
import subprocess
import time

# Source and destination of the sync.
source_dir = "/content/runs"
destination_dir = "gdrive:VisDrone/training_results_live"  # separate name so older results are not overwritten
sync_interval_seconds = 1800
sync_log_path = "/content/rclone_sync.log"
sync_pid_path = "/content/rclone_sync.pid"


def _running_sync_pid(pid_path):
    """Return the PID stored in `pid_path` if that process still exists, else None."""
    try:
        with open(pid_path) as pid_file:
            pid = int(pid_file.read().strip())
        os.kill(pid, 0)  # signal 0 delivers nothing; it only checks that the PID exists
    except (OSError, ValueError):
        # No PID file, unreadable content, or no such process.
        return None
    return pid


# Reap a loop started earlier from this kernel that has since exited; otherwise
# its zombie entry would still look alive to the existence check above.
_previous_proc = globals().get("proc")
if isinstance(_previous_proc, subprocess.Popen):
    _previous_proc.poll()

# NOTE(review): `rclone sync` makes the destination mirror the source, so files
# that disappear from /content/runs are also deleted from the backup — confirm
# this is wanted (rclone copy would only add/update).
loop_script = (
    "while true; do "
    f"rclone sync {shlex.quote(source_dir)} {shlex.quote(destination_dir)} -P; "
    'echo "Sync completed at $(date)"; '
    f"sleep {sync_interval_seconds}; "
    "done"
)

existing_pid = _running_sync_pid(sync_pid_path)
if existing_pid is not None:
    # Re-running the cell must not stack a second loop on top of the first.
    print(f"Background sync is already running with PID: {existing_pid}; not starting another one.")
else:
    print("Starting periodic background sync to Google Drive every 30 minutes...")
    # Run the loop directly as the child process (no `&` inside a wrapper shell),
    # so proc.pid is the PID of the loop itself and can be used to stop it.
    # start_new_session detaches it from the kernel's session, which is what
    # nohup was used for; output goes straight to the log file, not to pipes
    # that nobody reads.
    with open(sync_log_path, "wb") as log_file:
        proc = subprocess.Popen(
            ["sh", "-c", loop_script],
            stdin=subprocess.DEVNULL,
            stdout=log_file,
            stderr=subprocess.STDOUT,
            start_new_session=True,
        )
    with open(sync_pid_path, "w") as pid_file:
        pid_file.write(str(proc.pid))
    print(f"Background sync process started with PID: {proc.pid}")
    print("You can now start your training. Check the log file /content/rclone_sync.log for sync status.")

In [None]:
# Train a YOLOv8 detector on the dataset described by /content/VisDrone-YOLO/data.yaml.
# Disabled alternative: yolov8m, 85 epochs, batch 8, 4 workers
# (presumably meant for an L4 GPU, judging by the run name — confirm).
# !yolo detect train data=/content/VisDrone-YOLO/data.yaml model=yolov8m.pt epochs=85 imgsz=1024 batch=8 name=visdrone_yolov8m_L4_colab workers=4 device=0 half=True
# Configuration for the A100: yolov8l, 90 epochs, 1024 px images, batch 16, 8 workers.
# NOTE(review): half=True is documented by Ultralytics as a validation/predict
# option; mixed-precision training is controlled by `amp` — confirm it is needed here.
!yolo detect train data=/content/VisDrone-YOLO/data.yaml model=yolov8l.pt epochs=90 imgsz=1024 batch=16 name=visdrone_yolov8l_A100_colab workers=8 device=0 half=True

In [None]:
# After training finishes, sync the results back to Google Drive.
# Earlier approach that copied into a mounted Drive folder (disabled):
# !mkdir -p /content/drive/MyDrive/VisDrone/training_results/
# !cp -r /content/runs/detect/visdrone_yolov8s_colab_run1 /content/drive/MyDrive/VisDrone/training_results
# Sync the whole /content/runs directory to the training_results folder on the gdrive remote.
!rclone sync /content/runs gdrive:VisDrone/training_results -P

In [None]:
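# After the Colab run has finished, sync the results back to the local machine:
# rclone sync gdrive:VisDrone/training_results ~/VisDroneDataset/runs/from_colab -P
# Kept only as a reminder of the command; do not run this in Colab.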