# Kaggle K-Fold Final Execution
This notebook runs the remaining experiments for the Audio Classification project.

**Targets:**
1. **ESC-50**: HybridNet (5 Folds) - *ResNet/MobileNet already done*
2. **EmoDB**: All Models (15 Runs) - *Fresh start*

**Prerequisites:**
- Dataset: `colab_all_kfold.zip` attached to this notebook as an input dataset (an already-unzipped copy containing the `product` folder also works).

In [None]:
import os
import shutil
import zipfile
from pathlib import Path
import sys
import subprocess

# --- CONFIG ---
# Directory that is searched for the uploaded dataset.
KAGGLE_INPUT_DIR = Path('/kaggle/input')
# Project root that every later cell works in.
WORK_DIR = Path('/kaggle/working/PROJECT')
# Windows path prefix that is later replaced inside the split CSV files.
WIN_PREFIX = r'C:\FYP\PROJECT'

# Start from an empty workspace: remove whatever an earlier run of this cell
# left behind, then recreate the (empty) project root.
if os.path.exists(WORK_DIR):
    shutil.rmtree(WORK_DIR)
os.makedirs(WORK_DIR, exist_ok=True)

# --- 1. EXTRACT DATA ---
# Fill WORK_DIR from whatever is available under KAGGLE_INPUT_DIR: either a
# zip archive (Strategy 1) or an already-unzipped 'product' folder (Strategy 2).

# Strategy 1: Look for ZIP
# Path.glob() yields matches in arbitrary order, so sort for a reproducible
# choice, and skip directories whose name merely ends in ".zip" (ZipFile
# cannot open those).
zips = sorted(p for p in KAGGLE_INPUT_DIR.glob('**/*.zip') if p.is_file())

if zips:
    # Prefer the archive named in the prerequisites; otherwise fall back to
    # the first zip in sorted order.
    ZIP_PATH = next((z for z in zips if z.name == 'colab_all_kfold.zip'), zips[0])
    print(f"Found zip: {ZIP_PATH}")
    print("Extracting zip... (this takes ~1-2 mins)")
    with zipfile.ZipFile(ZIP_PATH, 'r') as zf:
        # NOTE(review): assumes the archive has a top-level PROJECT/ folder so
        # that its content ends up inside WORK_DIR - confirm against the zip.
        zf.extractall('/kaggle/working/')
    print("Extraction complete.")
else:
    # Strategy 2: Look for Unzipped Folder (Kaggle sometimes auto-unzips)
    print("No zip found. Searching for unzipped 'product' folder...")
    # Only real directories count, and the shallowest match wins so that a
    # nested folder that happens to be called 'product' is never picked over
    # the top-level one.
    found_product = sorted(
        (p for p in KAGGLE_INPUT_DIR.glob('**/product') if p.is_dir()),
        key=lambda p: (len(p.parts), str(p)),
    )

    if found_product:
        # The folder may sit at /kaggle/input/<dataset>/product or one level
        # deeper (.../PROJECT/product). Wherever it is found, it is copied to
        # WORK_DIR/product so the layout matches what the later cells expect.
        src_product = found_product[0]
        dest_product = WORK_DIR / 'product'

        print(f"Found product folder at: {src_product}")
        print(f"Copying to {dest_product}...")
        shutil.copytree(src_product, dest_product)

        # Also copy loose files that sit next to 'product' (e.g. diary.md).
        src_root = src_product.parent
        for item in src_root.iterdir():
            if item.name != 'product' and item.is_file():
                shutil.copy(item, WORK_DIR / item.name)
        print("Copy complete.")

    else:
        # Debug info: show the shallow directory structure of the input
        # folder before failing, to help locate a misplaced dataset.
        print("!!! ERROR: Could not find dataset !!!")
        print("Listing /kaggle/input content:")
        for p in KAGGLE_INPUT_DIR.rglob('*'):
            if p.is_dir() and len(p.parts) < 6:
                print(p)
        raise FileNotFoundError("Dataset not found! Did you add 'colab_all_kfold.zip' to the notebook inputs?")

In [None]:
# --- 2. SETUP ENVIRONMENT & FIX PATHS ---
# The training script is launched through a path relative to the project
# root, so make the project root the current working directory.
os.chdir(WORK_DIR)
print(f"Current working directory: {os.getcwd()}")

ROOT = WORK_DIR
SPLITS = ROOT / 'product/artifacts/splits'

# Rewrite Windows paths that start with WIN_PREFIX inside the split CSVs so
# they point below ROOT and use forward slashes.
fixed = 0
if SPLITS.exists():
    # No trailing '/' is appended to the replacement: the separator that
    # follows the prefix in the original path is kept and converted by the
    # backslash replacement below. Appending one here would produce
    # '<ROOT>//product/...'.
    posix_root = str(ROOT).rstrip('/')
    for csv_file in SPLITS.glob('*.csv'):
        text = csv_file.read_text()
        if WIN_PREFIX in text:
            text = text.replace(WIN_PREFIX, posix_root)
            # Convert the remaining Windows separators in the file.
            text = text.replace('\\', '/')
            csv_file.write_text(text)
            fixed += 1
    print(f"Fixed paths in {fixed} CSV files.")
else:
    print("WARNING: splits directory not found. Something might be wrong with extraction.")

In [None]:
# --- 3. RUNNING ENGINE ---
def run_kfold(dataset, models, n_folds=5):
    """Train every model on every fold of ``dataset``, skipping finished runs.

    Each run is started as a child process executing
    ``product/training/train_unified.py`` (relative to the current working
    directory) with the current Python interpreter. The child's stdout and
    stderr are merged and echoed line by line. A run is considered finished
    when ``ROOT/product/artifacts/runs/<dataset>/<run_id>/summary.json``
    exists. A run that exits with a non-zero code is reported and the loop
    moves on to the next run.

    Args:
        dataset: Dataset key passed to ``--dataset``. ``'pitt'`` and
            ``'italian_pd'`` receive additional hyper-parameter flags.
        models: Iterable of model names, each passed to ``--model_type``.
        n_folds: Number of folds to run; folds are numbered
            ``0 .. n_folds - 1``. Defaults to 5.
    """
    print(f"\n[{dataset.upper()}] Starting Runs...")
    RESULTS_DIR = ROOT / 'product/artifacts/runs' / dataset
    RESULTS_DIR.mkdir(parents=True, exist_ok=True)

    for model in models:
        for fold in range(n_folds):
            run_id = f"{dataset}_{model}_fold{fold}"
            summary_path = RESULTS_DIR / run_id / 'summary.json'

            # An existing summary marks a completed run; do not repeat it.
            if summary_path.exists():
                print(f"SKIP {run_id} (Already Done)")
                continue

            print(f">>> RUNNING {run_id}")

            # Base Params
            args = ['--epochs', '30', '--batch_size', '32']

            # OVERRIDE: Dataset Specific Protocol
            if dataset == 'pitt':
                args += ['--weighted_loss', '--lr', '1e-5', '--dropout', '0.7', '--weight_decay', '0.1', '--unfreeze_at', '10']
            elif dataset == 'italian_pd':
                args += ['--lr', '5e-5', '--dropout', '0.5']

            # '-u' keeps the child's output unbuffered so it can be streamed.
            cmd = [sys.executable, '-u', 'product/training/train_unified.py',
                   '--dataset', dataset,
                   '--model_type', model,
                   '--fold', str(fold)] + args

            # Run and stream output. The context manager closes the stdout
            # pipe and waits for the child on exit, so no pipe handle is left
            # open across the (many) runs and returncode is set afterwards.
            with subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True, bufsize=1) as process:
                for line in process.stdout:
                    print(line, end='')

            if process.returncode != 0:
                print(f"!!! ERROR in {run_id} !!!")
            else:
                print(f"--- Finished {run_id} ---")

In [None]:
# --- 4. EXECUTE REMAINING EXPERIMENTS ---
# One entry per dataset: (banner to print, dataset key, models to train).
#   1. ESC-50: Only HybridNet (ResNet/MobileNet are complete)
#   2. EmoDB: All Models (Fresh start)
for banner, dataset_key, model_names in (
    ("\n--- STARTING ESC-50 ---", 'esc50', ['hybrid']),
    ("\n--- STARTING EMODB ---", 'emodb', ['resnet50', 'mobilenetv2', 'hybrid']),
):
    print(banner)
    run_kfold(dataset_key, model_names)

In [None]:
# --- 5. ZIP RESULTS ---
output_zip = '/kaggle/working/kfold_final_results.zip'
print(f"Zipping results to {output_zip}...")

def zip_dir(path, ziph):
    """Add every file found under ``path`` to the open archive ``ziph``.

    Archive names are made relative to the parent of ``path``, so the last
    component of ``path`` stays as the top-level folder inside the zip.
    Only files are written; directories get no entry of their own.
    """
    # Entries are named relative to this anchor (the parent of ``path``).
    anchor = os.path.join(path, '..')
    for folder, _subdirs, filenames in os.walk(path):
        for filename in filenames:
            source = os.path.join(folder, filename)
            ziph.write(source, os.path.relpath(source, anchor))

# Pack the whole runs tree into the output archive using DEFLATE compression.
runs_dir = ROOT / 'product/artifacts/runs'
with zipfile.ZipFile(output_zip, mode='w', compression=zipfile.ZIP_DEFLATED) as zipf:
    zip_dir(runs_dir, zipf)

print("Done! Download kfold_final_results.zip from the Output tab.")