# 🧠 TinyML - Colab Training Notebook

**Version:** V.1.3.13 - Optimized for main branch

This notebook trains your TinyML model on Google Colab GPU.

**Flow:**
1. ✅ Check GPU availability
2. 📥 Clone or update from GitHub (main branch)
3. 📦 Install Colab-compatible dependencies
4. 🚀 Run training script (`scripts/train.py`) → saves `src/models/global_model.h5`
5. 💾 Save exported models to Google Drive


**Key Features:**

- Uses `main` branch (latest stable code)
- TFLite export support
- Automatic dependency management
- Timestamped model preservation

In [None]:
# 1. Mount Google Drive
from google.colab import drive
drive.mount('/content/drive')

# 2. Set the data folder path.
# DATA_DIR is written into config/federated_colab.yaml as data.path by the
# "Update config data path" cell.
DATA_DIR = "/content/drive/MyDrive/TinyML_models"

## 1️⃣ Runtime & GPU check
Make sure you set **Runtime → Change runtime type → Hardware accelerator → GPU** before running.


In [None]:
# Show GPU status with nvidia-smi; if that command fails, echo a hint to enable the GPU runtime.
!nvidia-smi || echo "No NVIDIA GPU detected. Please enable GPU in Runtime settings."


## 2️⃣ Clone or update TinyML repository

Set your GitHub repo URL if different.


In [None]:
import os
import sys
import subprocess

REPO_URL = "https://github.com/danielsoo/TinyML.git"  # change if needed
PROJECT_DIR = "/content/TinyML"


def _git(*args, strict=False):
    """Run ``git <args>`` with captured text output.

    With ``strict=True`` a non-zero exit status raises
    ``subprocess.CalledProcessError``; otherwise the caller inspects
    ``returncode`` on the returned ``CompletedProcess``.
    """
    return subprocess.run(["git", *args], capture_output=True, text=True, check=strict)


print("üîÑ Updating repository from GitHub...")
if os.path.exists(PROJECT_DIR):
    # Existing checkout: fetch, then pull the main branch. Failures here are
    # reported but do not stop the cell.
    print(f"üì• Pulling latest changes from {REPO_URL}...")
    os.chdir(PROJECT_DIR)
    _git("fetch", "origin")
    pull = _git("pull", "origin", "main")
    if pull.returncode != 0:
        print(f"‚ö†Ô∏è  Git pull had issues (may be up to date): {pull.stderr[:100]}")
    else:
        print("‚úÖ Repository updated successfully")
        summary = pull.stdout.strip()
        if summary:
            print(f"   Changes: {summary[:100]}")
else:
    # First run: clone; a failed clone aborts the cell.
    print(f"üì• Cloning repository from {REPO_URL}...")
    _git("clone", REPO_URL, PROJECT_DIR, strict=True)
    print("‚úÖ Repository cloned successfully")

# From here on the repository root is the working directory.
os.chdir(PROJECT_DIR)

# Report which commit is checked out.
head = _git("log", "-1", "--oneline")
if head.returncode == 0:
    print(f"üìå Current commit: {head.stdout.strip()}")

# Make the project's packages importable from this notebook.
if PROJECT_DIR not in sys.path:
    sys.path.insert(0, PROJECT_DIR)

print(f"\n‚úÖ Project directory ready: {PROJECT_DIR}")


## 2️⃣.5 Update config data path
Writes the Google Drive data path (`DATA_DIR`) into `config/federated_colab.yaml`.



In [None]:
import yaml
from pathlib import Path

# Colab training config inside the cloned repository.
config_path = Path(PROJECT_DIR) / "config" / "federated_colab.yaml"

if not config_path.exists():
    raise FileNotFoundError(f"Cannot find {config_path}")

with config_path.open("r", encoding="utf-8") as f:
    # safe_load returns None for an empty document; fall back to an empty
    # mapping so the assignments below do not fail.
    cfg = yaml.safe_load(f) or {}

# A bare `data:` key parses as None, which setdefault() would leave in place.
if cfg.get("data") is None:
    cfg["data"] = {}
cfg["data"]["path"] = DATA_DIR

# Explicit UTF-8 because allow_unicode=True may emit non-ASCII characters.
with config_path.open("w", encoding="utf-8") as f:
    yaml.safe_dump(cfg, f, sort_keys=False, allow_unicode=True)

print(f"Updated federated_colab.yaml data.path -> {cfg['data']['path']}")



## 3️⃣ Install Colab-compatible dependencies

**IMPORTANT:** After running the next cell, you MUST restart the runtime!
- Go to: **Runtime → Restart runtime**
- Then skip directly to the training cell (Cell 6️⃣)

This ensures protobuf version stays fixed at 3.20.3 for TensorFlow compatibility.

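## 4️⃣ Install dependencies (Colab compatible)

- Installs from `colab/requirements_colab.txt` if present.
- Installs standard `tensorflow` for Linux GPU.
- Pins `protobuf==3.20.3` and installs `flwr==1.6.0` (without its own dependency resolution) plus Flower's dependencies.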


In [None]:
import os
import subprocess


def _pip(*args):
    """Run ``pip <args>`` and return True when it exits with status 0."""
    return subprocess.run(["pip", *args], check=False).returncode == 0


print("üì¶ Installing dependencies...")

# Names of install steps whose pip command failed, for the final report.
failed_steps = []

# Install from colab-specific requirements
colab_req_file = os.path.join(PROJECT_DIR, "colab/requirements_colab.txt")
if os.path.exists(colab_req_file):
    print(f"   Installing from {colab_req_file}...")
    if not _pip("install", "-r", colab_req_file):
        failed_steps.append(colab_req_file)
else:
    print(f"‚ö†Ô∏è  {colab_req_file} not found.")

# Pin protobuf before installing Flower so the pin is in place first.
print("\nüîß Fixing protobuf compatibility...")
_pip("uninstall", "-y", "protobuf")  # result ignored: only the pinned install below matters
if not _pip("install", "protobuf==3.20.3"):
    failed_steps.append("protobuf==3.20.3")

# Install compatible flwr version (without deps to avoid protobuf upgrade)
print("   Installing Flower with compatible version...")
if not _pip("install", "flwr==1.6.0", "--no-deps"):
    failed_steps.append("flwr==1.6.0")

# Install flwr dependencies separately
flwr_dependencies = [
    "cryptography<42.0.0,>=41.0.2",
    "grpcio!=1.52.0,<2.0.0,>=1.48.2",
    "iterators>=0.0.2,<0.0.3",
    "numpy>=1.21.0,<2.0.0",
    "pycryptodome>=3.18.0,<4.0.0",
]
if not _pip("install", *flwr_dependencies):
    failed_steps.append("flwr dependencies")

# Only claim success when every install command actually succeeded.
if failed_steps:
    print("\n‚ö†Ô∏è  Some installation steps failed:")
    for step in failed_steps:
        print(f"   - {step}")
    print("   Fix the errors above and re-run this cell before restarting the runtime.")
else:
    print("\n‚úÖ Dependencies installed successfully")
    print("‚ö†Ô∏è  IMPORTANT: Restart runtime to apply changes!")
    print("   Go to: Runtime ‚Üí Restart runtime")
    print("   Then skip to the training cell.")

In [None]:
# Report the installed TensorFlow / protobuf versions and the visible GPUs,
# then confirm protobuf is the pinned 3.20.3 release.
import tensorflow as tf
import google.protobuf

protobuf_version = google.protobuf.__version__

print("‚úÖ TensorFlow version:", tf.__version__)
print("‚úÖ Protobuf version:", protobuf_version)
print("‚úÖ GPU devices:", tf.config.list_physical_devices('GPU'))

if protobuf_version == "3.20.3":
    print("‚úÖ All versions compatible!")
else:
    # Any other version means the pin did not take effect in this runtime.
    print("‚ö†Ô∏è  WARNING: Protobuf version mismatch!")
    print("   Please restart runtime and skip to training cell.")

## 5️⃣ (Optional) Download or prepare dataset

Edit this cell if your training script expects data in a specific path.
For example, you can mount Google Drive or download from Kaggle here.


In [None]:
# Optional dataset-preparation hook. The examples below are commented out;
# uncomment and adapt whichever one you need.

# Example: mount Google Drive if your data is stored there.
# from google.colab import drive
# drive.mount('/content/drive')

# Example: create a data directory
# os.makedirs('data', exist_ok=True)
# Then copy or download your dataset into ./data

# As written, this cell only prints the reminder below.
print("Dataset preparation step: customize as needed.")


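## 6️⃣ Run training

The first cell below prints the class distribution of the `attack` column in the dataset CSVs.
The second cell runs `scripts/train.py` from the repository root with `config/federated_colab.yaml`.
If your main script is at a different path, edit accordingly (e.g. `src/train.py`).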


In [None]:
import pandas as pd
from pathlib import Path

# Load every CSV in the data directory and print the label distribution.
data_dir = Path("/content/drive/MyDrive/TinyML_models")  # edit to the directory that holds the CSV files
csv_paths = sorted(data_dir.glob("*.csv"))

# pd.concat([]) fails with an opaque "No objects to concatenate"; name the
# actual problem instead.
if not csv_paths:
    raise FileNotFoundError(f"No CSV files found in {data_dir}")

dfs = [pd.read_csv(p, low_memory=False) for p in csv_paths]
df = pd.concat(dfs, ignore_index=True)

print("Total Samples:", len(df))
if "attack" in df.columns:
    print(df["attack"].value_counts())
    print(df["attack"].value_counts(normalize=True))  # class proportions
else:
    # Show what is available rather than failing with a bare KeyError.
    print(f"Column 'attack' not found; available columns: {list(df.columns)}")

In [None]:
import os
import sys
from datetime import datetime

PROJECT_DIR = "/content/TinyML"
os.chdir(PROJECT_DIR)

# Add project directory to Python path
if PROJECT_DIR not in sys.path:
    sys.path.insert(0, PROJECT_DIR)

# Use train.py script (unified training script)
# This will automatically detect Colab environment and use federated_colab.yaml
print("üöÄ Running training with scripts/train.py...")
print("   This will use config/federated_colab.yaml automatically")
print("   You will see training progress below...\n")
print("="*60)

# Use ! command for real-time output in Colab
!python scripts/train.py --config config/federated_colab.yaml

# Check if model was actually saved
from pathlib import Path
model_path = Path(PROJECT_DIR) / "src" / "models" / "global_model.h5"
model_exists = model_path.exists() and model_path.stat().st_size > 0

print("\n" + "="*60)
if model_exists:
    print("‚úÖ Training complete!")
    print(f"   Model saved to: src/models/global_model.h5")
    # Show model size
    size_mb = model_path.stat().st_size / (1024 * 1024)
    print(f"   Model size: {size_mb:.2f} MB")
else:
    print("‚ùå Training failed!")
    print("   Model file not found.")

## 7️⃣ Model Compression

Apply pruning and quantization to compress the trained model.
This will create multiple compressed versions for analysis.

In [None]:
# Apply compression without evaluation (to avoid data shape mismatch).
# Steps: baseline TFLite export -> structured pruning -> pruned TFLite export
# -> INT8 quantization of the pruned model.
import os
import sys
import shutil
import tensorflow as tf
import numpy as np

PROJECT_DIR = "/content/TinyML"
os.chdir(PROJECT_DIR)

if PROJECT_DIR not in sys.path:
    sys.path.insert(0, PROJECT_DIR)


def _convert_and_write(converter, tflite_path):
    """Run *converter*, write the result to *tflite_path*, return its size in KB."""
    flatbuffer = converter.convert()
    with open(tflite_path, "wb") as out:
        out.write(flatbuffer)
    return len(flatbuffer) / 1024


# Nothing to compress unless training has produced a model.
model_path = "src/models/global_model.h5"
if os.path.exists(model_path):
    from src.modelcompression.pruning import apply_structured_pruning

    rule = "-" * 60
    banner = "=" * 60

    # Load the trained Keras model.
    print("üì¶ Loading trained model...")
    model = tf.keras.models.load_model(model_path)
    print(f"‚úÖ Model loaded (input shape: {model.input_shape})\n")

    # All TFLite artifacts go under models/tflite.
    os.makedirs("models/tflite", exist_ok=True)

    # Step 1: unmodified model as TFLite.
    print("üíæ Step 1: Exporting Baseline TFLite (Float32)")
    print(rule)
    baseline_path = "models/tflite/saved_model_original.tflite"
    baseline_size = _convert_and_write(
        tf.lite.TFLiteConverter.from_keras_model(model), baseline_path
    )
    print(f"‚úÖ Saved: {baseline_path} ({baseline_size:.2f} KB)\n")

    # Step 2: prune with the project's helper and keep the Keras (H5) copy.
    print("‚úÇÔ∏è  Step 2: Applying Structured Pruning (50% pruning_ratio)")
    print(rule)
    pruned_model = apply_structured_pruning(model, pruning_ratio=0.5)
    pruned_h5_path = "models/test_pruned_model.h5"
    pruned_model.save(pruned_h5_path)
    print(f"‚úÖ Saved pruned model: {pruned_h5_path}\n")

    # Step 3: pruned model as TFLite.
    print("üíæ Step 3: Exporting Pruned TFLite (Float32)")
    print(rule)
    pruned_path = "models/tflite/saved_model_pruned.tflite"
    pruned_size = _convert_and_write(
        tf.lite.TFLiteConverter.from_keras_model(pruned_model), pruned_path
    )
    print(f"‚úÖ Saved: {pruned_path} ({pruned_size:.2f} KB)\n")

    # Step 4: INT8 quantization of the pruned model.
    print("‚ö° Step 4: Applying INT8 Quantization")
    print(rule)

    # Calibration uses random samples shaped like the model input because the
    # real dataset is not loaded in this cell.
    input_shape = model.input_shape[1:]

    def calibration_samples():
        for _ in range(100):
            yield [np.random.randn(1, *input_shape).astype(np.float32)]

    int8_converter = tf.lite.TFLiteConverter.from_keras_model(pruned_model)
    int8_converter.optimizations = [tf.lite.Optimize.DEFAULT]
    int8_converter.representative_dataset = calibration_samples
    int8_converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
    int8_converter.inference_input_type = tf.int8
    int8_converter.inference_output_type = tf.int8

    quantized_path = "models/tflite/saved_model_pruned_quantized.tflite"
    quantized_size = _convert_and_write(int8_converter, quantized_path)

    print(f"‚úÖ Saved: {quantized_path} ({quantized_size:.2f} KB)\n")

    # Summary of sizes and output locations.
    print(banner)
    print("‚úÖ COMPRESSION COMPLETE")
    print(banner)
    print(f"\nüìä Size Comparison:")
    print(f"  Baseline TFLite:    {baseline_size:.2f} KB")
    print(f"  Pruned TFLite:      {pruned_size:.2f} KB ({baseline_size/pruned_size:.2f}x)")
    print(f"  Quantized TFLite:   {quantized_size:.2f} KB ({baseline_size/quantized_size:.2f}x)")
    print(f"\nüíæ Compressed models saved to:")
    for saved_path in (pruned_h5_path, baseline_path, pruned_path, quantized_path):
        print(f"  - {saved_path}")
    print(f"\nüìù Note: Accuracy evaluation will be done in analysis step.")
else:
    print("‚ö†Ô∏è  WARNING: No trained model found!")
    print("‚ö†Ô∏è  Please run the training step first.")

## 7️⃣.5 Compression Analysis

Analyze the compressed models: size, accuracy, and inference speed.
Generate comparison reports and visualizations.

In [None]:
# Run compression analysis on all models
import os
import sys

PROJECT_DIR = "/content/TinyML"
os.chdir(PROJECT_DIR)

# Add project directory to Python path
if PROJECT_DIR not in sys.path:
    sys.path.insert(0, PROJECT_DIR)

# Analyze all compressed models
models_to_analyze = [
    "Baseline:src/models/global_model.h5",
    "Baseline-TFLite:models/global_model.tflite",
    "Pruned:models/pruned_model.h5",
    "Pruned-TFLite:models/pruned_model.tflite",
    "Pruned+Quantized:models/pruned_quantized.tflite"
]

# Filter only existing models
existing_models = []
for model_spec in models_to_analyze:
    model_path = model_spec.split(":", 1)[1]
    if os.path.exists(model_path):
        existing_models.append(model_spec)
    else:
        print(f"‚ö†Ô∏è  Model not found: {model_path}")

if not existing_models:
    print("‚ùå No models found for analysis!")
    print("Please run the compression step first.")
else:
    models_str = " ".join([f'"{m}"' for m in existing_models])
    
    cmd = f"""python scripts/analyze_compression.py \
        --models {models_str} \
        --baseline src/models/global_model.h5 \
        --config config/federated_colab.yaml \
        --output-dir data/processed/analysis \
        --format all"""
    
    print("Running compression analysis...")
    print(f"Analyzing {len(existing_models)} models:\n")
    for model in existing_models:
        print(f"  ‚úì {model}")
    print()
    
    !{cmd}

In [None]:
# Run FGSM attack testing on compressed models
import os
import sys

PROJECT_DIR = "/content/TinyML"
os.chdir(PROJECT_DIR)

# Add project directory to Python path
if PROJECT_DIR not in sys.path:
    sys.path.insert(0, PROJECT_DIR)

# Check if models exist
baseline_model = "src/models/global_model.h5"
pruned_model = "models/pruned_model.h5"

if not os.path.exists(baseline_model):
    print("‚ö†Ô∏è  WARNING: No trained model found!")
    print("‚ö†Ô∏è  Please run the training step first.")
else:
    print("üîí Testing adversarial robustness with FGSM attack...")
    print("This evaluates model security before and after compression.\n")
    
    print("-" * 60)
    print("Testing Baseline Model:")
    print("-" * 60)
    !python scripts/test_fgsm_attack.py --model {baseline_model}
    
    if os.path.exists(pruned_model):
        print("\n" + "-" * 60)
        print("Testing Pruned Model:")
        print("-" * 60)
        !python scripts/test_fgsm_attack.py --model {pruned_model}
    
    print("\n‚úÖ FGSM testing complete!")

In [None]:
# Copy compressed models and analysis to Google Drive
import shutil
import os

# The relative paths below are resolved against the repository root, so set
# the working directory explicitly like the other cells do.
PROJECT_DIR = "/content/TinyML"
os.chdir(PROJECT_DIR)

models_to_copy = [
    # Files written by the compression cell in this notebook.
    "models/test_pruned_model.h5",
    "models/tflite/saved_model_original.tflite",
    "models/tflite/saved_model_pruned.tflite",
    "models/tflite/saved_model_pruned_quantized.tflite",
    # Older names this cell used to look for; copied too if they exist.
    "models/pruned_model.h5",
    "models/pruned_model.tflite",
    "models/pruned_quantized.tflite",
    "models/global_model.tflite",
]

drive_models_dir = "/content/drive/MyDrive/TinyML_models"
os.makedirs(drive_models_dir, exist_ok=True)

copied_count = 0

print("üì¶ Copying compressed models to Google Drive...")
for model_path in models_to_copy:
    if os.path.exists(model_path):
        dst = os.path.join(drive_models_dir, os.path.basename(model_path))
        shutil.copy(model_path, dst)
        copied_count += 1
        print(f"‚úÖ Copied: {os.path.basename(model_path)}")

# Copy analysis results (top-level files only)
analysis_dir = "data/processed/analysis"
drive_analysis_dir = "/content/drive/MyDrive/TinyML_models/analysis"
os.makedirs(drive_analysis_dir, exist_ok=True)

if os.path.exists(analysis_dir):
    print("\nüìä Copying analysis results...")
    for file in os.listdir(analysis_dir):
        src = os.path.join(analysis_dir, file)
        dst = os.path.join(drive_analysis_dir, file)
        if os.path.isfile(src):
            shutil.copy(src, dst)
            copied_count += 1
            print(f"‚úÖ Copied: {file}")

# Only report success when something was actually copied.
if copied_count:
    print(f"\n‚úÖ All files saved to Google Drive!")
    print(f"   Models: {drive_models_dir}")
    print(f"   Analysis: {drive_analysis_dir}")
else:
    print("\n‚ö†Ô∏è  Nothing was copied: no compressed models or analysis files were found.")
    print("   Run the compression and analysis steps first.")

In [None]:
# Display visualizations inline
from IPython.display import Image, display
import os

analysis_dir = "data/processed/analysis"
plots = [
    "size_vs_accuracy.png",
    "compression_metrics.png",
    "compression_ratio.png"
]

print("üìà Analysis Results:\n")
for plot in plots:
    plot_path = os.path.join(analysis_dir, plot)
    if os.path.exists(plot_path):
        # Build the heading from the name without its extension; otherwise
        # the title ends in ".Png".
        title = os.path.splitext(plot)[0].replace('_', ' ').title()
        print(f"## {title}")
        display(Image(plot_path))
        print()
    else:
        print(f"‚ö†Ô∏è Plot not found: {plot_path}")

In [None]:
# Generate visualizations from analysis results
import os
import sys

PROJECT_DIR = "/content/TinyML"
os.chdir(PROJECT_DIR)

# Add project directory to Python path (if not already added)
if PROJECT_DIR not in sys.path:
    sys.path.insert(0, PROJECT_DIR)

results_path = "data/processed/analysis/compression_analysis.csv"

if os.path.exists(results_path):
    print("üìä Generating visualizations...")
    !python scripts/visualize_results.py \
        --results {results_path} \
        --output-dir data/processed/analysis \
        --plot all
    print("\n‚úÖ Visualizations complete!")
else:
    print(f"‚ö†Ô∏è Results file not found: {results_path}")
    print("Please run the compression analysis step first.")

In [None]:
# Copy the baseline model and every timestamped snapshot to Google Drive,
# keeping the src/models/ layout under the destination folder.
import os
import shutil
from pathlib import Path

# Google Drive is expected to be mounted already by an earlier cell.
PROJECT_DIR = "/content/TinyML"
dest_dir = "/content/drive/MyDrive/TinyML_models"

# Paths relative to the project root; the latest baseline always comes first.
OUTPUT_FILES = ["src/models/global_model.h5"]

# Append any timestamped snapshots (global_model_*.h5) found next to it.
models_dir = Path(PROJECT_DIR, "src", "models")
if models_dir.exists():
    timestamped_models = list(models_dir.glob("global_model_*.h5"))
    if timestamped_models:
        for snapshot in timestamped_models:
            OUTPUT_FILES.append(f"src/models/{snapshot.name}")
        print(f"üì¶ Found {len(timestamped_models)} timestamped model(s)")

# Create <dest>/src/models up front (this also creates <dest> itself).
Path(dest_dir, "src", "models").mkdir(parents=True, exist_ok=True)

print("üíæ Saving models to Google Drive...\n")
saved_count = 0
for fname in OUTPUT_FILES:
    source = os.path.join(PROJECT_DIR, fname)
    if not os.path.exists(source):
        continue
    # Mirror the relative path under the destination folder.
    target = Path(dest_dir) / fname
    target.parent.mkdir(parents=True, exist_ok=True)
    shutil.copy(source, target)
    print(f"‚úÖ Saved: {fname}")
    saved_count += 1

if saved_count == 0:
    print("‚ö†Ô∏è No model files found. Make sure training completed successfully.")
else:
    print(f"\n‚úÖ All models saved to: {dest_dir}/src/models/")
    print("   üìå Timestamped versions preserved")
    print("   üìå Latest: global_model.h5")