# 🧠 TinyML - Colab Training Notebook

This notebook trains your TinyML model on Google Colab GPU.

**Flow:**
1. Check GPU
2. Clone or update your GitHub repo
3. Install Colab-compatible dependencies
4. Run training script (`train.py` or `src/train.py`)
5. Save exported model (e.g. `tiny_model.tflite`) to Google Drive

Customize the training script path in the **Run training** cell if needed.


In [None]:
# 1. Connect Google Drive (mounted at /content/drive)
from google.colab import drive
drive.mount('/content/drive')

# 2. Set the data folder path on the mounted Drive
DATA_DIR = "/content/drive/MyDrive/TinyML_models"

## 1️⃣ Runtime & GPU check
Make sure you set **Runtime → Change runtime type → Hardware accelerator → GPU** before running.


In [None]:
# Show the NVIDIA GPU status; if the command fails, print a hint instead.
!nvidia-smi || echo "No NVIDIA GPU detected. Please enable GPU in Runtime settings."


## 2️⃣ Clone or update TinyML repository

Set your GitHub repo URL if different.


In [None]:
import os
import sys
import subprocess

REPO_URL = "https://github.com/danielsoo/TinyML.git"  # change if needed
PROJECT_DIR = "/content/TinyML"


def _git(*args, check=False):
    """Run `git <args>` in the current directory, capturing stdout/stderr as text.

    With check=True a non-zero exit status raises subprocess.CalledProcessError.
    """
    return subprocess.run(["git", *args], capture_output=True, text=True, check=check)


print("üîÑ Updating repository from GitHub...")
if os.path.exists(PROJECT_DIR):
    # Existing checkout: fetch, then pull `main`. Failures are reported but
    # do not stop the notebook.
    print(f"üì• Pulling latest changes from {REPO_URL}...")
    os.chdir(PROJECT_DIR)
    _git("fetch", "origin")
    result = _git("pull", "origin", "main")
    if result.returncode != 0:
        print(f"‚ö†Ô∏è  Git pull had issues (may be up to date): {result.stderr[:100]}")
    else:
        print("‚úÖ Repository updated successfully")
        pulled = result.stdout.strip()
        if pulled:
            print(f"   Changes: {pulled[:100]}")
else:
    # First run: a failed clone aborts the cell (check=True).
    print(f"üì• Cloning repository from {REPO_URL}...")
    result = _git("clone", REPO_URL, PROJECT_DIR, check=True)
    print("‚úÖ Repository cloned successfully")

# Work from inside the checkout from here on.
os.chdir(PROJECT_DIR)

# Report which commit is checked out (skipped silently if git log fails).
commit_result = _git("log", "-1", "--oneline")
if commit_result.returncode == 0:
    print(f"üìå Current commit: {commit_result.stdout.strip()}")

# Put the checkout first on sys.path so its modules can be imported.
if PROJECT_DIR not in sys.path:
    sys.path.insert(0, PROJECT_DIR)

print(f"\n‚úÖ Project directory ready: {PROJECT_DIR}")


## 2Ô∏è‚É£.5 Update config data path
Google DriveÏóê ÏûàÎäî Îç∞Ïù¥ÌÑ∞ Í≤ΩÎ°ú(`DATA_DIR`)Î•º `config/federated_colab.yaml`Ïóê Î∞òÏòÅÌï©ÎãàÎã§.



In [None]:
import yaml
from pathlib import Path

# Point `data.path` in the Colab config at the Drive data folder (DATA_DIR).
config_path = Path(PROJECT_DIR) / "config" / "federated_colab.yaml"

# Fail fast when the config is missing.
if not config_path.exists():
    raise FileNotFoundError(f"Cannot find {config_path}")

with config_path.open("r", encoding="utf-8") as f:
    # An empty YAML document parses to None; fall back to an empty mapping so
    # the update below still works.
    cfg = yaml.safe_load(f) or {}

# `data:` may be absent or explicitly null; both cases need a fresh mapping.
if cfg.get("data") is None:
    cfg["data"] = {}
cfg["data"]["path"] = DATA_DIR

# The file is dumped with allow_unicode=True, so write it as UTF-8 explicitly
# instead of depending on the locale's default encoding.
with config_path.open("w", encoding="utf-8") as f:
    yaml.safe_dump(cfg, f, sort_keys=False, allow_unicode=True)

print(f"Updated federated_colab.yaml data.path -> {cfg['data']['path']}")



## 3️⃣ Generate Colab-specific requirements (no macOS-only packages)

Entries for `tensorflow`, `numpy`, `tensorflow-macos` and `tensorflow-metal` are removed from `requirements.txt` automatically, and the result is written to `colab_requirements.txt`.


In [None]:
import os

import re

src_req = "requirements.txt"
colab_req = "colab_requirements.txt"

# Distributions to leave out of the Colab requirements file (preinstalled or
# incompatible on Colab). Matching is by exact distribution name, so add any
# further variant (e.g. another tensorflow build) here explicitly.
skip_keywords = ["tensorflow", "numpy", "tensorflow-macos", "tensorflow-metal"]


def requirement_name(line):
    """Return the normalized distribution name that starts a requirements line.

    The name is lower-cased and runs of `-`, `_`, `.` collapse to a single `-`.
    Returns None when the line does not start with a package name (blank
    lines, `#` comments, pip options such as `-r other.txt`).
    """
    match = re.match(r"\s*([A-Za-z0-9][A-Za-z0-9._-]*)", line)
    if match is None:
        return None
    return re.sub(r"[-_.]+", "-", match.group(1)).lower()


def filter_requirements(lines, skipped_names):
    """Return the lines whose distribution name is not listed in skipped_names.

    Comparing whole names (rather than substrings) keeps related packages such
    as `tensorflow-model-optimization`, and comment lines, in the output.
    """
    skipped = {re.sub(r"[-_.]+", "-", name).lower() for name in skipped_names}
    return [line for line in lines if requirement_name(line) not in skipped]


if os.path.exists(src_req):
    with open(src_req, "r", encoding="utf-8") as f:
        lines = f.readlines()

    with open(colab_req, "w", encoding="utf-8") as f:
        f.writelines(filter_requirements(lines, skip_keywords))

    # Echo the generated file so the result can be checked in the cell output.
    print("Generated:", colab_req)
    with open(colab_req, "r", encoding="utf-8") as f:
        print(f.read())
else:
    print("No requirements.txt found. Skipping Colab requirements generation.")


## 4️⃣ Install dependencies (Colab compatible)

- Installs from `colab_requirements.txt` if present.
- Installs Flower (`flwr[simulation]`) and pins `protobuf==3.20.3`. TensorFlow itself is not installed by this cell.


In [None]:
import os
import subprocess

import sys

# Invoke pip through the interpreter that runs this kernel, so packages are
# installed into the environment the notebook imports from even if the `pip`
# found on PATH belongs to a different Python.
pip_install = [sys.executable, "-m", "pip", "install"]

print("üì¶ Installing dependencies...")

if os.path.exists("colab_requirements.txt"):
    print("   Installing from colab_requirements.txt...")
    subprocess.run([*pip_install, "-r", "colab_requirements.txt"], check=True)
else:
    print("‚ö†Ô∏è  colab_requirements.txt not found. Install your packages manually if needed.")

print("   Installing Flower...")
subprocess.run([*pip_install, "flwr[simulation]"], check=True)

# Pin protobuf after the other installs so they cannot upgrade it again.
# NOTE(review): the 3.20.3 pin and the TensorFlow 2.19.0 claim come from the
# original notebook - confirm against the TensorFlow version actually in use.
print("\nüîß Fixing protobuf compatibility (TensorFlow 2.19.0 requires protobuf==3.20.3)...")
subprocess.run([*pip_install, "--force-reinstall", "protobuf==3.20.3"], check=True)
print("‚úÖ Protobuf fixed")

In [None]:
# Sanity check: report the TensorFlow version, visible GPUs and protobuf version.
# If you see AttributeError: 'MessageFactory' object has no attribute 'GetPrototype',
# uncomment the following line and re-run this cell:
# !pip install protobuf==3.20.3

import tensorflow as tf
print("TensorFlow version:", tf.__version__)
print("GPU devices:", tf.config.list_physical_devices('GPU'))

# Report the protobuf version. This is best-effort: a missing package is
# reported instead of aborting the cell, but unrelated errors are no longer
# swallowed by a bare `except`.
try:
    import google.protobuf
except ImportError as exc:
    print("Protobuf version: unavailable -", exc)
else:
    print("Protobuf version:", getattr(google.protobuf, "__version__", "unknown"))


## 5️⃣ (Optional) Download or prepare dataset

Edit this cell if your training script expects data in a specific path.
For example, you can mount Google Drive or download from Kaggle here.


In [None]:
# Placeholder cell: as written it only prints a reminder. Uncomment and adapt
# one of the examples below if your training script expects data somewhere specific.

# Example: mount Google Drive if your data is stored there.
# from google.colab import drive
# drive.mount('/content/drive')

# Example: create a data directory
# os.makedirs('data', exist_ok=True)
# Then copy or download your dataset into ./data

print("Dataset preparation step: customize as needed.")


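## 6️⃣ Run training

The first cell below prints label statistics for the CSV files on Drive. The second cell runs the Flower federated simulation (`python -m src.federated.client`) with `config/federated_colab.yaml`.
If your main script is at a different path, edit the command accordingly (e.g. `train.py` or `src/train.py`).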


In [None]:
import pandas as pd
from pathlib import Path

# Print the total sample count and the distribution of the "attack" column
# across every CSV file in the data folder.
data_dir = Path("/content/drive/MyDrive/TinyML_models")  # change to the folder that holds the CSV files
csv_paths = sorted(data_dir.glob("*.csv"))

# pd.concat([]) fails with an unhelpful "No objects to concatenate"; report the
# real problem (wrong folder / Drive not mounted) instead.
if not csv_paths:
    raise FileNotFoundError(f"No CSV files found in {data_dir}")

dfs = [pd.read_csv(p, low_memory=False) for p in csv_paths]
df = pd.concat(dfs, ignore_index=True)

print("Total Samples:", len(df))
print(df["attack"].value_counts())
print(df["attack"].value_counts(normalize=True))  # same counts as proportions

In [None]:
import os
from datetime import datetime

PROJECT_DIR = "/content/TinyML"
os.chdir(PROJECT_DIR)

# Generate unique model filename with timestamp to avoid overwriting
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
model_filename = f"global_model_{timestamp}.h5"
model_path = f"src/models/{model_filename}"

# Default entry point: Flower federated simulation with Colab config
# Edit the command below if you want to run a different training script.
print(f"Running python -m src.federated.client --config config/federated_colab.yaml --save-model {model_path}")
!python -m src.federated.client --config config/federated_colab.yaml --save-model {model_path}

# Also save as latest for easy access
latest_path = "src/models/global_model.h5"
if os.path.exists(model_path):
    import shutil
    shutil.copy(model_path, latest_path)
    print(f"\n‚úÖ Also saved as latest: {latest_path}")


## 7️⃣.5 FGSM Adversarial Attack Testing

Test FGSM (Fast Gradient Sign Method) attack on the trained model.
This evaluates the model's robustness against adversarial examples.

**Note:** This requires a trained model from the previous step.


In [None]:
# Run FGSM attack testing
import os
import sys

PROJECT_DIR = "/content/TinyML"
os.chdir(PROJECT_DIR)

# Add project directory to Python path
if PROJECT_DIR not in sys.path:
    sys.path.insert(0, PROJECT_DIR)

# Check if model exists
model_path = "src/models/global_model.h5"
if not os.path.exists(model_path):
    print("‚ö†Ô∏è  WARNING: No trained model found!")
    print("‚ö†Ô∏è  Please run the training step (Cell 17) first.")
    print("‚ö†Ô∏è  The script will train a quick test model, but results will be less accurate.")
    print()

# Run FGSM attack test
print("Running FGSM attack testing...")
print("This may take a few minutes depending on dataset size.\n")

!python scripts/test_fgsm_attack.py


## 7️⃣ Save trained model(s) to Google Drive

This will look for common output filenames (e.g. `tiny_model.tflite`) in the project root and copy them to your Drive.
Edit `OUTPUT_FILES` if your script uses different names or locations.


## 8️⃣ Compression Analysis

Analyze model size, accuracy, and inference speed at each compression stage.
Generate visualizations and reports.



In [None]:
# Export trained model to TFLite (optional, for comparison)
import os

import tensorflow as tf
import yaml

# Resolve the relative paths below against the project checkout, as the other
# analysis cells do, so this cell also works when run on its own.
PROJECT_DIR = "/content/TinyML"
os.chdir(PROJECT_DIR)

# Load config.
# NOTE(review): `cfg` is not used in this cell; kept in case a later cell
# relies on it - confirm and drop if not.
with open("config/federated_colab.yaml", encoding="utf-8") as f:
    cfg = yaml.safe_load(f)

# Convert the latest Keras model, if present, and write it next to the .h5 file.
model_path = "src/models/global_model.h5"
if os.path.exists(model_path):
    model = tf.keras.models.load_model(model_path)

    # Export to TFLite with the converter's default settings
    converter = tf.lite.TFLiteConverter.from_keras_model(model)
    tflite_model = converter.convert()

    # Save the serialized model bytes
    tflite_path = "src/models/global_model.tflite"
    with open(tflite_path, "wb") as f:
        f.write(tflite_model)
    print(f"‚úÖ Saved TFLite model: {tflite_path}")
else:
    print(f"‚ö†Ô∏è Model not found: {model_path}")



In [None]:
# Run compression analysis
import os
import sys

PROJECT_DIR = "/content/TinyML"
os.chdir(PROJECT_DIR)

# Add project directory to Python path
if PROJECT_DIR not in sys.path:
    sys.path.insert(0, PROJECT_DIR)

# Analyze models
models_to_analyze = ["Baseline:src/models/global_model.h5"]

# Add TFLite if it exists
if os.path.exists("src/models/global_model.tflite"):
    models_to_analyze.append("TFLite:src/models/global_model.tflite")

models_str = " ".join([f'"{m}"' for m in models_to_analyze])

cmd = f"""python scripts/analyze_compression.py \
    --models {models_str} \
    --baseline src/models/global_model.h5 \
    --config config/federated_colab.yaml \
    --output-dir data/processed/analysis \
    --format all"""

print("Running compression analysis...")
print(f"Command: {cmd}\n")
print(f"Python path: {sys.path[:3]}...\n")
!{cmd}



In [None]:
# Generate visualizations
import os
import sys

PROJECT_DIR = "/content/TinyML"
os.chdir(PROJECT_DIR)

# Add project directory to Python path (if not already added)
if PROJECT_DIR not in sys.path:
    sys.path.insert(0, PROJECT_DIR)

results_path = "data/processed/analysis/compression_analysis.csv"

if os.path.exists(results_path):
    print("Generating visualizations...")
    !python scripts/visualize_results.py \
        --results {results_path} \
        --output-dir data/processed/analysis \
        --plot all
else:
    print(f"‚ö†Ô∏è Results file not found: {results_path}")



In [None]:
# Display visualizations inline
from IPython.display import Image, display
import os

# Show each expected plot inline, or report the ones that were not generated.
analysis_dir = "data/processed/analysis"
plots = [
    "size_vs_accuracy.png",
    "compression_metrics.png",
    "compression_ratio.png",
]

for plot_name in plots:
    plot_path = os.path.join(analysis_dir, plot_name)
    if not os.path.exists(plot_path):
        print(f"‚ö†Ô∏è Plot not found: {plot_path}")
        continue
    print(f"\n## {plot_name}")
    display(Image(plot_path))


In [None]:
# Copy analysis results to Google Drive
import os
import shutil

from google.colab import drive

drive.mount('/content/drive')

# `analysis_dir` is relative to the project checkout; change into it here, as
# the other analysis cells do, so this cell also works when run on its own.
PROJECT_DIR = "/content/TinyML"
os.chdir(PROJECT_DIR)

analysis_dir = "data/processed/analysis"
drive_dir = "/content/drive/MyDrive/TinyML_models/analysis"

# Create the destination folder on Drive
os.makedirs(drive_dir, exist_ok=True)

# Copy every regular file directly inside analysis_dir (sub-directories are
# skipped). isdir() rather than exists() avoids a crash in os.listdir() if the
# path is not a directory.
found_any = False
if os.path.isdir(analysis_dir):
    for file in os.listdir(analysis_dir):
        src = os.path.join(analysis_dir, file)
        dst = os.path.join(drive_dir, file)
        if os.path.isfile(src):
            shutil.copy(src, dst)
            print(f"‚úÖ Copied: {file}")
            found_any = True

if found_any:
    print(f"\n‚úÖ All analysis results saved to: {drive_dir}")
else:
    print(f"‚ö†Ô∏è No analysis files found in {analysis_dir}")



In [None]:
import os
import shutil
from google.colab import drive
from pathlib import Path

PROJECT_DIR = "/content/TinyML"

# Candidate model artifacts, given relative to the project root. Only the
# ones that actually exist are copied.
OUTPUT_FILES = [
    "src/models/global_model.h5",  # Latest model (always copied)
    "src/models/global_model.tflite",
    "models/global_model.h5",
    "models/global_model.tflite",
    "tiny_model.tflite",
    "model.tflite",
    "model.h5",
    "saved_model.pb",
]

# Add every timestamped model as well, so the history of runs is kept.
models_dir = Path(PROJECT_DIR) / "src" / "models"
if models_dir.exists():
    timestamped_models = list(models_dir.glob("global_model_*.h5"))
    if timestamped_models:
        for checkpoint in timestamped_models:
            OUTPUT_FILES.append(f"src/models/{checkpoint.name}")
        print(f"üì¶ Found {len(timestamped_models)} timestamped model(s) to preserve")

# Mount Drive to store the trained models
drive.mount('/content/drive')

dest_dir = "/content/drive/MyDrive/TinyML_models"
os.makedirs(dest_dir, exist_ok=True)

# Mirror the src/models layout on Drive.
drive_src_dir = os.path.join(dest_dir, "src", "models")
os.makedirs(drive_src_dir, exist_ok=True)

found_any = False
for fname in OUTPUT_FILES:
    src_path = os.path.join(PROJECT_DIR, fname)
    if not os.path.exists(src_path):
        continue
    # Keep the relative path so files land in the same sub-folder on Drive.
    dst_path = os.path.join(dest_dir, fname)
    os.makedirs(os.path.dirname(dst_path), exist_ok=True)
    shutil.copy(src_path, dst_path)
    print(f"‚úÖ Copied {fname}")
    found_any = True

if found_any:
    print(f"\n‚úÖ All models saved to: {dest_dir}/src/models/")
    print("   üìå Timestamped models are preserved (no overwriting)")
    print("   üìå Latest model: global_model.h5")
else:
    print("‚ö†Ô∏è No known model files found. Make sure your training script saves a model and update OUTPUT_FILES if needed.")
