# SOMNIA PROJECT

Sleep stage classification using machine learning

# Project Setup

This section configures the Colab environment:
- Mounts Google Drive
- Sets up project directories
- Clones the repository
- Installs dependencies
- Configures the Kaggle API

In [None]:
# 1. Mount Google Drive
# Attach the user's Google Drive to the Colab VM's filesystem at
# /content/drive so files written there can be reached from this notebook.
from google.colab import drive
drive.mount('/content/drive')

# 2. Project Directory Setup
ROOT_PATH = "YOUR_PATH" # Replace me!
GITHUB_REPO = "https://github.com/cem94/somnia.git"
PROJECT_NAME = "somnia"
PROJECT_PATH = {ROOT_PATH}/{PROJECT_NAME}

%cd {PROJECT_PATH}

# 3. Clone GitHub Repository
# Clone into the current working directory. git names the new directory after
# the repository ("somnia"), which matches PROJECT_NAME. On later runs the
# directory already exists, so the clone only prints an error; comment the
# line out as noted to avoid that noise.
! git clone {GITHUB_REPO}  # Comment after the first run
# Enter the checkout so the git commands below operate on it.
%cd {PROJECT_NAME}
# Refresh remote-tracking refs, then bring the local branch up to date with
# origin/main.
! git fetch origin
# Left disabled: un-commenting this discards any local modifications before
# pulling.
#! git reset --hard HEAD
! git pull origin main

# 4. Configure Python Path
import os
import sys

# Make the project's modules importable from this notebook, and make the
# project directory the working directory so the relative paths used by the
# following steps resolve against it.
sys.path.append(str(PROJECT_PATH))
os.chdir(str(PROJECT_PATH))

# 5. Install Dependencies
# Upgrade pip first, then install the packages listed in requirements.txt.
# The file is looked up relative to the current working directory, which the
# os.chdir call above set to PROJECT_PATH. --no-cache-dir disables pip's cache.
!pip install --upgrade pip
!pip install --no-cache-dir -r requirements.txt

# 6. Kaggle API Setup
#
# Keeps a persistent copy of kaggle.json under the project directory and
# installs it to ~/.kaggle/kaggle.json (owner read/write only) on every run.
# On first use the user is prompted to upload the file.
#
# Filesystem steps use os/shutil instead of shell commands so that paths
# containing spaces work and any failure raises instead of being silently
# ignored before the success message is printed.
import os
import shutil

# Configuration - will persist in Google Drive
KAGGLE_DIR = f"{PROJECT_PATH}/kaggle_credentials"
os.makedirs(KAGGLE_DIR, exist_ok=True)

_stored_credentials = f"{KAGGLE_DIR}/kaggle.json"
if not os.path.exists(_stored_credentials):
    print("\nFIRST TIME SETUP REQUIRED:")
    print("1. Go to https://github.com/Kaggle/kaggle-api#api-credentials")
    print("2. Create and download your kaggle.json")
    print(f"3. Upload it to: {KAGGLE_DIR} using Colab's file explorer")
    print("\nAfter uploading, re-run this cell to continue")
    from google.colab import files
    # files.upload() stores the chosen files in the current working directory
    # and returns a mapping of file name -> content.
    uploaded = files.upload()
    # Require the exact file name: an empty upload or a differently named
    # file would otherwise fail later with a less helpful error.
    if "kaggle.json" not in uploaded:
        print("No file uploaded. Please upload your kaggle.json file.")
        # Raise error for missing credential
        raise FileNotFoundError("kaggle.json not uploaded")
    # Move (not copy) so no stray credential file is left in the working
    # directory.
    shutil.move("kaggle.json", _stored_credentials)

print("Kaggle configuration found, proceeding...")
_kaggle_home = os.path.expanduser("~/.kaggle")
os.makedirs(_kaggle_home, exist_ok=True)
_active_credentials = os.path.join(_kaggle_home, "kaggle.json")
shutil.copy(_stored_credentials, _active_credentials)
# Restrict the token to owner read/write (same mode as the former chmod 600).
os.chmod(_active_credentials, 0o600)
print("Kaggle API successfully configured!")

# Model Pipeline Execution

Running `python -m main` executes the complete pipeline:

1. **Data Processing**
   - Fetches raw data from Kaggle
   - Prepares and analyzes datasets

2. **Tokenizer Training**
   - Trains custom tokenizer on processed data

3. **Model Training**
   - Trains the main Somnia model
   - Performs hyperparameter optimization

4. **Model Export**
   - Exports the trained model for deployment
   - Prepares files for Android integration

In [None]:
# 7. Train the Model
# Run the project's `main` module as a script in a subprocess. Python resolves
# the module from the notebook's current working directory, so the setup cell
# above must have been run first.
!python -m main