# Colab Setup for Korean Character Recognition

Run this notebook ONCE before running main.ipynb

In [None]:
import os
import numpy as np
from sklearn.model_selection import train_test_split
from typing import List, Tuple, Optional

def get_class_names(feature_dir: str) -> List[str]:
    """Return the sorted class names found in *feature_dir*.

    A class name is the filename of each directory entry ending in
    ``.npy`` with that four-character suffix removed. Entries with any
    other ending are ignored.
    """
    suffix = ".npy"
    names = []
    for entry in os.listdir(feature_dir):
        if entry.endswith(suffix):
            # Slice (rather than splitext) so a bare ".npy" entry maps to "".
            names.append(entry[: -len(suffix)])
    names.sort()
    return names

def load_hog_features(
    feature_dir: str,
    selected_classes: Optional[List[str]] = None,
    max_samples_per_class: Optional[int] = None,
    shuffle: bool = True,
    random_state: int = 42,
) -> Tuple[np.ndarray, np.ndarray, List[str]]:
    """Load per-class feature arrays stored as ``<feature_dir>/<class>.npy``.

    Args:
        feature_dir: Directory scanned for ``.npy`` files (one per class).
        selected_classes: If given, only classes whose name appears here are
            loaded. The result keeps the sorted order of the directory
            listing, not the order of this list.
        max_samples_per_class: If set (and non-zero), classes with more rows
            than this are randomly subsampled without replacement down to
            this many rows. ``None`` or ``0`` disables the cap.
        shuffle: Whether to apply one random permutation to the combined
            samples and labels.
        random_state: Seed used for both subsampling and shuffling.

    Returns:
        ``(X, y, class_names)`` where ``X`` is the loaded arrays stacked with
        ``np.vstack``, ``y`` is an ``int64`` array holding one label per row,
        and each label is the index of the sample's class in ``class_names``.
        Presumably each file holds a 2-D ``(n_samples, n_features)`` array --
        confirm against the feature-extraction step.

    Raises:
        ValueError: If no feature array could be loaded (empty directory or
            no class matched ``selected_classes``).
    """
    class_names = get_class_names(feature_dir)
    if selected_classes is not None:
        # Build the lookup once so filtering is O(1) per class.
        wanted = set(selected_classes)
        class_names = [c for c in class_names if c in wanted]

    X_list, y_list = [], []
    for label_idx, cls in enumerate(class_names):
        path = os.path.join(feature_dir, f"{cls}.npy")
        if not os.path.exists(path):
            # File vanished between listing and loading; its label index is
            # simply unused so labels still index into class_names.
            continue
        feats = np.load(path)
        if max_samples_per_class and len(feats) > max_samples_per_class:
            # A fresh generator per class keeps each class's subsample
            # independent of which other classes are being loaded.
            rng = np.random.default_rng(random_state)
            keep = rng.choice(len(feats), max_samples_per_class, replace=False)
            feats = feats[keep]
        X_list.append(feats)
        y_list.append(np.full(len(feats), label_idx, dtype=np.int64))

    if not X_list:
        # np.vstack([]) would raise an opaque "need at least one array"
        # ValueError; fail with a message that names the actual cause.
        raise ValueError(
            f"No feature arrays loaded from {feature_dir!r} "
            f"(selected_classes={selected_classes!r})"
        )

    X, y = np.vstack(X_list), np.concatenate(y_list)
    if shuffle:
        indices = np.random.default_rng(random_state).permutation(len(X))
        X, y = X[indices], y[indices]
    return X, y, class_names

def _train_test_split(X, y, train_ratio=0.8, test_ratio=None, random_state=42, stratify=True):
    """Split ``X``/``y`` into train and test parts via scikit-learn.

    Args:
        X: Samples, passed straight to ``train_test_split``.
        y: Labels aligned with ``X``; also used as the stratification target
            when ``stratify`` is true.
        train_ratio: Fraction of samples for the training part.
        test_ratio: Fraction of samples for the test part. ``None`` (the
            default) means "everything not used for training", i.e.
            ``1 - train_ratio``. When given and ``train_ratio + test_ratio``
            is below 1, the remaining samples are left out of both parts.
        random_state: Seed forwarded to ``train_test_split``.
        stratify: If true, split so class proportions in ``y`` are preserved.

    Returns:
        ``(X_train, y_train, X_test, y_test)`` -- note this order differs
        from scikit-learn's ``(X_train, X_test, y_train, y_test)``.

    Raises:
        ValueError: If ``train_ratio + test_ratio`` exceeds 1.
    """
    if test_ratio is None or abs((train_ratio + test_ratio) - 1.0) < 1e-9:
        # Complementary split: let sklearn derive the train size so that no
        # sample is dropped to floating-point rounding.
        train_size, test_size = None, 1.0 - train_ratio
    elif train_ratio + test_ratio > 1.0:
        raise ValueError(
            f"train_ratio + test_ratio must not exceed 1 "
            f"(got {train_ratio} + {test_ratio})"
        )
    else:
        # Explicit, non-complementary ratios: honour both as given.
        train_size, test_size = train_ratio, test_ratio

    strat = y if stratify else None
    X_train, X_test, y_train, y_test = train_test_split(
        X,
        y,
        train_size=train_size,
        test_size=test_size,
        random_state=random_state,
        stratify=strat,
    )
    return X_train, y_train, X_test, y_test

# Reached only if every import and definition earlier in this cell ran
# without raising, so it doubles as a success marker for the cell.
print("[OK] Functions loaded")

In [None]:
from google.colab import files
import zipfile

# Ask the user for the feature archive, unpack it into the working directory,
# and report how many .npy feature files ended up in features/hog-extended.
print("Upload features.zip (create with: zip -r features.zip features/hog-extended/)")
uploaded = files.upload()

# Prefer the expected name, but fall back to any uploaded .zip: the archive
# may arrive under a different name (e.g. Colab may rename a re-uploaded
# duplicate to something like "features (1).zip").
zip_names = [name for name in uploaded if name.lower().endswith('.zip')]
if 'features.zip' in uploaded:
    archive_name = 'features.zip'
elif zip_names:
    archive_name = zip_names[0]
else:
    archive_name = None

if not uploaded:
    print("[ERROR] No file uploaded")
elif archive_name is None:
    # Something was uploaded, just not a zip; say so instead of claiming
    # that nothing was uploaded.
    print(f"[ERROR] Expected a .zip archive, got: {', '.join(uploaded)}")
else:
    try:
        with zipfile.ZipFile(archive_name, 'r') as zip_ref:
            zip_ref.extractall('.')
    except zipfile.BadZipFile:
        print(f"[ERROR] {archive_name} is not a valid zip archive")
    else:
        # isdir (not exists) so the listdir below cannot hit a plain file.
        if os.path.isdir('features/hog-extended'):
            npy_count = len([f for f in os.listdir('features/hog-extended') if f.endswith('.npy')])
            print(f"[OK] Extracted {npy_count} files")
        else:
            print("[ERROR] Extraction failed")

In [None]:
# Create the output folders (presumably consumed by main.ipynb -- confirm).
# exist_ok=True makes re-running this cell harmless.
for out_dir in ('models', 'results'):
    os.makedirs(out_dir, exist_ok=True)
print("[OK] Directories created")

In [None]:
# Smoke test: load at most 10 samples per class from the extracted features
# and print the resulting class count, sample count and feature width.
X, y, classes = load_hog_features(
    feature_dir='features/hog-extended',
    max_samples_per_class=10,
)
print(f"[OK] Test: {len(classes)} classes, {len(X)} samples, {X.shape[1]} dims")
print("Setup complete! Run main.ipynb now")