# Sign Language Model (Sign MNIST)

Stages:
1. Environment + Imports
2. Load CSV Data (Sign MNIST)
3. Train XGBoost Model
4. Evaluate
5. Save Model
6. Export to ONNX

**Note**: We use `onnxmltools` to convert XGBoost models to ONNX.

In [3]:
# 1. Environment + Imports
%pip install pandas numpy scikit-learn xgboost skl2onnx onnx onnxmltools

import pandas as pd
import numpy as np
import os
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score, classification_report
import joblib
import onnxmltools
from onnxmltools.convert import convert_xgboost
from onnxmltools.convert.common.data_types import FloatTensorType
import onnx

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip available: 22.3.1 -> 26.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [4]:
# 2. Load CSV Data
train_path = '../data/asl_alphabet/sign_mnist_train.csv'
test_path = '../data/asl_alphabet/sign_mnist_test.csv'


def split_features_and_labels(frame, label_column='label', pixel_max=255.0):
    """Split a Sign MNIST style table into scaled features and labels.

    Parameters
    ----------
    frame : pandas.DataFrame
        Table laid out as ``label, pixel1, pixel2, ...``. It is not modified.
    label_column : str, default 'label'
        Name of the column holding the class label.
    pixel_max : float, default 255.0
        Divisor applied to every feature column so pixel values end up in 0-1.

    Returns
    -------
    tuple
        ``(features, labels)`` where ``features`` is ``frame`` without
        ``label_column`` divided by ``pixel_max`` and ``labels`` is the
        ``label_column`` series.

    Raises
    ------
    KeyError
        If ``label_column`` is not a column of ``frame``.
    """
    features = frame.drop(columns=[label_column]) / pixel_max
    labels = frame[label_column]
    return features, labels


# A missing CSV only produces a warning: the matching X_*/y_* variables are
# then left undefined, and the later cells check for them before running.
if os.path.exists(train_path):
    print(f"Loading train data from {train_path}...")
    train_df = pd.read_csv(train_path)
    print(f"Train shape: {train_df.shape}")
    X_train, y_train = split_features_and_labels(train_df)
else:
    print("WARNING: Train CSV not found!")

if os.path.exists(test_path):
    print(f"Loading test data from {test_path}...")
    test_df = pd.read_csv(test_path)
    print(f"Test shape: {test_df.shape}")
    X_test, y_test = split_features_and_labels(test_df)
else:
    print("WARNING: Test CSV not found!")

Loading train data from ../data/asl_alphabet/sign_mnist_train.csv...
Train shape: (27455, 785)
Loading test data from ../data/asl_alphabet/sign_mnist_test.csv...


In [5]:
# 3. Train XGBoost Model

if 'X_train' in locals():
    # The encoder is fitted on the training labels only; the test labels
    # (when that split was loaded) are mapped with the same fitted encoder.
    le = LabelEncoder()
    y_train_encoded = le.fit_transform(y_train)
    if 'y_test' in locals():
        y_test_encoded = le.transform(y_test)

    print("Training XGBoost... this might take a minute.")
    # tree_method='hist' is chosen for training speed.
    xgb_settings = {
        'n_estimators': 100,
        'tree_method': 'hist',
        'eval_metric': 'mlogloss',
    }
    model = XGBClassifier(**xgb_settings)
    model.fit(X_train, y_train_encoded)
    print("Training done.")

Training XGBoost... this might take a minute.
Training done.


In [6]:
# 4. Evaluate

# Runs only when both a trained model and the test features exist.
if {'model', 'X_test'} <= locals().keys():
    y_pred = model.predict(X_test)

    # Both metrics compare against the encoded test labels, so the class ids
    # in the report are the encoder's ids (see `le`), not the raw CSV labels.
    test_accuracy = accuracy_score(y_test_encoded, y_pred)
    print(f"Accuracy: {test_accuracy}")

    report_text = classification_report(y_test_encoded, y_pred)
    print(report_text)

Accuracy: 0.7663134411600669
              precision    recall  f1-score   support

           0       0.85      0.97      0.91       331
           1       0.96      0.89      0.93       432
           2       0.93      0.91      0.92       310
           3       0.86      0.98      0.92       245
           4       0.82      0.89      0.85       498
           5       0.79      0.92      0.85       247
           6       0.89      0.81      0.85       348
           7       0.97      0.90      0.94       436
           8       0.80      0.69      0.74       288
           9       0.77      0.64      0.70       331
          10       0.79      0.99      0.88       209
          11       0.77      0.60      0.67       394
          12       0.70      0.40      0.51       291
          13       0.93      0.68      0.79       246
          14       0.93      0.97      0.95       347
          15       0.66      0.95      0.78       164
          16       0.24      0.44      0.31       14

In [7]:
# 5. Save Model

if 'model' in locals():
    # Both artifacts go into the same directory, created on demand.
    src_dir = '../src'
    os.makedirs(src_dir, exist_ok=True)

    model_file = os.path.join(src_dir, 'model.joblib')
    encoder_file = os.path.join(src_dir, 'label_encoder.joblib')

    # The label encoder is stored next to the classifier because the model
    # was trained on encoded labels.
    joblib.dump(model, model_file)
    joblib.dump(le, encoder_file)
    print(f"Model saved to {src_dir}")

Model saved to ../src


In [8]:
# 6. Export to ONNX
#
# Converts the trained classifier with onnxmltools and writes it to the web
# app's public models folder. `convert_xgboost` and `FloatTensorType` come
# from the imports cell at the top of the notebook.
#
# NOTE: the model was fitted on labels encoded by `le`, so whatever consumes
# the ONNX file needs `le.classes_` to map predictions back to the raw labels.

if 'model' in locals():
    print("Converting to ONNX using onnxmltools...")

    n_features = X_train.shape[1]
    initial_type = [('float_input', FloatTensorType([None, n_features]))]

    # The booster's feature names are switched to 'f0', 'f1', ... for the
    # conversion (presumably the naming the converter expects - confirm
    # against the onnxmltools docs). The original names are restored
    # afterwards, even if the conversion fails, so the in-memory model still
    # matches the column names of X_train / X_test and earlier cells can be
    # re-run.
    booster = model.get_booster()
    original_feature_names = booster.feature_names
    booster.feature_names = [f'f{i}' for i in range(n_features)]
    try:
        # Use onnxmltools.convert_xgboost for XGBoost models
        onnx_model = convert_xgboost(model, initial_types=initial_type)
    finally:
        booster.feature_names = original_feature_names

    public_dir = os.path.join('..', '..', 'web-app', 'public', 'models')
    os.makedirs(public_dir, exist_ok=True)

    onnx_path = os.path.join(public_dir, 'model.onnx')
    with open(onnx_path, "wb") as f:
        f.write(onnx_model.SerializeToString())
    print(f"ONNX model saved to {onnx_path}")

Converting to ONNX using onnxmltools...
ONNX model saved to ..\..\web-app\public\models\model.onnx
