In [None]:
# All dependencies for the notebook are imported here, in a single cell.
import numpy as np
import os
import joblib # Used by the final cell to write the fitted model to disk
from sklearn.ensemble import RandomForestClassifier 
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report

# Progress marker confirming that the import cell ran to completion.
print("Libraries imported.")

Libraries imported.


In [None]:

# Notebook configuration: every value a reader might tune is defined here.

# One seed shared by the train/validation split and the model, so the two
# can never drift apart when the value is changed.
RANDOM_STATE = 42

# Estimator class (not an instance) plus the keyword arguments used to build it;
# the training cell instantiates it as MODEL_CHOICE(**MODEL_PARAMS).
MODEL_CHOICE = RandomForestClassifier
MODEL_PARAMS = {'n_estimators': 100, 'random_state': RANDOM_STATE}

# Fraction of the samples held out for validation.
TEST_SPLIT_SIZE = 0.2

# Length of each feature vector.
NUM_FEATURES = 63
# Number of distinct labels: A-Z and space and delete, might remove space and delete.
# NOTE(review): A-Z plus space and delete is only 28 labels, not 29 - presumably
# one more class is intended; confirm against the real dataset.
NUM_CLASSES = 29

# Destination of the serialized model.
MODEL_SAVE_PATH = '../models/asl_classifier.pkl'
# Create the target folder up front so saving cannot fail on a missing directory.
# A bare filename has an empty dirname and os.makedirs('') raises, hence the guard.
model_dir = os.path.dirname(MODEL_SAVE_PATH)
if model_dir:
    os.makedirs(model_dir, exist_ok=True)

print("Parameters defined.")

Parameters defined.


In [None]:
def generate_dummy_data(num_samples, num_features, num_classes, seed=None):
    """Create random placeholder features and labels for exercising the pipeline.

    The features and labels are drawn independently, so a model trained on
    them cannot learn anything; the data is only useful for smoke-testing.

    Args:
        num_samples: Number of rows to generate.
        num_features: Number of feature columns per row.
        num_classes: Number of distinct labels; labels are integers in
            ``[0, num_classes)``.
        seed: Optional seed for reproducible output. When ``None`` (the
            default) the global ``np.random`` state is used, exactly as before
            this parameter existed.

    Returns:
        Tuple ``(X, y)``: ``X`` is a float array of shape
        ``(num_samples, num_features)`` with values in ``[0, 1)``; ``y`` is an
        integer array of shape ``(num_samples,)``.
    """
    # The np.random module and a RandomState instance expose the same
    # rand/randint methods, so one code path serves both the seeded case and
    # the legacy global-state case.
    rng = np.random if seed is None else np.random.RandomState(seed)

    # dummy data for now
    print(f"Generating {num_samples} dummy samples...")
    X = rng.rand(num_samples, num_features)
    y = rng.randint(0, num_classes, size=num_samples)
    print("Dummy data generated.")
    return X, y

# Number of placeholder rows to generate.
NUM_DUMMY_SAMPLES = 1000

# Seed NumPy's global generator so the dummy data - and therefore every
# downstream split, fit and metric - is identical on each Restart & Run All.
np.random.seed(RANDOM_STATE)

# Generate the dummy data
X_dummy, y_dummy = generate_dummy_data(NUM_DUMMY_SAMPLES, NUM_FEATURES, NUM_CLASSES)

print(f"Dummy X shape: {X_dummy.shape}")
print(f"Dummy y shape: {y_dummy.shape}")

Generating 1000 dummy samples...
Dummy data generated.
Dummy X shape: (1000, 63)
Dummy y shape: (1000,)


In [None]:
# Hold out a validation subset. Stratifying on the labels keeps the class
# proportions the same on both sides of the split.
print("Splitting data into train and validation sets...")
split_kwargs = dict(
    test_size=TEST_SPLIT_SIZE,
    random_state=RANDOM_STATE,
    stratify=y_dummy,
)
X_train, X_val, y_train, y_val = train_test_split(X_dummy, y_dummy, **split_kwargs)

# The row count of each feature matrix is that split's sample count.
train_count = X_train.shape[0]
val_count = X_val.shape[0]
print(f"Train set size: {train_count} samples")
print(f"Validation set size: {val_count} samples")

Splitting data into train and validation sets...
Train set size: 800 samples
Validation set size: 200 samples


In [None]:
# Build the estimator from the class and keyword arguments
# held in MODEL_CHOICE and MODEL_PARAMS.
estimator_class, estimator_kwargs = MODEL_CHOICE, MODEL_PARAMS
model = estimator_class(**estimator_kwargs)
print(f"Initialized model: {model}")

# Fit on the training split only; the validation split is not passed in here.
print("Training model on dummy data...")
model.fit(X_train, y_train)
print("Model training complete.")

Initialized model: RandomForestClassifier(random_state=42)
Training model on dummy data...
Model training complete.


In [None]:
# Evaluate the model on the held-out validation split.
print("Evaluating model on dummy validation data...")
y_pred = model.predict(X_val)

# Overall fraction of validation samples predicted correctly.
accuracy = accuracy_score(y_val, y_pred)
print(f"Validation Accuracy: {accuracy:.4f}")

# The per-class report is best-effort: a failure here is reported but does not
# stop the notebook. zero_division=0 makes metrics for classes that were never
# predicted come out as 0 instead of triggering a warning.
try:
    report = classification_report(y_val, y_pred, zero_division=0)
    # Embed the report in the string rather than passing it as a second print
    # argument: print's separator space would otherwise land after the newline
    # and push the report's header row one column out of alignment.
    print(f"Classification Report:\n{report}")
except Exception as e:
    print(f"Could not generate full classification report: {e}")

Evaluating model on dummy validation data...
Validation Accuracy: 0.0150
Classification Report:
               precision    recall  f1-score   support

           0       0.00      0.00      0.00         8
           1       0.00      0.00      0.00         6
           2       0.09      0.12      0.11         8
           3       0.00      0.00      0.00         8
           4       0.00      0.00      0.00         6
           5       0.00      0.00      0.00         6
           6       0.00      0.00      0.00         6
           7       0.08      0.12      0.10         8
           8       0.04      0.10      0.06        10
           9       0.00      0.00      0.00         7
          10       0.00      0.00      0.00         7
          11       0.00      0.00      0.00         6
          12       0.00      0.00      0.00         6
          13       0.00      0.00      0.00         7
          14       0.00      0.00      0.00         8
          15       0.00      0.00     

In [9]:
# Persist the fitted model at the configured path. A failure is reported but
# deliberately does not abort the notebook.
print(f"Saving model to: {MODEL_SAVE_PATH}")
try:
    joblib.dump(model, MODEL_SAVE_PATH)
except Exception as save_error:
    print(f"Error saving model: {save_error}")
else:
    # Only reached when the dump completed without raising.
    print("Model saved successfully.")

Saving model to: ../models/asl_classifier.pkl
Model saved successfully.
