In [8]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import accuracy_score, classification_report
from sklearn.ensemble import RandomForestClassifier
import pickle

# Read the landmark table from disk
df = pd.read_csv("train_landmarks.csv")

# -------------------------------
# FIX 1 — DROP LABELS THAT HAVE ONLY 1 SAMPLE
# -------------------------------
# Count rows per label and collect the labels that occur at most once.
label_counts = df["label"].value_counts()
invalid_labels = label_counts.loc[label_counts.le(1)].index

if len(invalid_labels) > 0:
    print("⚠️ Menghapus label dengan sample terlalu sedikit:", invalid_labels.tolist())
    # Keep only the rows whose label is not in the rejected set.
    df = df.loc[~df["label"].isin(invalid_labels)]

# Separate features and label (derived once, after any filtering above)
X = df.drop(columns="label")
y = df["label"]

# Encode the class labels (e.g. letters A–Z) as consecutive integers
le = LabelEncoder().fit(y)
y_encoded = le.transform(y)

# -------------------------------
# SPLIT DATASET, THEN CREATE SCALER
# -------------------------------
# Hold out the test rows BEFORE fitting the scaler. Fitting StandardScaler on
# the full matrix would let the test rows contribute to the mean/std used for
# preprocessing (data leakage); fitting on the training rows only keeps the
# evaluation and the pickled scaler free of test-set statistics.
# The split arguments are unchanged, so the same rows land in train and test.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y_encoded, test_size=0.2, shuffle=True, stratify=y_encoded, random_state=42
)

# Learn mean/std from the training rows only, then apply them to both parts.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Whole feature matrix scaled with the train-fitted statistics; kept under its
# original name in case another cell reads it.
X_scaled = scaler.transform(X)

# Build the random forest and fit it on the training split in one step
# (fit() returns the estimator itself).
model = RandomForestClassifier(n_estimators=400, max_depth=40, random_state=42).fit(
    X_train, y_train
)

# Evaluation on the held-out split: overall accuracy, then per-class
# precision / recall / F1 labelled with the encoder's original class names.
y_pred = model.predict(X_test)
print("Accuracy:", accuracy_score(y_true=y_test, y_pred=y_pred))
print(classification_report(y_true=y_test, y_pred=y_pred, target_names=le.classes_))

# Persist the three fitted objects, in this order:
# the model, the scaler, and the label encoder.
for artifact_path, artifact in (
    ("asl_model.pkl", model),
    ("asl_scaler.pkl", scaler),
    ("asl_labels.pkl", le),
):
    with open(artifact_path, "wb") as f:
        pickle.dump(artifact, f)

print("\nSaved: asl_model.pkl, asl_scaler.pkl, asl_labels.pkl")

⚠️ Menghapus label dengan sample terlalu sedikit: ['nothing']
Accuracy: 0.9845423073826584
              precision    recall  f1-score   support

           A       0.99      0.98      0.98       429
           B       0.99      1.00      1.00       436
           C       0.99      1.00      0.99       381
           D       1.00      0.98      0.99       480
           E       0.98      0.98      0.98       455
           F       0.99      1.00      0.99       567
           G       0.99      1.00      0.99       474
           H       0.99      0.99      0.99       470
           I       0.99      0.99      0.99       466
           J       1.00      0.98      0.99       504
           K       0.99      0.98      0.98       531
           L       1.00      0.99      1.00       498
           M       0.93      0.95      0.94       281
           N       0.94      0.94      0.94       234
           O       0.98      0.99      0.99       445
           P       0.99      0.98      0.98 

In [5]:
import numpy as np
# Number of samples per encoded class id.
# Reads `y_encoded` (the array produced by the label-encoding step); the
# previous name `y_enc` is not defined anywhere in this notebook, so the cell
# only worked with leftover kernel state.
unique, counts = np.unique(y_encoded, return_counts=True)
# .tolist() yields plain Python ints so the printed dict looks the same
# regardless of how the installed NumPy version reprs its scalar types.
print(dict(zip(unique.tolist(), counts.tolist())))

{0: 2145, 1: 2180, 2: 1907, 3: 2401, 4: 2273, 5: 2837, 6: 2372, 7: 2348, 8: 2332, 9: 2522, 10: 2653, 11: 2491, 12: 1404, 13: 1171, 14: 2226, 15: 1997, 16: 2035, 17: 2501, 18: 2499, 19: 2318, 20: 2491, 21: 2519, 22: 2428, 23: 2124, 24: 2565, 25: 2310, 26: 1600, 27: 1456}
