In [3]:
# Typing Speed Detector - Optimized CPU Version
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
import time
import random
import joblib

# Generate synthetic typing data more efficiently
def generate_typing_data(num_samples=1000, seed=None):
    """Generate a synthetic typing-speed dataset without simulating delays.

    Each sample picks one of five fixed pangram sentences, draws a
    words-per-minute value with ``uniform(20, 120)`` and derives the
    remaining features from it.

    Args:
        num_samples: Number of rows to generate.
        seed: Optional seed for a private random generator, making the
            output reproducible without touching global state. When ``None``
            (the default) the module-level ``random`` generator is used, so
            callers that rely on ``random.seed(...)`` keep working.

    Returns:
        A ``(features, target)`` tuple. ``features`` is a DataFrame with the
        columns ``wpm``, ``avg_pause``, ``pause_std``, ``error_rate`` and
        ``total_chars``; ``target`` is a list with one of ``'slow'``
        (wpm < 30), ``'moderate'`` (30 <= wpm < 60) or ``'fast'`` per row.

    Note:
        The label is a pure threshold on ``wpm``, and ``avg_pause``
        algebraically reduces to ``12 / wpm``. Any classifier trained on
        these columns can therefore recover the label exactly; a perfect
        accuracy score on this data reflects that leakage.
    """
    sentences = [
        "The quick brown fox jumps over the lazy dog.",
        "Pack my box with five dozen liquor jugs.",
        "How vexingly quick daft zebras jump!",
        "Bright vixens jump; dozy fowl quack.",
        "Sphinx of black quartz, judge my vow."
    ]
    columns = ['wpm', 'avg_pause', 'pause_std', 'error_rate', 'total_chars']

    # A private generator keeps seeded runs isolated from the global RNG;
    # without a seed we fall back to the shared module-level generator.
    rng = random if seed is None else random.Random(seed)

    data = []
    target = []
    for _ in range(num_samples):
        chars = len(rng.choice(sentences))

        # Simulated typing speed in words per minute.
        wpm = rng.uniform(20, 120)

        # Reverse-calculate the typing time from WPM (one "word" = 5 chars).
        total_time = (chars / 5) / (wpm / 60)
        avg_pause = total_time / chars
        pause_std = avg_pause * rng.uniform(0.8, 1.2)
        error_rate = rng.uniform(0, 0.1)

        # Categorize typing speed.
        if wpm < 30:
            speed_class = 'slow'
        elif wpm < 60:
            speed_class = 'moderate'
        else:
            speed_class = 'fast'

        data.append([wpm, avg_pause, pause_std, error_rate, chars])
        target.append(speed_class)

    return pd.DataFrame(data, columns=columns), target

# Generate dataset (500 samples is sufficient for good accuracy)
# The split and the model below pin random_state=42, but the data generator
# draws from the module-level `random` state; seed it too so that the whole
# run (data, split, model) is reproducible.
random.seed(42)
print("Generating dataset...")
# perf_counter is monotonic, so elapsed times cannot be skewed by clock changes.
start_time = time.perf_counter()
X, y = generate_typing_data(500)
print(f"Dataset generated in {time.perf_counter() - start_time:.2f} seconds")

# Split data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train model with optimized parameters
print("Training model...")
start_time = time.perf_counter()
model = RandomForestClassifier(n_estimators=50, max_depth=5, random_state=42)  # Reduced complexity
model.fit(X_train, y_train)
print(f"Model trained in {time.perf_counter() - start_time:.2f} seconds")

# Evaluate
# NOTE: the labels are thresholds on 'wpm', which is also a feature (and
# 'avg_pause' is derived directly from it in generate_typing_data), so an
# accuracy of ~1.00 here reflects label leakage, not real predictive skill.
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Model Accuracy: {accuracy:.2f}")

# Save model (written to the current working directory)
joblib.dump(model, 'typing_speed_model.joblib')
print("Model saved as 'typing_speed_model.joblib'")

# Feature importance analysis (optional)
importances = model.feature_importances_
features = X.columns
print("\nFeature Importances:")
for feature, importance in zip(features, importances):
    print(f"{feature}: {importance:.3f}")

Generating dataset...
Dataset generated in 0.01 seconds
Training model...
Model trained in 0.15 seconds
Model Accuracy: 1.00
Model saved as 'typing_speed_model.joblib'

Feature Importances:
wpm: 0.343
avg_pause: 0.443
pause_std: 0.205
error_rate: 0.007
total_chars: 0.001
