In [1]:
import pandas as pd # converts the JSON into a DataFrame.
import numpy as np # used internally by pandas & sklearn.
import glob
import json

# CONFIGURATION
WINDOW_SIZE = 50   # 50 readings = approx 1 second (at 20ms rate)
STEP_SIZE = 10     # Overlap windows to get more data
INPUT_FILES = "*.csv" # Finds all your CSVs

def calculate_magnitude(row): #function that takes one sensor data row (main preprocessing step)

    return np.sqrt(row['acc_x']**2 + row['acc_y']**2 + row['acc_z']**2) #Turns 3-axis accelerometer readings into a single number called magnitude/total force.

def process_files(): # the begining of the windowing + feature extraction logic 
    all_files = glob.glob(INPUT_FILES) # Finds CSV files (returns all filenames matching "*.csv")
    print(f"Found {len(all_files)} files: {all_files}")

    processed_samples = [] # Each training sample (one window of data) will be stored as a dictionary inside this list.
    
    for filename in all_files: #runs for no of files
        print(f"Processing {filename}...") # Good for progress tracking.
        try: 
            df = pd.read_csv(filename) # Reads the CSV file into a pandas DataFrame.
            
            # 1. Feature Engineering section- Now the core preprocessing starts.
            # Calculate Total Force (Magnitude) and For every row, apply the previously defined calculate_magnitude() function
            df['mag'] = df.apply(calculate_magnitude, axis=1) # axis=1 means "apply function on each row"; Converts 3 messy values into 1 clean value
            
            # Calculate Jerk (Change in Force)--> calculates difference between each two consecutive magnitude values.
            df['jerk'] = df['mag'].diff().fillna(0) # fillna(0) replaces the first NaN with 0.
            
            # Fill missing GPS speeds with 0 to avoid errors during mean() calculations
            df['gps_speed'] = df['gps_speed'].fillna(0) #Summarizes speed behavior over time

            # 2. Sliding Window Logic
            
            for i in range(0, len(df) - WINDOW_SIZE, STEP_SIZE): # creates overlapping windows of sensor data. WINDOW_SIZE= training sample, STEP_SIZE= no of forward rows
                window = df.iloc[i : i + WINDOW_SIZE] # Extract rows ex:  from index i to i+50
                
                # The label for this window is the most common label in these 50 rows
                label = window['label'].mode()[0] # Mode gives the dominant driving state; cleaning; removes noise
                
                # Feature Extraction Block- Extract Features from this 1-second chunk
                features = {
                    "label": label,
                    "avg_speed": float(window['gps_speed'].mean()), # average of the speed values in the window (typical speed)
                    "max_force": float(window['mag'].max()), # Highest acceleration magnitude in the window to capture strong events (braking, sudden turns)
                    "std_force": float(window['mag'].std()), # How much it vibrated--> Measures how much shaking or variation is happening.
                    "min_force": float(window['mag'].min()), # Lowest magnitude to detect moments when car is steady or stopped.
                    "avg_jerk": float(window['jerk'].abs().mean()) # Represents smooth vs sudden movement transitions.
                }
                
                processed_samples.append(features) # Adds the computed feature dictionary to the list. ex: {"label":"Cruising", "avg_speed":3.2, ...}
                
        except Exception as e:
            print(f"Skipping {filename} due to error: {e}")

    # 3. Save to JSON--> the input dataset used for model training.
    print(f"Generated {len(processed_samples)} training samples.")
    with open('training_data.json', 'w') as f: # Opens a file named training_data.json in write mode.
        json.dump(processed_samples, f) # Writes the list as formatted JSON; No NaN allowed-cleaning
    print("Saved to 'training_data.json'. READY FOR AI!")

if __name__ == "__main__": # fill here
    process_files()

Found 6 files: ['10min- 1.csv', '10min- 2.csv', '10min- 3.csv', '13min.csv', '18min- 1.csv', '18min- 2.csv']
Processing 10min- 1.csv...
Processing 10min- 2.csv...
Processing 10min- 3.csv...
Processing 13min.csv...
Processing 18min- 1.csv...
Processing 18min- 2.csv...
Generated 6623 training samples.
Saved to 'training_data.json'. READY FOR AI!


In [2]:
import pandas as pd # loads JSON, CSV, and manage data tables as well.
import json
import seaborn as sns # Makes statistical plots
import matplotlib.pyplot as plt # creates graphs/plots
from sklearn.model_selection import train_test_split # Splits dataset into training (80%) and testing (20%). (sklearn.model_selection- ML library used for training RandomForest)
from sklearn.ensemble import RandomForestClassifier # Loads the RandomForest algorithm.
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score # Provides evaluation metrics.
import joblib # save and load trained ML (.pkl file) models to disk
# ML Model Training Section
def train_ambulance_ai(): # performs the entire training procedure.
    # 1. Load the processed data
    print("Loading training_data.json...")
    try:
        with open('training_data.json', 'r') as f: # Opens the JSON file created earlier.
            data = json.load(f) # Loads all training samples into the variable (data).
    except FileNotFoundError:
        print("Error: 'training_data.json' not found. Run process_data.py first!")
        return

    df = pd.DataFrame(data) # converts my JSON training samples into a structured Excel-like table (ML friendly)

    # 2. Sanity Check
    print(f"\nTotal Samples: {len(df)}") # Checks dataset size.
    print("Class Distribution (Before Balancing):")
    print(df['label'].value_counts()) # Shows how many samples each class (label) has.

    # 3. Prepare Inputs (X) and Outputs (y)
    X = df.drop(columns=['label']) # numeric feature columns (avg_speed, max_force..) only.
    y = df['label'] # correct class for each window (row).

    # 4. Split into Training (80%) and Testing (20%)

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) # ensures reproducible results by fixing the random seed. Start your random number generator from this exact position (42).

    # 5. Train the Random Forest
    # 'class_weight="balanced"' is CRITICAL here. 
    # It tells the AI: "Pay 10x more attention to 'Braking' than 'Cruising' because it's rare."
    print("\nTraining Random Forest Model...")
    model = RandomForestClassifier(n_estimators=100, class_weight='balanced', random_state=42) # Use 100 decision trees inside the forest, helps the model correctly identify and doesn't ignore rare classes behaviors like braking, turning, or pullover.
    model.fit(X_train, y_train) # Train the model; The model learns patterns in the training data.

    # 6. Evaluate
    print("\n--- MODEL REPORT CARD ---")
    y_pred = model.predict(X_test) # Predict test labels; Run the trained model on the test dataset & Compare predicted labels with true labels.
    
    accuracy = accuracy_score(y_test, y_pred) # Evaluate Model Performance
    print(f"Overall Accuracy: {accuracy:.2%}")
    
    print("\nDetailed Report:")
    print(classification_report(y_test, y_pred))

    # 7. Confusion Matrix (Where did it get confused?)
    print("\nConfusion Matrix (Rows=Actual, Cols=Predicted):")
    cm = confusion_matrix(y_test, y_pred)
    print(cm)

    # 8. Feature Importance (What sensors mattered?)
    print("\n--- SENSOR IMPORTANCE ---")
    importances = model.feature_importances_ #Shows how useful that feature was in making correct predictions
    features = X.columns #matches each importance value to its correct name.
    # Sort them
    indices = importances.argsort()[::-1] #sorts the features by importance — from most important → least important. .argsort() sorts from smallest to largest.
    for i in range(len(features)):
        print(f"{i+1}. {features[indices[i]]}: {importances[indices[i]]:.4f}")

    # 9. Save the Model
    joblib.dump(model, 'ambulance_model.pkl')
    print("\n[SUCCESS] Model saved as 'ambulance_model.pkl'")

if __name__ == "__main__":
    train_ambulance_ai()

Loading training_data.json...

Total Samples: 6623
Class Distribution (Before Balancing):
label
Cruising      4772
Braking       1152
Lane Left      419
Lane Right     251
Pullover        29
Name: count, dtype: int64

Training Random Forest Model...

--- MODEL REPORT CARD ---
Overall Accuracy: 87.92%

Detailed Report:
              precision    recall  f1-score   support

     Braking       0.85      0.78      0.81       219
    Cruising       0.89      0.98      0.93       952
   Lane Left       0.86      0.56      0.68        91
  Lane Right       0.85      0.19      0.31        59
    Pullover       1.00      0.25      0.40         4

    accuracy                           0.88      1325
   macro avg       0.89      0.55      0.63      1325
weighted avg       0.88      0.88      0.86      1325


Confusion Matrix (Rows=Actual, Cols=Predicted):
[[171  43   4   1   0]
 [ 17 931   3   1   0]
 [ 12  28  51   0   0]
 [  1  47   0  11   0]
 [  0   2   1   0   1]]

--- SENSOR IMPORTANCE ---

In [3]:
import joblib #load that saved model (.pkl) back into memory
import m2cgen as m2c #Convert a trained machine learning model into js
import json
import os

# 1. Load trained model
print("Loading ambulance_model.pkl...")
try:
    model = joblib.load('ambulance_model.pkl')
except FileNotFoundError:
    print("Error: 'ambulance_model.pkl' not found. Make sure you ran train_model.py!")
    exit()

# 2. Convert it to JavaScript code
print("Converting to JavaScript...")
# This turns the random forest trees into a huge "if/else" function
js_code = m2c.export_to_javascript(model)

# 3. Add the "export" keyword so React Native can use it
final_js = "export default " + js_code

# 4. Save it to the CURRENT directory first (safest for Jupyter)
output_filename = 'ambulance_model.js' 

with open(output_filename, 'w') as f:
    f.write(final_js)

print(f"Success! Model saved as {output_filename}")
print(f"Action Required: Move '{output_filename}' into your 'app' folder manually.")

Loading ambulance_model.pkl...
Converting to JavaScript...
Success! Model saved as ambulance_model.js
Action Required: Move 'ambulance_model.js' into your 'app' folder manually.
