In [None]:
# Mount Google Drive at /content/drive so files stored in Drive can be read
# through the local filesystem. Relies on the google.colab package, so this
# cell is only expected to work inside a Google Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import pandas as pd
import numpy as np
import glob
import os
import time
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

# Start timing: wall-clock start for everything that follows in this cell.
start_time = time.time()

# Step 1: Load all TXT files
directory_path = "/content/drive/My Drive/gait-in-parkinsons-disease"  # Update path
# glob returns matches in arbitrary, filesystem-dependent order. Sort them so
# the concatenated row order (and any seeded sampling done on it) is
# reproducible from run to run.
all_files = sorted(glob.glob(os.path.join(directory_path, "*.txt")))

df_list = []
for file in all_files:
    try:
        temp_df = pd.read_csv(file, sep="\t", header=None, engine="python", on_bad_lines="skip")
    except Exception as e:
        # Deliberate best-effort: one unreadable file is reported and skipped
        # instead of aborting the whole load.
        print(f"Error reading {file}: {e}")
    else:
        df_list.append(temp_df)

# Fail early with an actionable message. Without this guard pd.concat raises
# a bare "No objects to concatenate" ValueError when nothing was loaded.
if not df_list:
    raise ValueError(
        f"No readable .txt files were loaded from {directory_path!r}; "
        "check that Google Drive is mounted and the path is correct."
    )

# Combine all files into a single frame with a fresh 0..n-1 index
df = pd.concat(df_list, ignore_index=True)

# Step 2: Preprocess Data
df = df.dropna()  # Remove rows containing any missing value

# Name the columns: every column but the last is a feature, the last is the label
num_columns = df.shape[1]  # Get the number of columns
column_names = [f"Feature_{i}" for i in range(num_columns - 1)] + ["Label"]
df.columns = column_names

# Convert all values to float
df = df.astype(float)

# If dataset is huge, sample only 50,000 rows for faster training
if len(df) > 50000:
    df = df.sample(n=50000, random_state=42)

# Check label values
print("Unique Label Values:", df["Label"].unique())

# Convert labels to binary classification: 1 if strictly above the median, else 0.
# The median is computed once and compared in a single vectorised operation;
# calling .median() inside a per-row lambda recomputes it for every row.
# NOTE(review): the printed unique values look continuous, so the last column
# may be a measurement rather than a class label -- confirm that thresholding
# it at the median is the intended target.
label_median = df["Label"].median()
df["Label"] = (df["Label"] > label_median).astype("int64")

# Split data into features and labels
X = df.drop(columns=["Label"])  # Features
y = df["Label"].astype(int)  # Convert to integer labels

# Step 3: Train Model
# Split the raw features first so that the held-out rows never influence the
# scaler's statistics (fitting the scaler on all rows leaks test-set
# information into preprocessing).
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Normalize features: learn mean/std from the training split only, then apply
# that same transform to the test split.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Full feature matrix scaled with the training-fitted scaler; kept so any
# later code that refers to X_scaled continues to work.
X_scaled = scaler.transform(X)

# Use fewer trees for faster training; n_jobs=-1 uses all available cores
model = RandomForestClassifier(n_estimators=50, random_state=42, n_jobs=-1)
model.fit(X_train, y_train)

# Step 4: Evaluate Model
# Predict on the held-out split and compare against its true labels.
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)

# Stop the clock started at the top of the cell.
end_time = time.time()
execution_time = end_time - start_time

# Report accuracy as a percentage and the elapsed wall-clock time, one per line.
print(
    f"✅ Model Accuracy: {accuracy * 100:.2f}%",
    f"⚡ Execution Time: {execution_time:.2f} seconds",
    sep="\n",
)


Unique Label Values: [   0.    614.13  909.37 ...  199.98  122.76 1156.1 ]
✅ Model Accuracy: 98.79%
⚡ Execution Time: 121.48 seconds
