In [4]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, classification_report
from sklearn.preprocessing import StandardScaler
import os

class UCIHARBenchmark:
    """Benchmark MLP, CNN, LSTM and CNN-LSTM classifiers on UCI HAR raw signals.

    Typical use: construct with the dataset root directory, call
    ``load_raw_signals()``, build a model with one of the ``build_*``
    methods, pass it to ``train_and_evaluate_model()``, and finally summarise
    with ``create_results_table()`` and ``analyze_results()``.

    Attributes populated by ``load_raw_signals()``:
        X_train_raw, X_test_raw: arrays shaped (samples, timesteps, sensors).
        X_train_flat, X_test_flat: the same windows flattened to 2-D and
            standardised with a scaler fitted on the training split only.
        y_train, y_test: integer class ids matching the keys of
            ``activity_labels``.
    """

    # Base names of the inertial-signal files, in the order the channels are
    # stacked along the last axis of X_*_raw.
    _SIGNAL_NAMES = (
        'body_acc_x', 'body_acc_y', 'body_acc_z',
        'body_gyro_x', 'body_gyro_y', 'body_gyro_z',
        'total_acc_x', 'total_acc_y', 'total_acc_z',
    )

    def __init__(self, data_path):
        """Remember the dataset location; no files are read here.

        Args:
            data_path: directory containing the ``train`` and ``test``
                sub-directories of the dataset.
        """
        self.data_path = data_path
        # Filled in by load_raw_signals().
        self.X_train_raw = None
        self.X_test_raw = None
        self.X_train_flat = None
        self.X_test_flat = None
        self.y_train = None
        self.y_test = None
        self.n_classes = 6
        # Defaults; load_raw_signals() replaces them with the loaded shape.
        self.n_timesteps = 128
        self.n_sensors = 9
        self.activity_labels = {
            0: 'WALKING',
            1: 'WALKING_UPSTAIRS',
            2: 'WALKING_DOWNSTAIRS',
            3: 'SITTING',
            4: 'STANDING',
            5: 'LAYING',
        }
        # model name -> {'accuracy', 'precision', 'recall', 'f1_score'}
        self.results = {}
        self.scaler = StandardScaler()

    def _load_split(self, split):
        """Read the signals and labels of one split ('train' or 'test').

        Returns:
            ``(signals, labels)``: ``signals`` has shape
            (samples, timesteps, sensors); ``labels`` is a 1-D int array.

        Raises:
            OSError: a signal or label file is missing or unreadable.
            ValueError: a file cannot be parsed, the channel files disagree
                in shape, or the label count differs from the window count.
        """
        signal_dir = os.path.join(self.data_path, split, 'Inertial Signals')
        # ndmin=2 keeps a one-window file 2-D so stacking on axis 2 works.
        channels = [
            np.loadtxt(os.path.join(signal_dir, f'{name}_{split}.txt'), ndmin=2)
            for name in self._SIGNAL_NAMES
        ]
        signals = np.stack(channels, axis=2)

        label_path = os.path.join(self.data_path, split, f'y_{split}.txt')
        # Shift by one so the ids line up with the zero-based keys of
        # activity_labels.
        labels = np.loadtxt(label_path, ndmin=1).astype(int) - 1

        if labels.shape[0] != signals.shape[0]:
            raise ValueError(
                f"{split} split has {labels.shape[0]} labels "
                f"for {signals.shape[0]} signal windows"
            )
        return signals, labels

    def load_raw_signals(self):
        """Load both splits, flatten and standardise them, and store the arrays.

        The instance is only updated when everything loaded successfully, so
        a failed call never leaves it half-populated. ``n_timesteps`` and
        ``n_sensors`` are refreshed from the loaded training data.

        Returns:
            True on success; False (after printing the error) when a file is
            missing/unreadable or the data is malformed.
        """
        print("Loading UCI HAR Dataset - RAW INERTIAL SIGNALS...")

        try:
            print("Loading training raw signals...")
            X_train_raw, y_train = self._load_split('train')

            print("Loading test raw signals...")
            X_test_raw, y_test = self._load_split('test')

            if X_train_raw.shape[1:] != X_test_raw.shape[1:]:
                raise ValueError(
                    f"train windows {X_train_raw.shape[1:]} and test windows "
                    f"{X_test_raw.shape[1:]} have different shapes"
                )

            # One row per window for the MLP; the scaler is fitted on the
            # training split only and then applied to the test split.
            X_train_flat = self.scaler.fit_transform(
                X_train_raw.reshape(X_train_raw.shape[0], -1)
            )
            X_test_flat = self.scaler.transform(
                X_test_raw.reshape(X_test_raw.shape[0], -1)
            )
        except (OSError, ValueError) as e:
            print(f"Error loading raw signals: {str(e)}")
            return False

        self.X_train_raw, self.X_test_raw = X_train_raw, X_test_raw
        self.X_train_flat, self.X_test_flat = X_train_flat, X_test_flat
        self.y_train, self.y_test = y_train, y_test
        # Keep the model input shape in sync with what was actually loaded.
        self.n_timesteps, self.n_sensors = X_train_raw.shape[1:]

        print(" Raw signals loaded successfully:")
        print(f"   Training raw: {self.X_train_raw.shape} (samples, timesteps, sensors)")
        print(f"   Test raw: {self.X_test_raw.shape}")
        print(f"   Training flat: {self.X_train_flat.shape} (for MLP)")
        print(f"   Test flat: {self.X_test_flat.shape} (for MLP)")
        print(f"   Labels: {self.y_train.shape} train, {self.y_test.shape} test")

        return True

    @staticmethod
    def _finalize(model, label):
        """Compile ``model`` with the shared settings and report its size."""
        model.compile(
            optimizer='adam',
            loss='sparse_categorical_crossentropy',
            metrics=['accuracy'],
        )
        print(f"{label} model built with {model.count_params():,} parameters")
        return model

    def build_mlp(self):
        """Build and compile the dense baseline that consumes flattened windows.

        Returns:
            A compiled Keras model whose input width is the number of columns
            of ``X_train_flat`` (or ``n_timesteps * n_sensors`` when no data
            has been loaded yet).
        """
        print("Building MLP model...")
        print("Architecture: Input → Dense(512) → Dense(256) → Dense(128) → Output")

        if self.X_train_flat is None:
            # Allow building before loading instead of failing on None.shape.
            input_dim = self.n_timesteps * self.n_sensors
        else:
            input_dim = self.X_train_flat.shape[1]

        model = keras.Sequential([
            layers.Input(shape=(input_dim,)),
            layers.Dense(512, activation='relu'),
            layers.Dropout(0.5),
            layers.Dense(256, activation='relu'),
            layers.Dropout(0.5),
            layers.Dense(128, activation='relu'),
            layers.Dropout(0.3),
            layers.Dense(self.n_classes, activation='softmax'),
        ])
        return self._finalize(model, "MLP")

    def build_cnn(self):
        """Build and compile the 1-D convolutional model for raw windows.

        Returns:
            A compiled Keras model taking (n_timesteps, n_sensors) inputs.
        """
        print("Building CNN model...")
        print("Architecture: Input → Conv1D → Conv1D → GlobalMaxPool → Dense → Output")

        model = keras.Sequential([
            layers.Input(shape=(self.n_timesteps, self.n_sensors)),
            layers.Conv1D(64, 3, activation='relu', padding='same'),
            layers.Conv1D(64, 3, activation='relu', padding='same'),
            layers.Dropout(0.3),
            layers.Conv1D(128, 3, activation='relu', padding='same'),
            layers.Conv1D(128, 3, activation='relu', padding='same'),
            layers.Dropout(0.3),
            layers.GlobalMaxPooling1D(),
            layers.Dense(128, activation='relu'),
            layers.Dropout(0.5),
            layers.Dense(self.n_classes, activation='softmax'),
        ])
        return self._finalize(model, "CNN")

    def build_lstm(self):
        """Build and compile the stacked two-layer LSTM model for raw windows.

        Returns:
            A compiled Keras model taking (n_timesteps, n_sensors) inputs.
        """
        print("Building LSTM model...")
        print("Architecture: Input → LSTM(100) → LSTM(100) → Dense(50) → Output")

        model = keras.Sequential([
            layers.Input(shape=(self.n_timesteps, self.n_sensors)),
            # The first LSTM returns the full sequence to feed the second.
            layers.LSTM(100, return_sequences=True, dropout=0.3, recurrent_dropout=0.3),
            layers.LSTM(100, dropout=0.3, recurrent_dropout=0.3),
            layers.Dense(50, activation='relu'),
            layers.Dropout(0.5),
            layers.Dense(self.n_classes, activation='softmax'),
        ])
        return self._finalize(model, "LSTM")

    def build_cnn_lstm(self):
        """Build and compile the hybrid model: a Conv1D front-end feeding LSTMs.

        Returns:
            A compiled Keras model taking (n_timesteps, n_sensors) inputs.
        """
        print("Building CNN-LSTM Hybrid model...")
        print("Architecture: Input → Conv1D → Conv1D → MaxPool → LSTM → LSTM → Dense → Output")

        model = keras.Sequential([
            layers.Input(shape=(self.n_timesteps, self.n_sensors)),

            # Convolutional feature extractor; pooling halves the sequence.
            layers.Conv1D(64, 3, activation='relu', padding='same'),
            layers.Conv1D(64, 3, activation='relu', padding='same'),
            layers.Dropout(0.3),
            layers.MaxPooling1D(pool_size=2),

            # Recurrent layers over the pooled feature sequence.
            layers.LSTM(50, return_sequences=True, dropout=0.3),
            layers.LSTM(50, dropout=0.3),

            layers.Dense(50, activation='relu'),
            layers.Dropout(0.5),
            layers.Dense(self.n_classes, activation='softmax'),
        ])
        return self._finalize(model, "CNN-LSTM")

    def train_and_evaluate_model(self, model, model_name, use_raw_signals=True, epochs=30, batch_size=32):
        """Fit ``model`` on the training split and score it on the test split.

        Accuracy and macro-averaged precision/recall/F1 are printed and stored
        in ``self.results[model_name]``, followed by a per-class report.

        Args:
            model: a compiled Keras model.
            model_name: key under which the metrics are stored.
            use_raw_signals: True to feed the 3-D raw windows, False to feed
                the flattened, standardised features.
            epochs: number of training epochs.
            batch_size: mini-batch size.

        Returns:
            The trained model.

        Raises:
            ValueError: if no data has been loaded yet.
        """
        print(f"\n{'='*60}")
        print(f"Training {model_name}")
        print(f"{'='*60}")

        if use_raw_signals:
            X_train, X_test = self.X_train_raw, self.X_test_raw
        else:
            X_train, X_test = self.X_train_flat, self.X_test_flat

        if X_train is None or self.y_train is None:
            raise ValueError("No data loaded; call load_raw_signals() first.")

        if use_raw_signals:
            print(f"Using raw signals shape: {X_train.shape}")
        else:
            print(f"Using flattened signals shape: {X_train.shape}")

        # 20% of the training data is held out by Keras for validation.
        model.fit(
            X_train, self.y_train,
            epochs=epochs,
            batch_size=batch_size,
            validation_split=0.2,
            verbose=1,
        )

        # Class probabilities -> predicted class ids.
        y_pred_classes = np.argmax(model.predict(X_test, verbose=0), axis=1)

        accuracy = accuracy_score(self.y_test, y_pred_classes)
        precision, recall, f1, _ = precision_recall_fscore_support(
            self.y_test, y_pred_classes, average='macro', zero_division=0
        )

        self.results[model_name] = {
            'accuracy': accuracy,
            'precision': precision,
            'recall': recall,
            'f1_score': f1,
        }

        print(f"\n{model_name} Results:")
        print(f"Accuracy:  {accuracy:.4f}")
        print(f"Precision: {precision:.4f}")
        print(f"Recall:    {recall:.4f}")
        print(f"F1-Score:  {f1:.4f}")

        print(f"\nDetailed Classification Report for {model_name}:")
        class_ids = list(range(self.n_classes))
        target_names = [self.activity_labels[i] for i in class_ids]
        # Passing labels explicitly keeps target_names aligned even when a
        # class is absent from both the test labels and the predictions.
        print(classification_report(
            self.y_test, y_pred_classes,
            labels=class_ids, target_names=target_names, zero_division=0,
        ))

        return model

    def create_results_table(self):
        """Print and return a table with one row of metrics per trained model.

        Returns:
            A DataFrame with columns Model, Accuracy, Precision, Recall and
            F1-Score (metrics formatted as 4-decimal strings), or None when
            no model has been evaluated yet.
        """
        if not self.results:
            print("No results available. Run models first.")
            return

        print("\n" + "="*80)
        print("COMPREHENSIVE MODEL COMPARISON TABLE")
        print("UCI HAR Dataset - Raw Inertial Signals")
        print("="*80)

        results_df = pd.DataFrame([
            {
                'Model': model_name,
                'Accuracy': f"{metrics['accuracy']:.4f}",
                'Precision': f"{metrics['precision']:.4f}",
                'Recall': f"{metrics['recall']:.4f}",
                'F1-Score': f"{metrics['f1_score']:.4f}",
            }
            for model_name, metrics in self.results.items()
        ])
        print(results_df.to_string(index=False))

        return results_df

    def analyze_results(self):
        """Print the best model, an F1 ranking and a short note per model.

        Prints a hint and returns None when no model has been evaluated yet.
        """
        if not self.results:
            print("No results available. Run models first.")
            return

        print("\n" + "="*80)
        print("COMPREHENSIVE MODEL ANALYSIS")
        print("="*80)

        # Stable sort, best first: ties keep their insertion order, so the
        # head of the list is the same entry max() would pick.
        ranking = sorted(self.results.items(), key=lambda x: x[1]['f1_score'], reverse=True)
        best_name, best_metrics = ranking[0]
        worst_f1 = ranking[-1][1]['f1_score']

        print(f" Best Performance: {best_name}")
        print(f"   F1-Score: {best_metrics['f1_score']:.4f}")
        print(f"   Accuracy: {best_metrics['accuracy']:.4f}")

        print("\n Performance Ranking (by F1-Score):")
        for i, (model, metrics) in enumerate(ranking, 1):
            print(f"   {i}. {model}: {metrics['f1_score']:.4f}")

        print("\n Model-Specific Insights:")

        # Derived from the instance so the report matches the actual input
        # shape rather than a hard-coded 128 x 9.
        flat_features = self.n_timesteps * self.n_sensors
        insights = (
            ('MLP', (
                f"Uses flattened raw signals ({flat_features} features)",
                "Good baseline, loses temporal structure",
            )),
            ('CNN', (
                "Excels at local pattern detection",
                "Good for spatial/local temporal features",
            )),
            ('LSTM', (
                "Specializes in long-term temporal dependencies",
                "Good for sequence modeling",
            )),
            ('CNN-LSTM', (
                "Combines local patterns + sequence modeling",
                "Most sophisticated approach",
            )),
        )
        for name, notes in insights:
            if name in self.results:
                print(f"   • {name} (F1: {self.results[name]['f1_score']:.4f})")
                for note in notes:
                    print(f"     - {note}")

        print("\n Key Findings:")
        print("   • All models use RAW inertial signals (as required)")
        print(f"   • Data shape: (samples, {self.n_timesteps} timesteps, {self.n_sensors} sensors)")
        print("   • 9 sensors: 3×acc + 3×gyro + 3×total_acc")
        print(f"   • Performance difference: {best_metrics['f1_score'] - worst_f1:.4f}")


if __name__ == "__main__":
    # Dataset root. Set the UCI_HAR_DATA_PATH environment variable to run on
    # another machine; the original location remains the default.
    data_path = os.environ.get(
        "UCI_HAR_DATA_PATH",
        r"/Users/advait/Desktop/Jupyter/ML-1/human/UCI_HAR_Dataset",
    )

    benchmark = UCIHARBenchmark(data_path)

    if benchmark.load_raw_signals():
        print("\n Starting comprehensive benchmarking with RAW signals...")

        # (result key, model factory, feed raw 3-D windows?)
        # Only the MLP consumes the flattened, standardised features.
        models_to_train = [
            ('MLP', benchmark.build_mlp, False),
            ('CNN', benchmark.build_cnn, True),
            ('LSTM', benchmark.build_lstm, True),
            ('CNN-LSTM', benchmark.build_cnn_lstm, True),
        ]

        for model_name, build_func, use_raw in models_to_train:
            print(f"\n Training {model_name}...")
            model = build_func()
            benchmark.train_and_evaluate_model(
                model, model_name,
                use_raw_signals=use_raw,
                epochs=20, batch_size=32,
            )

            # Drop the trained model and reset Keras' global state before
            # building the next one.
            del model
            tf.keras.backend.clear_session()

        benchmark.create_results_table()
        benchmark.analyze_results()

    else:
        # The loader has already printed the underlying error.
        print(f"Could not load the UCI HAR dataset from {data_path!r}; benchmarking skipped.")


Loading UCI HAR Dataset - RAW INERTIAL SIGNALS...
Loading training raw signals...
Loading test raw signals...
 Raw signals loaded successfully:
   Training raw: (7352, 128, 9) (samples, timesteps, sensors)
   Test raw: (2947, 128, 9)
   Training flat: (7352, 1152) (for MLP)
   Test flat: (2947, 1152) (for MLP)
   Labels: (7352,) train, (2947,) test

 Starting comprehensive benchmarking with RAW signals...

 Training MLP...
Building MLP model...
Architecture: Input → Dense(512) → Dense(256) → Dense(128) → Output
MLP model built with 755,334 parameters

Training MLP
Using flattened signals shape: (7352, 1152)
Epoch 1/20
184/184 ━━━━━━━━━━━━━━━━━━━━ 1s 4ms/step - accuracy: 0.6533 - loss: 0.9595 - val_accuracy: 0.8926 - val_loss: 0.4118
Epoch 2/20
184/184 ━━━━━━━━━━━━━━━━━━━━ 1s 3ms/step - accuracy: 0.7995 - loss: 0.5118 - val_accuracy: 0.9157 - val_loss: 0.3257
Epoch 3/20
[1m184/184[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m