In [1]:
import json
import os
import pickle
import warnings
from collections import deque
from datetime import datetime

import numpy as np
import pandas as pd
import tensorflow as tf
from scipy import signal
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.models import Sequential
warnings.filterwarnings('ignore')

class MouseDataProcessor:
    """Turn raw mouse-movement logs into feature sequences for a sequence model.

    The pipeline is: ``load_mouse_data`` -> ``create_sequences`` (which calls
    ``calculate_velocity_acceleration``) -> ``preprocess_for_training``.
    ``extract_statistical_features`` is an independent summary of one
    already-processed frame.

    ``window_size`` and ``sampling_rate`` are stored on the instance but are
    not read by any method of this class.
    """

    # Per-sample columns fed to the sequence model, in model input order.
    FEATURE_COLUMNS = ['speed', 'acceleration', 'direction_change',
                       'curvature', 'jerk', 'dt']

    # Columns of the frame returned by ``load_mouse_data``.
    RAW_COLUMNS = ['x', 'y', 'timestamp']

    def __init__(self, window_size=100, sampling_rate=50):
        self.window_size = window_size
        self.sampling_rate = sampling_rate  # Hz
        self.scaler = StandardScaler()

    def load_mouse_data(self, file_path='/mouse_data.json'):
        """Load 'Move' events from a JSON-lines file.

        Each line is parsed as its own JSON document. Only entries whose
        ``Event`` is ``'Move'`` are kept; their ``Position.X``, ``Position.Y``
        and ``Time`` fields become the ``x``, ``y`` and ``timestamp`` columns.
        Blank, unparsable or structurally incomplete lines are skipped.

        NOTE(review): the default path points at the filesystem root; callers
        in this notebook always pass an explicit path.

        Args:
            file_path: Path of the JSON-lines file to read.

        Returns:
            DataFrame with columns ``x``, ``y``, ``timestamp`` (always present,
            even when no record was loaded).
        """
        records = []
        with open(file_path, 'r') as f:
            for raw_line in f:
                line = raw_line.strip()
                if not line:
                    continue
                try:
                    entry = json.loads(line)
                    if entry.get('Event') == 'Move':
                        records.append({
                            'x': entry['Position']['X'],
                            'y': entry['Position']['Y'],
                            'timestamp': entry['Time']
                        })
                except (json.JSONDecodeError, AttributeError, KeyError, TypeError):
                    # Best-effort parsing: skip malformed lines, but do not
                    # swallow unrelated errors (e.g. KeyboardInterrupt) the way
                    # a bare ``except`` would.
                    continue
        return pd.DataFrame(records, columns=self.RAW_COLUMNS)

    def calculate_velocity_acceleration(self, df):
        """Derive kinematic features from positions and timestamps.

        Works on a copy, so the caller's frame is left untouched.

        Assumes ``timestamp`` is numeric and non-decreasing; its unit is not
        established anywhere in this file -- TODO confirm with the recorder.

        Args:
            df: DataFrame with ``x``, ``y`` and ``timestamp`` columns.

        Returns:
            A new DataFrame with the original columns plus ``dt``, ``dx``,
            ``dy``, ``distance``, ``vx``, ``vy``, ``speed``, ``ax``, ``ay``,
            ``acceleration``, ``jerk``, ``direction``, ``direction_change``
            and ``curvature``. The diff-based acceleration and jerk columns
            contain NaN in their leading rows.
        """
        df = df.copy()
        eps = 1e-8  # keeps the divisions finite when consecutive values repeat

        # Time step and displacement between consecutive samples
        df['dt'] = df['timestamp'].diff().fillna(0)
        dt_safe = df['dt'] + eps
        df['dx'] = df['x'].diff().fillna(0)
        df['dy'] = df['y'].diff().fillna(0)
        df['distance'] = np.sqrt(df['dx']**2 + df['dy']**2)

        # Velocity components and speed
        df['vx'] = df['dx'] / dt_safe
        df['vy'] = df['dy'] / dt_safe
        df['speed'] = np.sqrt(df['vx']**2 + df['vy']**2)

        # Acceleration components and magnitude
        df['ax'] = df['vx'].diff() / dt_safe
        df['ay'] = df['vy'].diff() / dt_safe
        df['acceleration'] = np.sqrt(df['ax']**2 + df['ay']**2)

        # Jerk: rate of change of the acceleration magnitude
        df['jerk'] = df['acceleration'].diff() / dt_safe

        # Heading and heading change. The raw difference of two arctan2 angles
        # can be close to 2*pi for a tiny turn across the negative x-axis, so
        # wrap it back into [-pi, pi) before taking the magnitude.
        df['direction'] = np.arctan2(df['dy'], df['dx'])
        turn = df['direction'].diff()
        turn = (turn + np.pi) % (2 * np.pi) - np.pi
        df['direction_change'] = turn.abs().fillna(0)

        # Curvature: heading change per unit of distance travelled
        df['curvature'] = df['direction_change'] / (df['distance'] + eps)

        return df

    def extract_statistical_features(self, df):
        """Summarise a processed frame as a fixed-length feature vector.

        Args:
            df: Frame as returned by ``calculate_velocity_acceleration``.

        Returns:
            1-D array of 17 values: speed (mean, std, max, min, IQR),
            acceleration (mean, std, max, skew), movement pattern (mean
            direction change, mean curvature, jerk mean, jerk std, fraction of
            samples faster than the mean speed) and timing (dt mean, dt std,
            reciprocal of mean dt).

        Raises:
            ValueError: If ``df`` has no rows.
        """
        n_rows = len(df)
        if n_rows == 0:
            raise ValueError("Cannot extract features from an empty DataFrame")

        speed = df['speed']
        accel = df['acceleration']
        jerk = df['jerk']
        dt = df['dt']

        speed_stats = [
            speed.mean(),
            speed.std(),
            speed.max(),
            speed.min(),
            np.percentile(speed, 75) - np.percentile(speed, 25)
        ]

        accel_stats = [
            accel.mean(),
            accel.std(),
            accel.max(),
            accel.skew() if n_rows > 1 else 0
        ]

        pattern_stats = [
            df['direction_change'].mean(),
            df['curvature'].mean(),
            jerk.mean(),
            jerk.std(),
            (speed > speed.mean()).sum() / n_rows  # fraction (0..1) of fast samples
        ]

        timing_stats = [
            dt.mean(),
            dt.std(),
            1.0 / (dt.mean() + 1e-8)  # reciprocal of the mean time step
        ]

        return np.array(speed_stats + accel_stats + pattern_stats + timing_stats)

    def create_sequences(self, df, sequence_length=50):
        """Build overlapping fixed-length windows of per-sample features.

        Windows advance one row at a time, so ``len(df) - sequence_length + 1``
        windows are produced. NaNs in the feature columns are replaced by 0.

        Args:
            df: Raw frame with ``x``, ``y`` and ``timestamp`` columns.
            sequence_length: Number of consecutive rows per window.

        Returns:
            Array of shape ``(n_windows, sequence_length, 6)``; ``n_windows``
            is 0 when ``df`` has fewer rows than ``sequence_length``.

        Raises:
            ValueError: If ``sequence_length`` is smaller than 1.
        """
        if sequence_length < 1:
            raise ValueError("sequence_length must be a positive integer")

        feature_columns = self.FEATURE_COLUMNS
        n_windows = len(df) - sequence_length + 1
        if n_windows <= 0:
            # Keep the 3-D layout so downstream reshapes stay well defined.
            return np.empty((0, sequence_length, len(feature_columns)))

        processed_df = self.calculate_velocity_acceleration(df)

        # Convert once to a NumPy matrix; slicing it is far cheaper than
        # slicing the DataFrame on every iteration.
        values = processed_df[feature_columns].fillna(0).to_numpy()

        return np.stack([values[start:start + sequence_length]
                         for start in range(n_windows)])

    def preprocess_for_training(self, sequences, fit=True):
        """Standardise every feature of a batch of sequences.

        The batch is flattened to ``(rows, n_features)`` so that each feature
        is scaled across all windows and time steps, then restored to its
        original shape.

        Args:
            sequences: Array whose last axis indexes the features.
            fit: When True (default) the scaler is re-fitted on ``sequences``.
                Pass False to reuse the statistics of an earlier fit, e.g.
                when scoring new data; the scaler must already be fitted.

        Returns:
            Scaled array with the same shape as ``sequences``.
        """
        sequences = np.asarray(sequences)
        original_shape = sequences.shape
        flat = sequences.reshape(-1, original_shape[-1])

        if fit:
            flat_scaled = self.scaler.fit_transform(flat)
        else:
            flat_scaled = self.scaler.transform(flat)

        return flat_scaled.reshape(original_shape)


In [2]:
class MouseAnomalyDetector:
    """LSTM-based reconstruction model that flags unusual mouse sequences.

    The network maps a ``(sequence_length, n_features)`` window to a flattened
    reconstruction of itself. A window is reported as anomalous when its mean
    squared reconstruction error exceeds a threshold learned in ``train``.
    """

    def __init__(self, sequence_length=50, n_features=6):
        self.sequence_length = sequence_length
        self.n_features = n_features
        self.model = None      # set by build_model() / load_model()
        self.threshold = None  # set by train() / load_model()

    @staticmethod
    def _params_path(filepath):
        """Return the sidecar pickle path that belongs to a model path.

        The model file's extension (whatever it is) is replaced by
        ``_params.pkl``; for ``*.h5`` paths this is the same name a plain
        ``.replace('.h5', '_params.pkl')`` would produce. Deriving the name
        from the extension guarantees the result differs from ``filepath``.
        """
        root, _ext = os.path.splitext(filepath)
        return root + '_params.pkl'

    def _reconstruction_error(self, X):
        """Per-window mean squared error between ``X`` and its reconstruction."""
        predictions = self.model.predict(X)
        targets = X.reshape(X.shape[0], -1)
        return np.mean((predictions - targets) ** 2, axis=1)

    def build_model(self):
        """Build and compile the network, store it on ``self.model``.

        Returns:
            The compiled Keras model.
        """
        model = Sequential([
            # Encoder: three stacked LSTMs ending in a 16-unit summary vector
            LSTM(64, return_sequences=True, input_shape=(self.sequence_length, self.n_features)),
            Dropout(0.2),
            LSTM(32, return_sequences=True),
            Dropout(0.2),
            LSTM(16, return_sequences=False),

            # Decoder: dense layers expanding to the flattened input window
            Dense(16, activation='relu'),
            Dense(32, activation='relu'),
            Dense(self.sequence_length * self.n_features, activation='linear')
        ])

        model.compile(
            optimizer='adam',
            loss=tf.keras.losses.MeanSquaredError(),
            metrics=[tf.keras.metrics.MeanAbsoluteError()]
        )

        self.model = model
        return model

    def train(self, X_train, epochs=100, batch_size=32, validation_split=0.2,
              threshold_percentile=95):
        """Fit the model and derive the anomaly threshold.

        Args:
            X_train: Array of shape ``(n_windows, sequence_length, n_features)``.
            epochs: Number of training epochs.
            batch_size: Mini-batch size.
            validation_split: Fraction of ``X_train`` held out by Keras.
            threshold_percentile: Percentile of the training reconstruction
                errors used as the anomaly threshold.

        Returns:
            The history object returned by ``model.fit``.
        """
        if self.model is None:
            self.build_model()

        # The target is the input window itself, flattened to match the
        # model's final Dense layer.
        y_train = X_train.reshape(X_train.shape[0], -1)

        history = self.model.fit(
            X_train, y_train,
            epochs=epochs,
            batch_size=batch_size,
            validation_split=validation_split,
            verbose=1,
            shuffle=True
        )

        # Threshold = chosen percentile of the errors on all of X_train
        self.threshold = np.percentile(
            self._reconstruction_error(X_train), threshold_percentile
        )

        return history

    def predict_anomaly(self, X_test):
        """Score windows against the learned threshold.

        Args:
            X_test: Array of shape ``(n_windows, sequence_length, n_features)``.

        Returns:
            Tuple ``(anomalies, anomaly_scores)``: a boolean array that is True
            where the reconstruction error exceeds the threshold, and the
            errors divided by the threshold.

        Raises:
            ValueError: If no model or no threshold is available yet.
        """
        if self.model is None:
            raise ValueError("Model not trained yet!")
        if self.threshold is None:
            raise ValueError("Anomaly threshold not set; call train() or load_model() first")

        mse = self._reconstruction_error(X_test)

        anomalies = mse > self.threshold
        anomaly_scores = mse / self.threshold

        return anomalies, anomaly_scores

    def save_model(self, filepath):
        """Save the model to ``filepath`` and its parameters next to it.

        The threshold, sequence length and feature count go to a sidecar
        pickle whose name is derived by ``_params_path``.

        Raises:
            ValueError: If there is no model to save.
        """
        if self.model is None:
            raise ValueError("Model not trained yet!")

        self.model.save(filepath)

        with open(self._params_path(filepath), 'wb') as f:
            pickle.dump({
                'threshold': self.threshold,
                'sequence_length': self.sequence_length,
                'n_features': self.n_features
            }, f)

    def load_model(self, filepath):
        """Load a model saved by ``save_model`` together with its parameters."""
        self.model = tf.keras.models.load_model(filepath)

        # SECURITY: unpickling executes arbitrary code from the file. Only
        # load parameter files that this project wrote itself.
        with open(self._params_path(filepath), 'rb') as f:
            params = pickle.load(f)

        self.threshold = params['threshold']
        self.sequence_length = params['sequence_length']
        self.n_features = params['n_features']


In [3]:
def train_mouse_anomaly_detector(data_file_path, model_save_path,
                                 sequence_length=50, epochs=100, batch_size=32):
    """Run the complete training pipeline and persist its artefacts.

    Steps: load 'Move' events, build sliding-window sequences, standardise
    them, train the detector, then save the model (plus its parameter sidecar)
    and the fitted scaler.

    Args:
        data_file_path: JSON-lines file with the recorded mouse events.
        model_save_path: Destination of the trained model. The scaler is
            written next to it with the extension replaced by ``_scaler.pkl``.
        sequence_length: Number of consecutive samples per training window.
        epochs: Number of training epochs.
        batch_size: Mini-batch size.

    Returns:
        Tuple ``(detector, processor, history)``.

    Raises:
        ValueError: If the data yields no training sequence.
    """
    processor = MouseDataProcessor(window_size=100, sampling_rate=50)

    # Load and process data
    print("Loading mouse data...")
    df = processor.load_mouse_data(data_file_path)
    print(f"Loaded {len(df)} mouse movement records")

    # Create sequences
    print("Creating sequences...")
    sequences = processor.create_sequences(df, sequence_length=sequence_length)
    print(f"Created {len(sequences)} sequences")

    if len(sequences) == 0:
        raise ValueError(
            f"Not enough data: {len(df)} records cannot fill a single "
            f"sequence of length {sequence_length}"
        )

    # Preprocess for training
    print("Preprocessing sequences...")
    sequences_scaled = processor.preprocess_for_training(sequences)

    # The feature count comes from the data, so the model input always
    # matches what the processor actually produced.
    print("Training model...")
    detector = MouseAnomalyDetector(
        sequence_length=sequence_length,
        n_features=sequences_scaled.shape[-1],
    )
    history = detector.train(sequences_scaled, epochs=epochs, batch_size=batch_size)

    # Save model
    print("Saving model...")
    detector.save_model(model_save_path)

    # Save scaler. The name is derived from the extension so that it can never
    # equal model_save_path and overwrite the model that was just saved.
    scaler_path = os.path.splitext(model_save_path)[0] + '_scaler.pkl'
    with open(scaler_path, 'wb') as f:
        pickle.dump(processor.scaler, f)

    print("Training completed!")
    return detector, processor, history

# Example run: train on the recorded session and keep the resulting objects
if __name__ == "__main__":
    # Reads the events from 'mouse_data.json' and saves the model as HDF5
    detector, processor, history = train_mouse_anomaly_detector(
        data_file_path='mouse_data.json',
        model_save_path='mouse_anomaly_model.h5',
    )


Loading mouse data...
Loaded 25640 mouse movement records
Creating sequences...
Created 25591 sequences
Preprocessing sequences...
Training model...
Epoch 1/100
640/640 ━━━━━━━━━━━━━━━━━━━━ 41s 53ms/step - loss: 0.9293 - mean_absolute_error: 0.2763 - val_loss: 1.0198 - val_mean_absolute_error: 0.3295
Epoch 2/100
640/640 ━━━━━━━━━━━━━━━━━━━━ 38s 48ms/step - loss: 0.8714 - mean_absolute_error: 0.2655 - val_loss: 1.0079 - val_mean_absolute_error: 0.3291
Epoch 3/100
640/640 ━━━━━━━━━━━━━━━━━━━━ 32s 49ms/step - loss: 0.8847 - mean_absolute_error: 0.2683 - val_loss: 1.0026 - val_mean_absolute_error: 0.3297
Epoch 4/100
640/640 ━━━━━━━━━━━━━━━━━━━━ 31s 49ms/step - loss: 0.8593 - mean_absolute_error: 0.2709 - val_loss: 0.9957 - val_mean_absolute_error: 0.3288
Epoch 5/100
640/640 ━━━━━━━━━━━━━━━━━━━━ 32s 49ms/step - loss: 0.8398



Saving model...
Training completed!
