# anomaly_detector

GPS anomaly detection module using IsolationForest and movement pattern analysis.

In [None]:
import numpy as np
import pandas as pd
from sklearn.ensemble import IsolationForest
from sklearn.preprocessing import StandardScaler
from datetime import datetime, timedelta
import warnings

## Code

In [None]:
# GPS anomaly detection module using IsolationForest and movement pattern analysis.

# NOTE(review): with no category or module filter this suppresses every
# warning for the whole process, not only those raised by this module.
# Presumably added to quiet noisy library warnings - confirm that is intended.
warnings.filterwarnings('ignore')
from utils import save_dataframe, calculate_distance

class AnomalyDetector:
    """Anomaly detection for GPS tracking data.

    Per-point movement features are derived from consecutive GPS fixes of
    each animal and fed, after standard scaling, to an IsolationForest.
    """

    # Column order of the frame returned by extract_features.
    OUTPUT_COLUMNS = (
        'animal_id',
        'timestamp',
        'latitude',
        'longitude',
        'speed',
        'distance_moved',
        'direction_change',
        'acceleration',
    )

    # Subset of OUTPUT_COLUMNS that is passed to the model.
    FEATURE_COLUMNS = ('speed', 'distance_moved', 'direction_change', 'acceleration')

    def __init__(self, contamination=0.1):
        """
        Initialize the anomaly detector.

        Args:
            contamination: Expected proportion of anomalies in the dataset
        """
        self.contamination = contamination
        self.isolation_forest = IsolationForest(
            contamination=contamination,
            random_state=42,  # fixed seed so repeated runs give the same forest
            n_estimators=100
        )
        self.scaler = StandardScaler()

    @staticmethod
    def _track_features(animal_id, animal_data):
        """Build the feature rows for one animal's time-ordered track.

        Args:
            animal_id: Identifier copied into every produced row
            animal_data: That animal's rows, already sorted by 'timestamp'

        Returns:
            List of dicts, one per GPS point, keyed by OUTPUT_COLUMNS
        """
        rows = []
        previous = None        # (timestamp, latitude, longitude) of the prior fix
        previous_speed = None  # speed of the prior segment; None until one exists

        points = zip(
            animal_data['timestamp'],
            animal_data['latitude'],
            animal_data['longitude'],
        )
        for timestamp, latitude, longitude in points:
            if previous is None:
                # First point - no previous fix to measure movement against.
                speed = 0
                dist = 0
                direction_change = 0
                acceleration = 0
            else:
                prev_timestamp, prev_latitude, prev_longitude = previous

                dist = calculate_distance(
                    prev_latitude, prev_longitude,
                    latitude, longitude
                )

                # Elapsed time in hours; zero or negative gaps yield speed 0
                # instead of a division error.
                time_diff = (timestamp - prev_timestamp).total_seconds() / 3600
                speed = dist / time_diff if time_diff > 0 else 0

                # NOTE: despite its name this is the Euclidean length of the
                # (latitude, longitude) delta in coordinate units, not a change
                # of heading. Kept as-is so the model inputs stay the same.
                lat_change = latitude - prev_latitude
                lon_change = longitude - prev_longitude
                direction_change = np.sqrt(lat_change**2 + lon_change**2)

                # Acceleration needs two segments, i.e. at least three points.
                # The prior segment's speed is carried over rather than being
                # recomputed from scratch.
                if previous_speed is not None and time_diff > 0:
                    acceleration = (speed - previous_speed) / time_diff
                else:
                    acceleration = 0

                previous_speed = speed

            rows.append({
                'animal_id': animal_id,
                'timestamp': timestamp,
                'latitude': latitude,
                'longitude': longitude,
                'speed': speed,
                'distance_moved': dist,
                'direction_change': direction_change,
                'acceleration': acceleration
            })
            previous = (timestamp, latitude, longitude)

        return rows

    def extract_features(self, gps_df):
        """
        Extract movement features from GPS data.

        Each animal's points are ordered by timestamp and compared with the
        preceding point. Speed is distance per hour, in whatever distance unit
        calculate_distance returns (the original comment says metres - TODO
        confirm against utils).

        Args:
            gps_df: DataFrame with GPS tracking data; must contain the columns
                'animal_id', 'timestamp', 'latitude' and 'longitude', with
                timestamps whose difference supports .total_seconds()

        Returns:
            DataFrame with one row per input point and the columns listed in
            OUTPUT_COLUMNS. The columns are present even when the input has
            no rows.
        """
        features = []

        # One pass over the frame; sort=False keeps animals in order of first
        # appearance instead of re-scanning the frame for every animal.
        for animal_id, animal_data in gps_df.groupby('animal_id', sort=False):
            ordered = animal_data.sort_values('timestamp')
            features.extend(self._track_features(animal_id, ordered))

        # Explicit columns so an empty result still has the expected schema.
        return pd.DataFrame(features, columns=list(self.OUTPUT_COLUMNS))

    def detect_anomalies(self, gps_df):
        """
        Detect anomalies in GPS tracking data.

        Fits the scaler and the IsolationForest on the features of gps_df,
        then writes the result through save_dataframe as 'gps_anomalies.csv'.

        Args:
            gps_df: DataFrame with GPS tracking data

        Returns:
            Feature DataFrame with two extra columns: 'anomaly_score' (the
            IsolationForest score_samples value) and 'is_anomaly' (1 where the
            forest predicted -1, else 0). For input without any points the
            frame is empty, nothing is fitted and nothing is saved.
        """
        print("Extracting movement features...")
        features_df = self.extract_features(gps_df)

        if features_df.empty:
            # The scaler and the forest cannot be fitted on zero samples.
            features_df['anomaly_score'] = pd.Series(dtype=float)
            features_df['is_anomaly'] = pd.Series(dtype=int)
            print("No GPS points to analyze; skipping anomaly detection.")
            return features_df

        # Prepare features for model
        X = features_df[list(self.FEATURE_COLUMNS)].fillna(0)

        # Scale features
        X_scaled = self.scaler.fit_transform(X)

        # Detect anomalies
        print("Running IsolationForest anomaly detection...")
        anomaly_predictions = self.isolation_forest.fit_predict(X_scaled)
        anomaly_scores = self.isolation_forest.score_samples(X_scaled)

        # Add predictions to dataframe
        features_df['anomaly_score'] = anomaly_scores
        features_df['is_anomaly'] = (anomaly_predictions == -1).astype(int)

        # Save results
        save_dataframe(features_df, 'gps_anomalies.csv', 'output')

        print(f"Detected {features_df['is_anomaly'].sum()} anomalies out of {len(features_df)} GPS points")

        return features_df

    def get_anomaly_summary(self, anomalies_df):
        """
        Get summary statistics for anomaly detection.

        Args:
            anomalies_df: DataFrame with anomaly detection results; must
                contain the columns 'is_anomaly' and 'animal_id'

        Returns:
            Dictionary with 'total_points', 'anomalies_detected',
            'anomaly_rate' (percentage, 0.0 for an empty frame) and
            'unique_animals', all as built-in int/float values.
        """
        total_points = len(anomalies_df)
        # Converted from the NumPy scalar so the summary can be serialised.
        anomalies_detected = int(anomalies_df['is_anomaly'].sum())

        if total_points > 0:
            anomaly_rate = anomalies_detected / total_points * 100
        else:
            anomaly_rate = 0.0

        return {
            'total_points': total_points,
            'anomalies_detected': anomalies_detected,
            'anomaly_rate': anomaly_rate,
            'unique_animals': int(anomalies_df['animal_id'].nunique())
        }


## Test Code

In [None]:
    # Smoke-test the anomaly detector against previously generated GPS data.
    # os is imported here because the notebook's import cell does not bring
    # it in, and os.path.exists below would otherwise raise NameError.
    import os

    detector = AnomalyDetector()
    # Load sample GPS data
    if os.path.exists('output/gps_tracking_data.csv'):
        gps_df = pd.read_csv('output/gps_tracking_data.csv')
        # read_csv yields strings; the detector needs real timestamps to
        # compute time differences between consecutive fixes.
        gps_df['timestamp'] = pd.to_datetime(gps_df['timestamp'])
        anomalies = detector.detect_anomalies(gps_df)
        summary = detector.get_anomaly_summary(anomalies)
        print("Anomaly Summary:", summary)

        # AnomalyDetector as defined in this notebook has no
        # get_anomaly_details method; use it when available, otherwise fall
        # back to the flagged rows ordered by ascending anomaly_score.
        if hasattr(detector, 'get_anomaly_details'):
            details = detector.get_anomaly_details(anomalies)
        else:
            details = anomalies[anomalies['is_anomaly'] == 1].sort_values('anomaly_score')

        if not details.empty:
            print("\nTop 5 anomalies:")
            # Only show the columns that are actually present, so missing
            # 'anomaly_severity' / 'anomaly_type' columns do not raise KeyError.
            wanted = ['animal_id', 'timestamp', 'anomaly_severity', 'anomaly_type', 'anomaly_score']
            shown = [column for column in wanted if column in details.columns]
            print(details[shown].head())
    else:
        print("No GPS data found. Run data_generator.py first.")