# Data Generation Module

This notebook generates synthetic GPS tracking data and simulated satellite images for poaching detection.


In [None]:
import numpy as np
import pandas as pd
from datetime import datetime, timedelta
import os
import cv2
from utils import set_random_seed, generate_animal_id, save_dataframe


## DataGenerator Class


In [None]:
class DataGenerator:
    """Generate synthetic GPS and satellite image data for poaching detection.

    All randomness is drawn from NumPy's global ``np.random`` state, so the
    order in which the methods are called affects the values they produce.
    """

    # Centre of the simulated reserve in decimal degrees (example: a Kenyan
    # wildlife reserve). Shared by the GPS tracks and the image metadata so
    # that both data sets cover the same area.
    RESERVE_CENTER_LAT = -1.2921
    RESERVE_CENTER_LON = 36.8219

    # Column order of the returned frames. Passing these to ``pd.DataFrame``
    # keeps the schema intact even when no rows are generated.
    GPS_COLUMNS = (
        'animal_id',
        'latitude',
        'longitude',
        'timestamp',
        'is_anomaly',
        'anomaly_type',
    )
    IMAGE_METADATA_COLUMNS = (
        'image_id',
        'image_path',
        'latitude',
        'longitude',
        'timestamp',
    )

    def __init__(self, seed=42, output_dir='output'):
        """
        Initialize the data generator.

        Args:
            seed: Value forwarded to ``set_random_seed`` (project helper;
                presumably seeds the NumPy RNG - confirm in ``utils``).
            output_dir: Directory that receives all generated artefacts.
                Created if it does not exist.
        """
        set_random_seed(seed)
        self.output_dir = output_dir
        os.makedirs(self.output_dir, exist_ok=True)

    def generate_gps_data(self, num_animals=10, days=7, points_per_day=24):
        """
        Generate synthetic GPS tracking data for animals.

        Each animal gets a fixed home point near the reserve centre; every
        fix is that home point plus independent Gaussian jitter. Roughly 5%
        of the fixes are turned into labelled anomalies.

        Args:
            num_animals: Number of animals to track
            days: Number of days of data (the series ends around "now")
            points_per_day: GPS points per day, spread evenly over the
                24 hours (the default of 24 gives one fix per hour)

        Returns:
            DataFrame with one row per fix and the columns listed in
            ``GPS_COLUMNS``. Empty (but with those columns) when any of the
            counts is zero.
        """
        print("Generating synthetic GPS tracking data...")

        gps_data = []
        start_time = datetime.now() - timedelta(days=days)
        one_day = timedelta(days=1)

        for animal_idx in range(num_animals):
            animal_id = generate_animal_id(animal_idx)

            # Home point of this animal; all of its fixes scatter around it.
            base_lat = self.RESERVE_CENTER_LAT + np.random.normal(0, 0.01)
            base_lon = self.RESERVE_CENTER_LON + np.random.normal(0, 0.01)

            # Last emitted position, needed to simulate a 'sudden_stop'.
            prev_lat = None
            prev_lon = None

            for day in range(days):
                for point_idx in range(points_per_day):
                    # Spread the fixes evenly over the day so that values of
                    # points_per_day other than 24 still cover exactly one
                    # day instead of spilling over or stopping short.
                    timestamp = (
                        start_time
                        + timedelta(days=day)
                        + (one_day * point_idx) / points_per_day
                    )

                    # Normal movement: small jitter around the home point.
                    lat_offset = np.random.normal(0, 0.001)
                    lon_offset = np.random.normal(0, 0.001)

                    current_lat = base_lat + lat_offset
                    current_lon = base_lon + lon_offset

                    # Add some anomalies (poaching scenarios)
                    is_anomaly = False
                    anomaly_type = None

                    # 5% chance of anomaly per point
                    if np.random.random() < 0.05:
                        is_anomaly = True
                        # str() turns the numpy string scalar into a plain
                        # Python str for downstream consumers.
                        anomaly_type = str(np.random.choice(
                            ['sudden_stop', 'rapid_movement', 'human_proximity']
                        ))

                        if anomaly_type == 'sudden_stop':
                            # Animal stops moving: repeat the previous fix.
                            # The very first fix of an animal has nothing to
                            # repeat and keeps its jittered position.
                            if prev_lat is not None:
                                current_lat = prev_lat
                                current_lon = prev_lon
                        elif anomaly_type == 'rapid_movement':
                            # Unusual rapid movement: much larger jitter.
                            current_lat += np.random.normal(0, 0.005)
                            current_lon += np.random.normal(0, 0.005)
                        elif anomaly_type == 'human_proximity':
                            # Biased shift towards the simulated human
                            # activity area (positive lat/lon direction).
                            current_lat += np.random.normal(0.002, 0.001)
                            current_lon += np.random.normal(0.002, 0.001)

                    gps_data.append({
                        'animal_id': animal_id,
                        'latitude': current_lat,
                        'longitude': current_lon,
                        'timestamp': timestamp,
                        'is_anomaly': is_anomaly,
                        'anomaly_type': anomaly_type
                    })
                    prev_lat = current_lat
                    prev_lon = current_lon

        # Explicit columns keep the schema when gps_data is empty; otherwise
        # the 'is_anomaly' lookup below would raise KeyError.
        gps_df = pd.DataFrame(gps_data, columns=list(self.GPS_COLUMNS))
        save_dataframe(gps_df, 'gps_tracking_data.csv', self.output_dir)

        print(f"Generated GPS data for {num_animals} animals over {days} days")
        print(f"Total GPS points: {len(gps_df)}")
        print(f"Anomalies detected: {gps_df['is_anomaly'].sum()}")

        return gps_df

    def generate_satellite_images(self, num_images=50):
        """
        Generate synthetic satellite images for object detection.
        In a real scenario, this would load actual satellite images.

        Each image is 512x512 noise with green blobs; about 30% of the images
        additionally contain one red ("human") or blue ("vehicle") rectangle.
        Images are written as JPEG files to ``<output_dir>/satellite_images``.

        Args:
            num_images: Number of images to generate

        Returns:
            List of image file paths

        Raises:
            OSError: If an image could not be written to disk.
        """
        print("Generating synthetic satellite images...")

        images_dir = os.path.join(self.output_dir, 'satellite_images')
        os.makedirs(images_dir, exist_ok=True)

        image_size = 512
        # Keep simulated objects away from the border so they are not cut off.
        object_margin = 50

        image_paths = []

        for i in range(num_images):
            # Background: uniform noise in all three channels.
            img = np.random.randint(
                50, 150, (image_size, image_size, 3), dtype=np.uint8
            )

            # Add some vegetation patterns. Colours are in OpenCV's BGR
            # order, so (0, 100, 0) is dark green.
            for _ in range(100):
                # Plain ints: OpenCV drawing functions expect Python numbers.
                x, y = (int(v) for v in np.random.randint(0, image_size, 2))
                radius = int(np.random.randint(2, 8))
                cv2.circle(img, (x, y), radius, (0, 100, 0), -1)

            # Randomly add humans or vehicles (simulated)
            if np.random.random() < 0.3:  # 30% chance of human/vehicle
                x, y = (
                    int(v)
                    for v in np.random.randint(
                        object_margin, image_size - object_margin, 2
                    )
                )
                if np.random.random() < 0.5:
                    # Human (small red rectangle; red is (0, 0, 255) in BGR)
                    cv2.rectangle(img, (x, y), (x + 10, y + 20), (0, 0, 255), -1)
                else:
                    # Vehicle (larger blue rectangle; blue is (255, 0, 0) in BGR)
                    cv2.rectangle(img, (x, y), (x + 30, y + 15), (255, 0, 0), -1)

            # Save image. cv2.imwrite reports failure through its return
            # value, so check it rather than returning paths that were
            # never written.
            image_path = os.path.join(images_dir, f'satellite_{i:06d}.jpg')
            if not cv2.imwrite(image_path, img):
                raise OSError(f"Failed to write satellite image: {image_path}")
            image_paths.append(image_path)

        print(f"Generated {len(image_paths)} synthetic satellite images")
        return image_paths

    def create_image_metadata(self, image_paths):
        """
        Create metadata for satellite images with GPS coordinates.

        Every image gets a random position scattered around the reserve
        centre and a random capture time within the last 168 hours (7 days).

        Args:
            image_paths: List of image file paths

        Returns:
            DataFrame with one row per image and the columns listed in
            ``IMAGE_METADATA_COLUMNS``. Empty (but with those columns) when
            ``image_paths`` is empty.
        """
        print("Creating image metadata...")

        # One reference time for the whole batch, so all timestamps are
        # exact whole-hour offsets from the same instant.
        reference_time = datetime.now()

        metadata = []
        for i, image_path in enumerate(image_paths):
            # Gaussian scatter around the reserve centre (sigma 0.02 degrees;
            # not clipped, so occasional points land far from the centre).
            lat = self.RESERVE_CENTER_LAT + np.random.normal(0, 0.02)
            lon = self.RESERVE_CENTER_LON + np.random.normal(0, 0.02)

            # timedelta only accepts plain Python numbers.
            hours_ago = int(np.random.randint(0, 168))

            metadata.append({
                'image_id': f'IMG_{i:06d}',
                'image_path': image_path,
                'latitude': lat,
                'longitude': lon,
                'timestamp': reference_time - timedelta(hours=hours_ago)
            })

        metadata_df = pd.DataFrame(
            metadata, columns=list(self.IMAGE_METADATA_COLUMNS)
        )
        save_dataframe(metadata_df, 'image_metadata.csv', self.output_dir)

        return metadata_df


## Test Data Generation


In [None]:
# Smoke-test the full pipeline with the default settings spelled out:
# seed the generator, build the GPS tracks, render the images, then attach
# metadata to the rendered images. The call order matters because every step
# draws from the same global NumPy random state.
generator = DataGenerator(seed=42)
gps_data = generator.generate_gps_data(
    num_animals=10,
    days=7,
    points_per_day=24,
)
image_paths = generator.generate_satellite_images(num_images=50)
image_metadata = generator.create_image_metadata(image_paths=image_paths)
