# Hack Tractor - Equipment Data Analysis

This notebook demonstrates how to load, analyze and visualize data collected from farm equipment using the Hack Tractor system.

In [None]:
# Import necessary libraries
import os
import sys
import json
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
import plotly.express as px
import plotly.graph_objects as go

# Enable better plots in the notebook
%matplotlib inline
plt.style.use('ggplot')
sns.set_theme()

# Add project root to path so we can import from src
project_root = os.path.abspath(os.path.join(os.getcwd(), '..'))
if project_root not in sys.path:
    sys.path.append(project_root)

# Import project modules (will work after adding project root to path)
try:
    from src.equipment.interfaces.can.tractor_can_interface import TractorCANInterface
    from src.ai.models.predictive_maintenance import PredictiveMaintenanceModel
    print("Successfully imported Hack Tractor modules")
except ImportError as e:
    print(f"Could not import Hack Tractor modules: {e}")
    print("Some notebook functionality may be limited")

## Loading Equipment Data

First, let's load some collected data from our equipment. This data is stored in JSON format in the `data` directory.

In [None]:
# Define function to load data from JSON files
def load_equipment_data(file_path):
    """Load equipment data from a JSON file.

    Args:
        file_path: Path of the JSON file to read.

    Returns:
        The decoded JSON content, or ``None`` when the file cannot be read
        or parsed. Failures are printed rather than raised so the notebook
        keeps running.
    """
    try:
        # Read as UTF-8 explicitly instead of relying on the platform's
        # default encoding, which is not UTF-8 on every system.
        with open(file_path, 'r', encoding='utf-8') as f:
            data = json.load(f)
    except Exception as e:
        # Deliberately best-effort: report the problem and signal failure
        # with None so callers can fall back gracefully.
        print(f"Error loading data: {e}")
        return None
    else:
        print(f"Successfully loaded data from {file_path}")
        return data

# Path to data directory
data_dir = os.path.join(project_root, 'data')
print(f"Looking for data in: {data_dir}")

# A fresh checkout may not contain the data directory yet. Create it so that
# both the listing below and the sample-file write succeed instead of raising
# FileNotFoundError.
os.makedirs(data_dir, exist_ok=True)

# List available data files. os.listdir returns entries in arbitrary order,
# so sort them to make "the first file" the same on every run.
data_files = sorted(
    f for f in os.listdir(data_dir)
    if os.path.isfile(os.path.join(data_dir, f)) and f.endswith(('.json', '.csv'))
)

print("\nAvailable data files:")
for i, file in enumerate(data_files):
    print(f"{i+1}. {file}")

# If no files found, create sample data
if not data_files:
    print("\nNo data files found. Creating sample data for demonstration...")

    # Capture the clock once so every reading in the sample shares one instant
    now = datetime.now()
    sample_values = {
        "ENGINE_RPM": 1500,
        "ENGINE_TEMP": 85,
        "FUEL_LEVEL": 75,
        "VEHICLE_SPEED": 5.2,
        "HYDRAULIC_PRESSURE": 2000,
    }

    # Create a sample dataset: one reading per parameter, each carrying its
    # own epoch timestamp next to the value
    sample_data = {
        "timestamp": now.isoformat(),
        "data": {
            name: {"timestamp": now.timestamp(), "value": value}
            for name, value in sample_values.items()
        }
    }

    # Save sample data
    sample_file = os.path.join(data_dir, 'sample_data.json')
    with open(sample_file, 'w', encoding='utf-8') as f:
        json.dump(sample_data, f, indent=2)

    print(f"Created sample data file: {sample_file}")
    data_files = ['sample_data.json']

## Converting to Pandas DataFrame

For easier analysis, we'll convert the JSON data to a pandas DataFrame.

In [None]:
# Load the first available JSON data file (or the generated sample data) and
# flatten it into a long-format DataFrame: one row per (timestamp, parameter, value)
DF_COLUMNS = ["timestamp", "parameter", "value"]


def _to_timestamp(raw):
    """Convert an epoch number or a date string into a pandas timestamp.

    Numbers are treated as epoch seconds and interpreted in local time via
    ``datetime.fromtimestamp``; anything else is handed to ``pd.to_datetime``.
    Converting each value on its own avoids building a column of ISO strings
    with mixed precision, which a single column-wide parse may reject.
    """
    if isinstance(raw, (int, float)):
        return pd.Timestamp(datetime.fromtimestamp(raw))
    return pd.to_datetime(raw)


# Start from an empty frame so that cells further down always find a `df`
# reflecting THIS run, never a missing name or a stale frame from an earlier run
df = pd.DataFrame(columns=DF_COLUMNS)

# load_equipment_data only understands JSON, so ignore CSV files in the listing
json_files = [name for name in data_files if name.endswith('.json')]

if json_files:
    data_path = os.path.join(data_dir, json_files[0])
    equipment_data = load_equipment_data(data_path)

    if equipment_data:
        records = []

        # Only a dict stored under the top-level "data" key is understood;
        # any other layout yields an empty frame instead of an exception
        payload = equipment_data.get("data") if isinstance(equipment_data, dict) else None
        if isinstance(payload, dict):
            # File-level timestamp, used for readings that carry none of their own
            timestamp = equipment_data.get("timestamp", datetime.now().isoformat())

            for param, param_data in payload.items():
                # A parameter holds either one reading (dict) or a list of readings
                if isinstance(param_data, dict):
                    points = [param_data]
                elif isinstance(param_data, list):
                    points = param_data
                else:
                    points = []

                for point in points:
                    # Skip entries that are not readings with a "value"
                    if isinstance(point, dict) and "value" in point:
                        records.append({
                            "timestamp": _to_timestamp(point.get("timestamp", timestamp)),
                            "parameter": param,
                            "value": point["value"]
                        })

        # Convert to DataFrame
        df = pd.DataFrame(records, columns=DF_COLUMNS)

        # Ensure a datetime column and put the readings in chronological order
        if not df.empty:
            df["timestamp"] = pd.to_datetime(df["timestamp"])
            df = df.sort_values("timestamp")

        print(f"\nLoaded {len(df)} data points")
        print("\nDataFrame preview:")
        display(df.head())
    else:
        print("Could not load equipment data")
else:
    print("No data files available for analysis")

## Data Visualization

Now let's visualize the equipment data to gain insights.

In [None]:
# Only proceed if we have data
if 'df' in locals() and not df.empty:
    # Create an interactive time series plot using Plotly
    try:
        fig = px.line(df, x="timestamp", y="value", color="parameter",
                     title="Equipment Parameters Over Time",
                     labels={"timestamp": "Time", "value": "Value", "parameter": "Parameter"},
                     template="plotly_white")

        # Draw a marker at every reading: a parameter with a single data point
        # (as in the generated sample data) would otherwise be a zero-length,
        # invisible line
        fig.update_traces(mode="lines+markers")

        fig.update_layout(
            legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
            height=500
        )

        fig.show()
    except Exception as e:
        # Plotly rendering depends on the notebook front end; on any failure
        # report it and draw the same chart statically instead
        print(f"Error creating Plotly chart: {e}")

        # Fallback to matplotlib
        print("Falling back to matplotlib visualization...")
        fallback_fig, fallback_ax = plt.subplots(figsize=(12, 6))

        # One line per parameter, in order of first appearance
        for param, param_data in df.groupby("parameter", sort=False):
            fallback_ax.plot(param_data["timestamp"], param_data["value"],
                             marker="o", label=param)

        fallback_ax.set_title("Equipment Parameters Over Time")
        fallback_ax.set_xlabel("Time")
        fallback_ax.set_ylabel("Value")
        fallback_ax.legend()
        fallback_fig.tight_layout()
        plt.show()
else:
    print("No data available for visualization")

## Statistical Analysis

Let's analyze the statistical properties of our equipment data.

In [None]:
# Bail out early when the conversion step produced nothing to analyze
if 'df' not in locals() or df.empty:
    print("No data available for statistical analysis")
else:
    # Per-parameter descriptive statistics of the recorded values
    stats = df.groupby("parameter")["value"].describe()
    print("Statistical summary for each parameter:")
    display(stats)

    # Box plot of the value distribution, one box per parameter
    box_fig, box_ax = plt.subplots(figsize=(12, 6))
    sns.boxplot(data=df, x="parameter", y="value", ax=box_ax)
    box_ax.set_title("Distribution of Equipment Parameters")
    box_ax.set_xlabel("Parameter")
    box_ax.set_ylabel("Value")
    # Slant the parameter names so long labels do not overlap
    plt.setp(box_ax.get_xticklabels(), rotation=45)
    box_fig.tight_layout()
    plt.show()

## Predictive Maintenance Analysis

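Let's explore how we could use this data for predictive maintenance. The cell below only runs the trend analysis when the DataFrame holds more than 10 data points and contains at least two `ENGINE_TEMP` readings; with less data it simply reports that there is not enough to analyze.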

In [None]:
# If we have enough data, we can fit a simple trend and project it forward.
# Threshold drawn on the chart and used for the projection; it is on the same
# scale as the ENGINE_TEMP values (the y-axis is labelled in °C).
ENGINE_TEMP_WARNING_THRESHOLD = 110

if 'df' in locals() and len(df) > 10:
    print("Creating a simple predictive maintenance simulation...")

    # For this example, let's simulate engine temperature trend
    if "ENGINE_TEMP" in df["parameter"].values:
        engine_temp = df[df["parameter"] == "ENGINE_TEMP"].copy()

        # A trend needs at least two readings
        if len(engine_temp) >= 2:
            # Create a time feature (hours since the first reading)
            start_time = engine_temp["timestamp"].min()
            engine_temp["hours"] = (engine_temp["timestamp"] - start_time).dt.total_seconds() / 3600

            # Plot the data; the label guarantees the legend has an entry even
            # if the regression step below fails
            trend_fig, trend_ax = plt.subplots(figsize=(10, 5))
            trend_ax.scatter(engine_temp["hours"], engine_temp["value"], alpha=0.7, label="Measured")

            # Add a trend line
            try:
                # scikit-learn is optional; when it is missing the scatter plot
                # is still shown and the error is reported below
                from sklearn.linear_model import LinearRegression

                # Create a simple linear regression model
                X = engine_temp[["hours"]]
                y = engine_temp["value"]
                model = LinearRegression().fit(X, y)

                # Predict over 1.5x the observed time span. The model was fitted
                # on a DataFrame, so predict on one with the same column name to
                # avoid scikit-learn's feature-name mismatch warning.
                hours_range = np.linspace(0, engine_temp["hours"].max() * 1.5, 100)
                predicted_temp = model.predict(pd.DataFrame({"hours": hours_range}))

                trend_ax.plot(hours_range, predicted_temp, 'r-', label="Trend")

                # Add warning threshold
                trend_ax.axhline(y=ENGINE_TEMP_WARNING_THRESHOLD, color='orange',
                                 linestyle='--', label="Warning Threshold")

                # Calculate time to warning threshold
                slope = model.coef_[0]
                if slope > 0:  # Only if temperature is increasing
                    # Hours since the first reading at which the fitted line
                    # crosses the threshold
                    time_to_threshold = (ENGINE_TEMP_WARNING_THRESHOLD - model.intercept_) / slope

                    if time_to_threshold > 0:
                        trend_ax.axvline(x=time_to_threshold, color='red', linestyle='--',
                                         label=f"Predicted Warning at {time_to_threshold:.1f} hours")

                        print(f"\nBased on current trend, engine will reach warning temperature after {time_to_threshold:.1f} hours of operation")

                        # If we're getting close, show a warning
                        if time_to_threshold < 10:
                            print(f"⚠️ WARNING: Engine temperature trending toward threshold within {time_to_threshold:.1f} hours!")
                            print("Recommended action: Schedule preventive maintenance soon")
                    else:
                        # A non-positive crossing time means the fitted line was
                        # already at or above the threshold at the first reading;
                        # reporting it as a future event would be misleading
                        print("\nFitted trend is already at or above the warning temperature at the start of the record")

            except Exception as e:
                print(f"Error in prediction: {e}")

            trend_ax.set_title("Engine Temperature Trend Analysis")
            trend_ax.set_xlabel("Operating Hours")
            trend_ax.set_ylabel("Temperature (°C)")
            trend_ax.legend()
            trend_ax.grid(True)
            trend_fig.tight_layout()
            plt.show()
        else:
            print("Need at least two engine temperature readings for trend analysis")

    else:
        print("No engine temperature data available for predictive analysis")
else:
    print("Not enough data points for predictive analysis")

## Next Steps

Based on this analysis, here are some potential next steps:

1. Collect more data over longer periods to improve predictive accuracy
2. Implement real-time alerts when parameters approach warning thresholds
3. Develop machine learning models to predict multiple maintenance needs
4. Integrate with weather data to analyze environmental impacts on equipment
5. Create custom dashboards for farmers to monitor their equipment