# Smart Robotic Arm - Data Analysis

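This notebook analyzes and visualizes the robotic arm's operation log (`../data/dataset.csv`): summary statistics, performance and servo-position plots, a random-forest model that predicts grab success, and a JSON summary export.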

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime, timedelta
import warnings
warnings.filterwarnings('ignore')

# Plot appearance: load matplotlib's 'seaborn-v0_8' style sheet first, then
# select seaborn's "husl" palette (same order as before).
plt.style.use('seaborn-v0_8')
sns.set_palette("husl")

# Notebook banner: the title followed by a rule line, one per output line.
banner_lines = (
    "Smart Robotic Arm - Data Analysis Notebook",
    "===========================================",
)
print(*banner_lines, sep="\n")

## Load and Explore Data

In [None]:
# Load data
# Read the operation log; the path is relative to the notebook's working directory.
df = pd.read_csv('../data/dataset.csv')
# Parse the timestamp column so min()/max() below and later `.dt` accessors work.
df['timestamp'] = pd.to_datetime(df['timestamp'])

print(f"Dataset shape: {df.shape}")
print(f"Date range: {df['timestamp'].min()} to {df['timestamp'].max()}")
print("\nColumn info:")
# DataFrame.info() writes its report to stdout itself and returns None, so it
# must not be wrapped in print() (that would append a stray "None" line).
df.info()

# Display first few rows
df.head()

## Basic Statistics

In [None]:
# Headline figures for the whole operation log
print("=== OPERATION STATISTICS ===")
print(f"Total operations: {df.shape[0]}")

grab_flags = df['grab_success']
print(f"Successful grabs: {grab_flags.sum()}")
print(f"Success rate: {grab_flags.mean()*100:.1f}%")

# Summing a boolean mask counts the matching rows.
print(f"Auto mode operations: {(df['mode'] == 'auto').sum()}")
print(f"Manual mode operations: {(df['mode'] == 'manual').sum()}")

print(f"Total objects detected: {df['objects_detected'].sum()}")
print(f"Average distance: {df['distance_cm'].mean():.1f} cm")
print(f"Average execution time: {df['execution_time_ms'].mean():.1f} ms")

# Most frequent object classes (value_counts sorts by frequency, descending)
print("\n=== OBJECT DETECTION ===")
object_counts = df['object_class'].value_counts()
print("Most detected objects:")
for label, n_seen in object_counts.iloc[:5].items():
    # Skip blank class labels among the top five entries.
    if not label:
        continue
    print(f"  {label}: {n_seen} times")

## Visualization - Performance Analysis

In [None]:
# Build a 2x2 dashboard: success rate by mode, daily operation counts,
# success rate by distance band, and the most frequently detected objects.
fig, axes = plt.subplots(2, 2, figsize=(15, 12))
fig.suptitle('Smart Robotic Arm - Performance Analysis', fontsize=16, fontweight='bold')

# 1. Success rate by mode
mode_success = df.groupby('mode')['grab_success'].agg(['count', 'sum', 'mean']).reset_index()
mode_success['success_rate'] = mode_success['mean'] * 100

axes[0,0].bar(mode_success['mode'], mode_success['success_rate'],
              color=['#FF6B6B', '#4ECDC4'], alpha=0.8)
axes[0,0].set_title('Success Rate by Mode')
axes[0,0].set_ylabel('Success Rate (%)')
# Value labels are drawn 2 points above each bar, so leave headroom above
# 100%; with a hard 0-100 limit the label of a ~100% bar lands outside the axes.
axes[0,0].set_ylim(0, 110)

# Add value labels on bars
for i, v in enumerate(mode_success['success_rate']):
    axes[0,0].text(i, v + 2, f'{v:.1f}%', ha='center', fontweight='bold')

# 2. Operations over time
# Adds a calendar-date column to df (derived from the parsed timestamp).
df['date'] = df['timestamp'].dt.date
daily_ops = df.groupby('date').size()

axes[0,1].plot(daily_ops.index, daily_ops.values, marker='o', linewidth=2, markersize=6)
axes[0,1].set_title('Daily Operations')
axes[0,1].set_ylabel('Number of Operations')
axes[0,1].tick_params(axis='x', rotation=45)

# 3. Distance vs Success Rate
# Bin distances into five equal-width bands; adds a categorical column to df.
df['distance_bin'] = pd.cut(df['distance_cm'], bins=5, labels=['Very Close', 'Close', 'Medium', 'Far', 'Very Far'])
# observed=False is spelled out so every band keeps its slot on the x-axis even
# when it contains no rows, independent of the pandas version's default.
distance_success = df.groupby('distance_bin', observed=False)['grab_success'].mean() * 100

axes[1,0].bar(range(len(distance_success)), distance_success.values,
              color='#45B7D1', alpha=0.8)
axes[1,0].set_title('Success Rate by Distance')
axes[1,0].set_ylabel('Success Rate (%)')
axes[1,0].set_xticks(range(len(distance_success)))
axes[1,0].set_xticklabels(distance_success.index, rotation=45)

# 4. Object Detection Distribution
top_objects = df['object_class'].value_counts().head(5)
if len(top_objects) > 0:
    axes[1,1].pie(top_objects.values, labels=top_objects.index, autopct='%1.1f%%', startangle=90)
    axes[1,1].set_title('Top Detected Objects')
else:
    # No object classes recorded: show a placeholder instead of an empty pie.
    axes[1,1].text(0.5, 0.5, 'No object data available', ha='center', va='center', transform=axes[1,1].transAxes)
    axes[1,1].set_title('Object Detection Distribution')

plt.tight_layout()
plt.show()

## Servo Movement Analysis

In [None]:
# Per-joint angle distributions, followed by the pairwise correlation of the joints.
servo_cols = ['servo_base', 'servo_shoulder', 'servo_elbow', 'servo_wrist']

fig, axes = plt.subplots(2, 2, figsize=(15, 10))
fig.suptitle('Servo Movement Analysis', fontsize=16, fontweight='bold')

# axes.flat walks the 2x2 grid row by row, i.e. panel k sits at (k // 2, k % 2).
for idx, (ax, servo) in enumerate(zip(axes.flat, servo_cols)):
    angles = df[servo]
    mean_angle = angles.mean()
    pretty_name = servo.replace("_", " ").title()

    # Histogram of recorded positions with the mean marked as a dashed line
    ax.hist(angles, bins=20, alpha=0.7, color=f'C{idx}', edgecolor='black')
    ax.set_title(f'{pretty_name} Position Distribution')
    ax.set_xlabel('Angle (degrees)')
    ax.set_ylabel('Frequency')
    ax.axvline(mean_angle, color='red', linestyle='--',
               label=f'Mean: {mean_angle:.1f}°')
    ax.legend()

plt.tight_layout()
plt.show()

# Correlation between servo positions, drawn as an annotated heatmap
servo_corr = df[servo_cols].corr()
plt.figure(figsize=(10, 8))
sns.heatmap(
    servo_corr,
    annot=True,
    cmap='coolwarm',
    center=0,
    square=True,
    linewidths=0.5,
)
plt.title('Servo Position Correlation Matrix')
plt.show()

## Performance Optimization Insights

In [None]:
# Performance insights: text summary of which conditions go with successful grabs.
# Depends on `df` and `servo_cols` from the cells above.
print("=== PERFORMANCE OPTIMIZATION INSIGHTS ===")

# Rows whose grab_success compares equal to True.
successful_ops = df[df['grab_success'] == True]
has_successes = len(successful_ops) > 0

# 1. Optimal distance range (mean +/- one standard deviation of successful grabs)
print(f"\n1. OPTIMAL DISTANCE RANGE:")
if has_successes:
    optimal_distance = successful_ops['distance_cm'].mean()
    distance_std = successful_ops['distance_cm'].std()
    print(f"   Mean successful distance: {optimal_distance:.1f} cm")
    # The sample standard deviation is NaN with fewer than two observations;
    # say so instead of printing a "nan - nan" range.
    if pd.notna(distance_std):
        print(f"   Optimal range: {optimal_distance-distance_std:.1f} - {optimal_distance+distance_std:.1f} cm")
    else:
        print("   Optimal range: n/a (need at least 2 successful grabs)")
else:
    print("   No successful grabs recorded")

# 2. Best performing servo configurations
print(f"\n2. BEST SERVO CONFIGURATIONS:")
if has_successes:
    for servo in servo_cols:
        best_angle = successful_ops[servo].mean()
        print(f"   {servo}: {best_angle:.1f}° (avg for successful grabs)")
else:
    # Averages over an empty selection would print as "nan°".
    print("   No successful grabs recorded")

# 3. Time efficiency: compare success below vs. at-or-above the median time
if 'execution_time_ms' in df.columns:
    # Compute the median once; it is the split point and appears in both labels.
    median_ms = df['execution_time_ms'].median()

    fast_ops = df[df['execution_time_ms'] < median_ms]
    fast_success_rate = fast_ops['grab_success'].mean() * 100

    slow_ops = df[df['execution_time_ms'] >= median_ms]
    slow_success_rate = slow_ops['grab_success'].mean() * 100

    print(f"\n3. EXECUTION TIME ANALYSIS:")
    print(f"   Fast operations (<{median_ms:.0f}ms): {fast_success_rate:.1f}% success")
    print(f"   Slow operations (≥{median_ms:.0f}ms): {slow_success_rate:.1f}% success")

# 4. Object-specific performance
print(f"\n4. OBJECT-SPECIFIC PERFORMANCE:")
obj_performance = df.groupby('object_class')['grab_success'].agg(['count', 'mean']).reset_index()
# Only objects with 2+ attempts; copy so the column added next is not written
# onto a filtered view of the aggregate.
obj_performance = obj_performance[obj_performance['count'] >= 2].copy()
obj_performance['success_rate'] = obj_performance['mean'] * 100
obj_performance = obj_performance.sort_values('success_rate', ascending=False)

# Drop blank class labels BEFORE taking the top five, so a blank entry cannot
# silently shorten the list.
named_objects = obj_performance[obj_performance['object_class'].astype(bool)].head()
for obj_class, rate, attempts in zip(named_objects['object_class'],
                                     named_objects['success_rate'],
                                     named_objects['count']):
    print(f"   {obj_class}: {rate:.1f}% ({attempts} attempts)")

## Machine Learning - Predictive Analysis

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.preprocessing import LabelEncoder

# Train a random forest that predicts grab_success from the recorded operation
# parameters, then report hold-out accuracy and feature importances.
print("=== MACHINE LEARNING ANALYSIS ===")

# Raw columns the model depends on (categoricals are label-encoded below).
categorical_cols = ['mode', 'gripper_state']
numeric_cols = ['objects_detected', 'distance_cm',
                'servo_base', 'servo_shoulder', 'servo_elbow', 'servo_wrist',
                'detection_confidence']

# Select features for prediction
feature_cols = ['mode_encoded', 'objects_detected', 'distance_cm',
                'servo_base', 'servo_shoulder', 'servo_elbow', 'servo_wrist',
                'gripper_encoded', 'detection_confidence']

# Remove incomplete rows BEFORE encoding. Encoding first would either fail on a
# missing category or turn it into a class of its own, and the encoded columns
# would then never be null, so those rows could not be dropped afterwards.
ml_df = df.dropna(subset=categorical_cols + numeric_cols + ['grab_success']).copy()

if len(ml_df) > 10:  # Need sufficient data for ML
    # Encode categorical variables
    le_mode = LabelEncoder()
    ml_df['mode_encoded'] = le_mode.fit_transform(ml_df['mode'])

    le_gripper = LabelEncoder()
    ml_df['gripper_encoded'] = le_gripper.fit_transform(ml_df['gripper_state'])

    ml_df_clean = ml_df[feature_cols + ['grab_success']]
    X = ml_df_clean[feature_cols]
    y = ml_df_clean['grab_success']

    # Split data. Stratify so both splits keep the success/failure ratio;
    # stratification needs at least two rows per class, so fall back to a plain
    # random split when a class is rarer than that.
    stratify_on = y if y.value_counts().min() >= 2 else None
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.3, random_state=42, stratify=stratify_on)

    # Train Random Forest model (fixed seed for reproducible results)
    rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
    rf_model.fit(X_train, y_train)

    # Make predictions
    y_pred = rf_model.predict(X_test)

    # Model performance
    print(f"\nModel Accuracy: {rf_model.score(X_test, y_test):.3f}")
    print("\nClassification Report:")
    print(classification_report(y_test, y_pred))

    # Feature importance, most important first
    feature_importance = pd.DataFrame({
        'feature': feature_cols,
        'importance': rf_model.feature_importances_
    }).sort_values('importance', ascending=False)

    print("\nFeature Importance:")
    for _, row in feature_importance.iterrows():
        print(f"   {row['feature']}: {row['importance']:.3f}")

    # Visualize feature importance
    plt.figure(figsize=(10, 6))
    plt.barh(feature_importance['feature'], feature_importance['importance'])
    # barh draws the first row at the bottom; flip the axis so the chart reads
    # top-down in the same order as the printed ranking.
    plt.gca().invert_yaxis()
    plt.title('Feature Importance for Grab Success Prediction')
    plt.xlabel('Importance')
    plt.tight_layout()
    plt.show()

else:
    print("Insufficient data for machine learning analysis (need >10 complete records)")

## Export Results

In [None]:
# Export analysis results: write a JSON summary next to the dataset and echo it.
import json


def _finite_or_none(value):
    """Return `value` as a built-in float, or None when it is NaN/missing.

    json.dump would otherwise write NaN as the bare token `NaN`, which is not
    valid JSON for strict parsers.
    """
    return float(value) if pd.notna(value) else None


# Read the clock once so the filename stamp and 'analysis_date' always agree.
run_time = datetime.now()
timestamp = run_time.strftime("%Y%m%d_%H%M%S")

# Create summary report
summary_report = {
    'analysis_date': run_time.isoformat(),
    'total_operations': len(df),
    'success_rate': _finite_or_none(df['grab_success'].mean() * 100),
    'auto_mode_ops': len(df[df['mode'] == 'auto']),
    'manual_mode_ops': len(df[df['mode'] == 'manual']),
    'avg_distance': _finite_or_none(df['distance_cm'].mean()),
    'avg_execution_time': _finite_or_none(df['execution_time_ms'].mean()),
    'top_objects': df['object_class'].value_counts().head().to_dict()
}

# Save summary; default=str stringifies any value json cannot encode natively.
with open(f'../data/analysis_summary_{timestamp}.json', 'w', encoding='utf-8') as f:
    json.dump(summary_report, f, indent=2, default=str)

print(f"Analysis complete! Summary saved to: analysis_summary_{timestamp}.json")
print("\n=== FINAL SUMMARY ===")
for key, value in summary_report.items():
    # 'top_objects' is a nested dict; it is kept in the file but not echoed here.
    if key != 'top_objects':
        print(f"{key}: {value}")