# Equipment Maintenance Analysis System

This notebook implements a comprehensive analysis system for industrial equipment maintenance data, providing detailed insights, visualizations, and actionable recommendations.

## Setup and Dependencies
First, let's install all required packages:

In [None]:
# Install the third-party packages used by the cells below (data analysis,
# plotting, and HTML/PDF report generation). Run this cell once per environment.
!pip install pandas matplotlib seaborn numpy scipy scikit-learn joblib requests plotly kaleido weasyprint jinja2

## Import Required Libraries

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
import numpy as np
from scipy import stats
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from sklearn.ensemble import IsolationForest
from sklearn.decomposition import PCA
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import json
import os
import base64
from IPython.display import HTML, display
import jinja2

# Set style for better visualization
# matplotlib 3.6 renamed its bundled seaborn style sheets to 'seaborn-v0_8*' and
# later releases dropped the old 'seaborn' alias, so pick whichever name this
# installation actually provides instead of failing at import time.
for _style_name in ('seaborn-v0_8', 'seaborn'):
    if _style_name in plt.style.available:
        plt.style.use(_style_name)
        break
sns.set_style("whitegrid")

## HTML Report Template
Define the HTML template for the report:

In [None]:
# Jinja2 source for the report page. It contains five placeholders
# (overview_content, equipment_analysis, cost_analysis, ai_insights,
# recommendations), one per report section; they are filled when
# MaintenanceAnalyzer.generate_html_report renders this string with
# jinja2.Template. The .high-impact / .medium-impact / .low-impact classes
# recolor the left border of an .insight card.
HTML_TEMPLATE = """
<!DOCTYPE html>
<html>
<head>
    <title>Equipment Maintenance Analysis Report</title>
    <style>
        body {
            font-family: Arial, sans-serif;
            line-height: 1.6;
            margin: 40px;
            color: #333;
        }
        h1 {
            color: #2c3e50;
            border-bottom: 2px solid #3498db;
            padding-bottom: 10px;
        }
        h2 {
            color: #34495e;
            margin-top: 30px;
        }
        .section {
            margin: 20px 0;
            padding: 20px;
            background: #f9f9f9;
            border-radius: 5px;
        }
        .visualization {
            margin: 20px 0;
            text-align: center;
        }
        .insight {
            background: #fff;
            padding: 15px;
            margin: 10px 0;
            border-left: 4px solid #3498db;
            box-shadow: 0 2px 4px rgba(0,0,0,0.1);
        }
        table {
            width: 100%;
            border-collapse: collapse;
            margin: 20px 0;
        }
        th, td {
            padding: 12px;
            text-align: left;
            border-bottom: 1px solid #ddd;
        }
        th {
            background-color: #f2f2f2;
        }
        .high-impact { border-left-color: #e74c3c; }
        .medium-impact { border-left-color: #f1c40f; }
        .low-impact { border-left-color: #2ecc71; }
    </style>
</head>
<body>
    <h1>Equipment Maintenance Analysis Report</h1>
    
    <div class="section">
        <h2>Overview</h2>
        {{ overview_content }}
    </div>

    <div class="section">
        <h2>Equipment Analysis</h2>
        {{ equipment_analysis }}
    </div>

    <div class="section">
        <h2>Cost Analysis</h2>
        {{ cost_analysis }}
    </div>

    <div class="section">
        <h2>AI-Driven Insights</h2>
        {{ ai_insights }}
    </div>

    <div class="section">
        <h2>Recommendations</h2>
        {{ recommendations }}
    </div>
</body>
</html>
"""

## MaintenanceAnalyzer Class Implementation

In [None]:
class MaintenanceAnalyzer:
    """Load equipment maintenance records from a CSV file and analyze them.

    Construction reads the CSV, checks that every column listed in
    ``REQUIRED_COLUMNS`` is present, parses the ``Start date`` / ``End Date``
    columns, and adds derived columns to ``self.df``: ``month_year``,
    ``Season``, ``Cost per Day`` and the model-based ``equipment_cluster``,
    ``maintenance_anomaly``, ``maintenance_pattern_1`` and
    ``maintenance_pattern_2``. The public methods build summary tables,
    Plotly figures and an HTML report from that frame.
    """

    REQUIRED_COLUMNS = [
        'Equipment Name', 'Equipment ID', 'Criticality level',
        'Task Id', 'Task Description', 'Start date', 'End Date',
        'Duration', 'Maintenance cost'
    ]

    # Not referenced by any method of this class; dates are parsed with
    # pandas' default format inference.
    DATE_FORMAT = '%Y-%m-%d'

    # Neutral values for the model-derived columns. They are used whenever the
    # models cannot be fitted so that the plotting methods always find the
    # columns they read (1 is IsolationForest's label for an inlier).
    _AI_FEATURE_DEFAULTS = {
        'equipment_cluster': 0,
        'maintenance_anomaly': 1,
        'maintenance_pattern_1': 0,
        'maintenance_pattern_2': 0,
    }

    def __init__(self, data_path: str):
        """Load the CSV at ``data_path`` and compute all derived columns.

        Raises:
            ValueError: if the file cannot be read, a required column is
                missing, or a date column cannot be parsed.
        """
        self._load_and_validate_data(data_path)
        self._preprocess_data()

    def _load_and_validate_data(self, data_path: str) -> None:
        """Read the CSV into ``self.df``, validate columns, and parse dates.

        Columns are validated *before* the date columns are touched so that a
        missing ``Start date`` / ``End Date`` is reported as a missing column
        rather than as an opaque ``KeyError`` wrapped in a read error.

        Raises:
            ValueError: on read failure, missing columns, or unparseable dates.
        """
        try:
            frame = pd.read_csv(data_path)
        except (OSError, ValueError) as e:
            # pandas' parser/empty-data/decoding errors derive from ValueError.
            raise ValueError(f"Error reading CSV file: {str(e)}") from e

        missing_columns = [col for col in self.REQUIRED_COLUMNS if col not in frame.columns]
        if missing_columns:
            raise ValueError(f"Missing required columns: {', '.join(missing_columns)}")

        try:
            for date_column in ('Start date', 'End Date'):
                frame[date_column] = pd.to_datetime(frame[date_column])
        except (ValueError, TypeError, OverflowError) as e:
            raise ValueError(f"Error reading CSV file: {str(e)}") from e

        self.df = frame

    def _preprocess_data(self) -> None:
        """Add calendar, cost-rate and model-based columns to ``self.df``."""
        start_dates = self.df['Start date']
        self.df['month_year'] = start_dates.dt.to_period('M')
        self.df['Season'] = start_dates.dt.month.map({
            12: 'Winter', 1: 'Winter', 2: 'Winter',
            3: 'Spring', 4: 'Spring', 5: 'Spring',
            6: 'Summer', 7: 'Summer', 8: 'Summer',
            9: 'Fall', 10: 'Fall', 11: 'Fall'
        })

        # A duration of 0 is replaced by 1 to avoid dividing by zero.
        self.df['Cost per Day'] = self.df['Maintenance cost'] / self.df['Duration'].replace(0, 1)

        # AI-driven feature engineering
        self._engineer_ai_features()

    def _set_default_ai_features(self) -> None:
        """Write the neutral fallback value into every model-derived column."""
        for column, value in self._AI_FEATURE_DEFAULTS.items():
            self.df[column] = value

    def _engineer_ai_features(self) -> None:
        """Fit KMeans, IsolationForest and PCA on the numeric columns.

        The four model-derived columns are always created: they start from
        neutral defaults and are overwritten by whichever models can be
        fitted. Any failure is reported with a printed warning and the
        defaults are restored (best-effort behaviour, never raises).
        """
        try:
            # Snapshot the numeric inputs first so the default columns written
            # below are not fed back into the models.
            numeric_data = self.df.select_dtypes(include=[np.number]).fillna(0)
            self._set_default_ai_features()

            if numeric_data.shape[1] == 0:
                return

            scaled_data = StandardScaler().fit_transform(numeric_data)

            # Equipment clustering (never ask for more clusters than rows)
            kmeans = KMeans(n_clusters=min(5, len(self.df)), random_state=42)
            self.df['equipment_cluster'] = kmeans.fit_predict(scaled_data)

            # Anomaly detection
            iso_forest = IsolationForest(random_state=42, contamination=0.1)
            self.df['maintenance_anomaly'] = iso_forest.fit_predict(scaled_data)

            # Pattern detection: a 2-component PCA needs at least 2 features;
            # otherwise the pattern columns keep their defaults.
            if scaled_data.shape[1] >= 2:
                pca_result = PCA(n_components=2).fit_transform(scaled_data)
                self.df['maintenance_pattern_1'] = pca_result[:, 0]
                self.df['maintenance_pattern_2'] = pca_result[:, 1]

        except Exception as e:
            print(f"Warning: Error during AI feature engineering: {str(e)}")
            self._set_default_ai_features()

    def equipment_cost_analysis(self):
        """Return per-equipment totals.

        Returns:
            DataFrame indexed by ``Equipment Name`` with columns
            ``Total Cost``, ``Average Cost``, ``Number of Tasks`` and
            ``Total Duration``, rounded to 2 decimals.
        """
        equipment_costs = self.df.groupby('Equipment Name').agg({
            'Maintenance cost': ['sum', 'mean', 'count'],
            'Duration': 'sum'
        }).round(2)

        equipment_costs.columns = ['Total Cost', 'Average Cost', 'Number of Tasks', 'Total Duration']
        return equipment_costs

    def criticality_analysis(self):
        """Return cost/duration metrics per criticality level.

        Returns:
            DataFrame indexed by ``Criticality level`` with columns
            ``Total Cost``, ``Average Cost``, ``Average Duration`` and
            ``Unique Equipment Count``, rounded to 2 decimals.
        """
        criticality_metrics = self.df.groupby('Criticality level').agg({
            'Maintenance cost': ['sum', 'mean'],
            'Duration': 'mean',
            'Equipment ID': 'nunique'
        }).round(2)

        criticality_metrics.columns = ['Total Cost', 'Average Cost', 'Average Duration', 'Unique Equipment Count']
        return criticality_metrics

    def plot_interactive_cost_distribution(self):
        """Return a Plotly histogram of maintenance cost colored by criticality."""
        fig = px.histogram(
            self.df,
            x='Maintenance cost',
            color='Criticality level',
            title='Distribution of Maintenance Costs by Criticality Level',
            labels={'Maintenance cost': 'Maintenance Cost ($)'},
            template='plotly_white'
        )
        fig.update_layout(showlegend=True, title_x=0.5)
        return fig

    def plot_interactive_temporal_trends(self):
        """Return a Plotly line chart of total maintenance cost per month."""
        monthly_data = self.df.groupby('month_year')['Maintenance cost'].sum().reset_index()
        # Period values are converted to strings so they can be used as axis labels.
        monthly_data['month_year'] = monthly_data['month_year'].astype(str)

        fig = px.line(
            monthly_data,
            x='month_year',
            y='Maintenance cost',
            title='Temporal Trends in Maintenance Costs',
            labels={'month_year': 'Month-Year', 'Maintenance cost': 'Total Maintenance Cost ($)'},
            template='plotly_white'
        )
        fig.update_layout(showlegend=True, title_x=0.5)
        return fig

    def plot_interactive_ai_insights(self):
        """Return a 2x2 Plotly figure built from the model-derived columns.

        Panels: PCA scatter colored by cluster, PCA scatter colored by anomaly
        label, cost vs. duration colored by cluster, and the first PCA
        component ordered by ``Start date``.
        """
        fig = make_subplots(
            rows=2, cols=2,
            subplot_titles=(
                'Equipment Maintenance Clusters',
                'Maintenance Anomalies',
                'Cost vs Duration by Cluster',
                'Pattern Evolution'
            )
        )

        # Plot 1: Equipment Clusters
        fig.add_trace(
            go.Scatter(
                x=self.df['maintenance_pattern_1'],
                y=self.df['maintenance_pattern_2'],
                mode='markers',
                marker=dict(color=self.df['equipment_cluster'], colorscale='Viridis'),
                name='Clusters'
            ),
            row=1, col=1
        )

        # Plot 2: Anomalies
        fig.add_trace(
            go.Scatter(
                x=self.df['maintenance_pattern_1'],
                y=self.df['maintenance_pattern_2'],
                mode='markers',
                marker=dict(color=self.df['maintenance_anomaly'], colorscale='RdYlGn'),
                name='Anomalies'
            ),
            row=1, col=2
        )

        # Plot 3: Cost vs Duration by Cluster
        fig.add_trace(
            go.Scatter(
                x=self.df['Duration'],
                y=self.df['Maintenance cost'],
                mode='markers',
                marker=dict(color=self.df['equipment_cluster'], colorscale='Viridis'),
                name='Cost vs Duration'
            ),
            row=2, col=1
        )

        # Plot 4: Pattern Evolution (records in chronological order)
        timeline = self.df.sort_values('Start date').reset_index()
        fig.add_trace(
            go.Scatter(
                # Materialized as a list: a plain list is always accepted as
                # trace data, whereas a lazy range object may not be.
                x=list(range(len(timeline))),
                y=timeline['maintenance_pattern_1'],
                mode='lines+markers',
                marker=dict(color=timeline['equipment_cluster'], colorscale='Viridis'),
                name='Pattern Evolution'
            ),
            row=2, col=2
        )

        fig.update_layout(
            height=800,
            showlegend=True,
            title_text="AI-Driven Maintenance Analysis",
            template='plotly_white'
        )
        return fig

    def generate_html_report(self, cost_analysis: str = "", ai_insights: str = "",
                             recommendations: str = "") -> str:
        """Render ``HTML_TEMPLATE`` into a complete HTML report.

        The overview and equipment sections are built here from ``self.df``.
        The remaining sections are inserted as given; each defaults to an
        empty string, which leaves that section with only its heading.

        Args:
            cost_analysis: HTML fragment for the "Cost Analysis" section.
            ai_insights: HTML fragment for the "AI-Driven Insights" section.
            recommendations: HTML fragment for the "Recommendations" section.

        Returns:
            The rendered HTML document as a string.
        """
        # Basic statistics
        overview = f"""
        <div class='insight'>
            <h3>Summary Statistics</h3>
            <p>Total Equipment: {len(self.df['Equipment Name'].unique())}</p>
            <p>Total Maintenance Tasks: {len(self.df)}</p>
            <p>Total Cost: ${self.df['Maintenance cost'].sum():,.2f}</p>
            <p>Average Cost per Task: ${self.df['Maintenance cost'].mean():,.2f}</p>
        </div>
        """

        # Equipment analysis
        equipment_analysis = f"""
        <div class='insight'>
            <h3>Equipment Cost Analysis</h3>
            {self.equipment_cost_analysis().to_html()}
        </div>
        <div class='insight'>
            <h3>Criticality Analysis</h3>
            {self.criticality_analysis().to_html()}
        </div>
        """

        # Generate the full report using the template
        template = jinja2.Template(HTML_TEMPLATE)
        report = template.render(
            overview_content=overview,
            equipment_analysis=equipment_analysis,
            cost_analysis=cost_analysis,
            ai_insights=ai_insights,
            recommendations=recommendations
        )

        return report

## PDF Report Generation Function

In [None]:
def generate_pdf_from_html(html_content, output_file='maintenance_report.pdf'):
    """Render an HTML string to a PDF file with WeasyPrint.

    Args:
        html_content: The HTML document to render, as a string.
        output_file: Path the PDF is written to.

    Returns:
        ``output_file``, unchanged, so callers can report or download it.
    """
    # Imported inside the function so WeasyPrint is only needed when a PDF is
    # actually produced; the alias keeps it distinct from IPython's HTML.
    from weasyprint import HTML as WeasyHTML
    from weasyprint.text.fonts import FontConfiguration

    document = WeasyHTML(string=html_content)
    document.write_pdf(
        output_file,
        font_config=FontConfiguration(),
        presentational_hints=True,
    )
    return output_file

## AI Insights Generation

In [None]:
def generate_maintenance_insights(data):
    """Derive rule-based maintenance insights from simple statistics.

    Three checks are run, each flagging values more than one standard
    deviation above the mean:

    * cost      - tasks whose ``Maintenance cost`` exceeds the threshold,
                  counted per ``Equipment Name``;
    * frequency - equipment names whose task count exceeds the threshold;
    * duration  - tasks whose ``Duration`` exceeds the threshold.

    Args:
        data: DataFrame with ``Equipment Name``, ``Maintenance cost`` and
            ``Duration`` columns.

    Returns:
        A list of dicts with the keys ``category``, ``finding``, ``details``,
        ``impact`` and ``action``; one entry per check that flagged anything.
        Empty when ``data`` has no rows or nothing exceeds a threshold.
    """
    insights = []
    if data.empty:
        return insights

    def _threshold(series):
        # With a single observation std() is NaN, so nothing is flagged.
        return series.mean() + series.std()

    def _names(index):
        # Equipment names may be numeric IDs; str.join only accepts strings.
        return ", ".join(map(str, index))

    # Cost patterns
    costs = data['Maintenance cost']
    high_cost_equipment = data[costs > _threshold(costs)]['Equipment Name'].value_counts()

    # Frequency patterns
    equipment_frequency = data['Equipment Name'].value_counts()
    high_frequency_equipment = equipment_frequency[equipment_frequency > _threshold(equipment_frequency)]

    # Duration patterns
    durations = data['Duration']
    long_duration_tasks = data[durations > _threshold(durations)]

    # Generate insights
    if len(high_cost_equipment) > 0:
        insights.append({
            'category': 'Cost Analysis',
            'finding': f'Found {len(high_cost_equipment)} equipment types with above-average maintenance costs',
            'details': f'Top equipment: {_names(high_cost_equipment.head(3).index)}',
            'impact': 'High',
            'action': 'Review maintenance procedures for high-cost equipment'
        })

    if len(high_frequency_equipment) > 0:
        insights.append({
            'category': 'Maintenance Frequency',
            'finding': f'Identified {len(high_frequency_equipment)} equipment types requiring frequent maintenance',
            'details': f'Most frequent: {_names(high_frequency_equipment.head(3).index)}',
            'impact': 'Medium',
            'action': 'Investigate root causes of frequent maintenance needs'
        })

    if len(long_duration_tasks) > 0:
        insights.append({
            'category': 'Maintenance Duration',
            'finding': f'Found {len(long_duration_tasks)} maintenance tasks with extended durations',
            'details': f'Average extended duration: {long_duration_tasks["Duration"].mean():.1f} days',
            'impact': 'Medium',
            'action': 'Optimize maintenance procedures to reduce downtime'
        })

    return insights

## Enhanced Report Generation

In [None]:
def generate_comprehensive_report(analyzer):
    """Build the full report, write it as HTML and PDF, and return both.

    The insights from ``generate_maintenance_insights`` are rendered as
    ``.insight`` cards and inserted into the "AI-Driven Insights" section of
    the page produced by ``analyzer.generate_html_report()``. The HTML is
    written to ``maintenance_report.html`` and then converted to PDF.

    Args:
        analyzer: A ``MaintenanceAnalyzer`` whose ``df`` holds the records.

    Returns:
        Tuple ``(html_report, pdf_file)``: the final HTML string and the path
        returned by ``generate_pdf_from_html``.
    """
    from html import escape

    # Generate AI insights
    insights = generate_maintenance_insights(analyzer.df)

    # Convert insights to HTML. Text is escaped because the details embed
    # equipment names taken straight from the input data.
    cards = []
    for insight in insights:
        impact_class = insight['impact'].lower() + '-impact'
        cards.append(f"""
        <div class='insight {impact_class}'>
            <h3>{escape(insight['category'])}</h3>
            <p><strong>Finding:</strong> {escape(insight['finding'])}</p>
            <p><strong>Details:</strong> {escape(insight['details'])}</p>
            <p><strong>Recommended Action:</strong> {escape(insight['action'])}</p>
        </div>
        """)
    insights_html = "\n    <div class='insights-container'>\n    " + "".join(cards) + "</div>"

    # Generate HTML report
    html_report = analyzer.generate_html_report()

    # Add insights to HTML report. The rendered page no longer contains the
    # literal '{{ ai_insights }}' placeholder (the template engine has already
    # substituted it), so the cards are anchored on the section heading; the
    # placeholder is still honored in case an unrendered template is returned.
    placeholder = '{{ ai_insights }}'
    section_heading = '<h2>AI-Driven Insights</h2>'
    if placeholder in html_report:
        html_report = html_report.replace(placeholder, insights_html)
    elif section_heading in html_report:
        html_report = html_report.replace(section_heading, section_heading + insights_html, 1)
    else:
        print("Warning: AI insights section not found; insights were not added to the report")

    # Save HTML report
    with open('maintenance_report.html', 'w', encoding='utf-8') as f:
        f.write(html_report)

    # Generate PDF report
    pdf_file = generate_pdf_from_html(html_report)

    return html_report, pdf_file

## Generate Complete Analysis with Reports

In [None]:
# Generate comprehensive reports
# `analyzer` must be a MaintenanceAnalyzer created in an earlier cell. No
# visible cell creates one, so fail with an actionable message if it is missing.
if 'analyzer' not in globals():
    raise NameError(
        "`analyzer` is not defined. Create it first, e.g. "
        "analyzer = MaintenanceAnalyzer('path/to/maintenance_data.csv')"
    )

html_report, pdf_file = generate_comprehensive_report(analyzer)

# Display HTML report in notebook
display(HTML(html_report))

print("\nReports have been generated:")
print("HTML Report: maintenance_report.html")
print(f"PDF Report: {pdf_file}")

# Download options: google.colab only exists inside Google Colab, so skip the
# browser download elsewhere instead of failing after the reports were written.
try:
    from google.colab import files
except ImportError:
    print("Not running in Google Colab; skipping browser download of the saved reports.")
else:
    files.download('maintenance_report.html')
    files.download(pdf_file)