# AIOps Pipeline: From Log to Stored Playbook

### This script automates the entire AIOps workflow:
### 1. Sets up `logs`, `report`, and `playbook` buckets in MinIO.
### 2. Reads a log file from the `logs` bucket.
### 3. Queries an AI model to generate a detailed incident report.
### 4. Uploads the full report as a .txt file to the `report` bucket.
### 5. Parses the report to extract the affected host and service name.
### 6. Generates a templated Ansible playbook.
### 7. Uploads the final playbook as a .yml file to the `playbook` bucket.

In [1]:

import requests
import json
import boto3
from botocore.exceptions import ClientError, NoCredentialsError
import sys
import re
import yaml

## Configuration

In [18]:

# AI Model Settings
# Base URL of the model-serving route; the completions URL is derived from it.
INFERENCE_ENDPOINT = "https://granite-aiops.apps.cluster-hdmxf.hdmxf.sandbox689.opentlc.com"
MODEL_API_URL = f"{INFERENCE_ENDPOINT}/v1/completions"
# Sent as the "model" field of the completion request payload.
MODEL_NAME = "granite"

# MinIO Storage Settings
# Host name only (no scheme): the S3 client is built with "http://" prepended.
MINIO_ENDPOINT = "minio-api-aiops.apps.cluster-hdmxf.hdmxf.sandbox689.opentlc.com"
# NOTE(review): credentials are hard-coded in the notebook; presumably sandbox
# defaults -- confirm, and load them from the environment before sharing this file.
MINIO_ACCESS_KEY = "minio"
MINIO_SECRET_KEY = "minio123"
# Bucket names: input logs, generated incident reports, generated playbooks.
LOGS_BUCKET = "logs"
REPORTS_BUCKET = "report"
PLAYBOOKS_BUCKET = "playbook"
# Object key of the log file read from LOGS_BUCKET.
LOG_FILE = "service_error_down.txt"


## Reading from the logs bucket in MinIO


In [19]:
# Module-level S3 client for the notebook cells; talks to MinIO over plain HTTP
# using the access/secret key pair from the configuration cell.
s3_client = boto3.client('s3', endpoint_url=f"http://{MINIO_ENDPOINT}", aws_access_key_id=MINIO_ACCESS_KEY, aws_secret_access_key=MINIO_SECRET_KEY)

In [26]:

def setup_minio_buckets(s3_client, buckets_to_create):
    """Make sure every requested bucket is present, creating the missing ones.

    Each bucket is probed with ``head_bucket``; a 404 answer triggers
    ``create_bucket``. Returns True when all buckets exist or were created,
    and False as soon as one bucket cannot be checked or created.
    """
    print("--> Step 1: Setting up MinIO buckets...")
    for bucket_name in buckets_to_create:
        try:
            s3_client.head_bucket(Bucket=bucket_name)
        except ClientError as head_error:
            # Only a 404 means "bucket is missing"; any other code aborts setup.
            if head_error.response['Error']['Code'] != '404':
                print(f"    ❌ Error checking bucket '{bucket_name}': {head_error}")
                return False
            try:
                s3_client.create_bucket(Bucket=bucket_name)
                print(f"    ✅ Bucket '{bucket_name}' created successfully.")
            except Exception as create_error:
                print(f"    ❌ Error creating bucket '{bucket_name}': {create_error}")
                return False
        else:
            print(f"    ✅ Bucket '{bucket_name}' already exists.")
    return True

def read_log_from_minio(s3_client, bucket_name, object_name):
    """Fetch a log object from a MinIO bucket and return it as text.

    Args:
        s3_client: S3 client exposing ``get_object``.
        bucket_name: Bucket that holds the log file.
        object_name: Key of the log file inside the bucket.

    Returns:
        The log content decoded as UTF-8, or None when the object is missing
        or the request fails with a ClientError.
    """
    print(f"\n--> Step 2: Reading '{object_name}' from bucket '{bucket_name}'...")
    try:
        response = s3_client.get_object(Bucket=bucket_name, Key=object_name)
        raw_bytes = response['Body'].read()
    except ClientError as e:
        if e.response['Error']['Code'] == 'NoSuchKey':
            print(f"    ❌ Error: The object '{object_name}' was not found in bucket '{bucket_name}'.")
        else:
            print(f"    ❌ An error occurred while reading from MinIO: {e}")
        return None

    # Log files may contain bytes that are not valid UTF-8; substitute them
    # with U+FFFD instead of aborting the pipeline with a UnicodeDecodeError.
    content = raw_bytes.decode('utf-8', errors='replace')
    print(f"    ✅ Successfully read log content from MinIO.")
    return content


In [27]:
# Ad-hoc check that the configured log object can be read; the result is not stored.
read_log_from_minio(s3_client, LOGS_BUCKET, LOG_FILE)


--> Step 2: Reading 'service_error_down.txt' from bucket 'logs'...
    ✅ Successfully read log content from MinIO.




## Querying the LLM to generate an incident report from the logs

In [28]:

def query_model_for_report(logs, timeout=120):
    """Ask the completion endpoint for a Markdown incident report about ``logs``.

    Args:
        logs: Raw log text; it is embedded verbatim in the prompt.
        timeout: Seconds to wait for the model endpoint before giving up, so a
            stalled endpoint cannot hang the pipeline indefinitely.

    Returns:
        The generated report text, or None when the request fails or the
        response carries no usable completion text.
    """
    print(f"\n--> Step 2: Querying AI model for full report...")
    prompt = f"""
Generate a concise, structured AIOps incident report based on the following logs. The service name must be the short, executable name (e.g., 'httpd').
The report must be in Markdown format and include:
1. Key metadata: INCIDENT ID, DETECTED, SEVERITY, STATUS, AFFECTED SERVICE, AFFECTED HOST, SUMMARY, and KEY ERROR LOG.
2. A ROOT CAUSE section explaining the 'why'.
3. A REMEDIATION PLAYBOOK section with exact, numbered shell commands to fix the issue.
4. A VALIDATION section with a command to confirm the fix.

--- LOGS START ---
{logs}
--- LOGS END ---
"""
    headers = {"Content-Type": "application/json"}
    payload = {"model": MODEL_NAME, "prompt": prompt, "max_tokens": 512}

    try:
        # NOTE(review): verify=False disables TLS certificate validation;
        # presumably needed for a self-signed sandbox route -- confirm, and
        # enable verification outside the sandbox.
        response = requests.post(
            MODEL_API_URL,
            headers=headers,
            json=payload,
            verify=False,
            timeout=timeout,
        )
        response.raise_for_status()
        result = response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError covers a body that is not valid JSON.
        print(f"    ❌ Error querying model: {e}")
        return None

    choices = result.get('choices') if isinstance(result, dict) else None
    if not choices:
        print("    ❌ Error querying model: response contained no choices.")
        return None

    first_choice = choices[0]
    report_text = first_choice.get('text') if isinstance(first_choice, dict) else None
    # A missing or blank completion is a failure, not a report: returning a
    # placeholder string here would be uploaded as if it were a real report.
    if not isinstance(report_text, str) or not report_text.strip():
        print("    ❌ Error: Could not extract report.")
        return None

    print("    ✅ AI analysis complete.")
    return report_text

def save_report_to_json(report_text, filename="report.json"):
    """Write the report text to a local JSON file under the "report_content" key.

    Returns the file name on success, or None when the file could not be
    written.
    """
    print(f"\n--> Step 3: Saving full report to '{filename}'...")
    wrapped_report = {"report_content": report_text}
    try:
        with open(filename, 'w') as report_file:
            json.dump(wrapped_report, report_file, indent=4)
        print(f"    ✅ Successfully saved full report.")
    except Exception as save_error:
        print(f"    ❌ Error saving report to JSON: {save_error}")
        return None
    return filename


## Uploading the report to MinIO

In [29]:


def upload_to_minio(s3_client, bucket, object_name, content):
    """Store ``content`` (text) as UTF-8 bytes under ``object_name`` in ``bucket``.

    Returns True when the upload succeeded and False on any error.
    """
    print(f"--> Uploading '{object_name}' to bucket '{bucket}'...")
    try:
        encoded_body = content.encode('utf-8')
        s3_client.put_object(Body=encoded_body, Bucket=bucket, Key=object_name)
        print(f"    ✅ Successfully uploaded '{object_name}'.")
    except Exception as upload_error:
        print(f"    ❌ Error uploading to MinIO: {upload_error}")
        return False
    return True

## Generate Ansible playbook for remediation

In [32]:

def _clean_report_field(raw_value):
    """Trim whitespace and Markdown emphasis/code markers (``*``, `` ` ``) from a captured value."""
    return raw_value.strip().strip('*` \t').strip()


def extract_and_save_keywords(report_filename="report.json", output_filename="extracted_keywords.json"):
    """Pull the affected host and service out of a saved report and persist them.

    The report file is expected to be JSON with the report text under the
    "report_content" key. The text is searched (case-insensitively) for
    "AFFECTED HOST:" and "AFFECTED SERVICE:" lines; Markdown bold markers on
    either side of the colon and backticks around the value are tolerated.

    Args:
        report_filename: Path of the JSON file holding the report.
        output_filename: Path the extracted keywords are written to as JSON.

    Returns:
        A dict with "affected_host" and "affected_service", or None when the
        report cannot be read, either value is missing or empty, or the
        output file cannot be written.
    """
    print(f"\n--> Step 6: Extracting keywords from '{report_filename}'...")
    try:
        with open(report_filename, 'r', encoding='utf-8') as f:
            data = json.load(f)
    except FileNotFoundError:
        print(f"    ❌ Error: The file '{report_filename}' was not found.")
        return None
    except (OSError, ValueError) as e:
        # ValueError covers malformed JSON and undecodable bytes.
        print(f"    ❌ An error occurred during keyword extraction: {e}")
        return None

    report_content = data.get("report_content") if isinstance(data, dict) else None
    if not isinstance(report_content, str):
        print("    ❌ Failed to extract keywords.")
        return None

    # `\**` lets the bold marker close before the colon ("**AFFECTED HOST**: x")
    # as well as after it ("**AFFECTED HOST:** x").
    host_match = re.search(r"AFFECTED HOST\**\s*:\s*(.*)", report_content, re.IGNORECASE)
    service_match = re.search(r"AFFECTED SERVICE\**\s*:\s*(.*)", report_content, re.IGNORECASE)

    host = _clean_report_field(host_match.group(1)) if host_match else ""
    service = _clean_report_field(service_match.group(1)) if service_match else ""

    # Drop only a trailing systemd unit suffix, never an inner occurrence.
    unit_suffix = '.service'
    if service.endswith(unit_suffix):
        service = service[:-len(unit_suffix)]

    # An empty value is as useless as a missing one for the playbook.
    if not host or not service:
        print("    ❌ Failed to extract keywords.")
        return None

    details = {"affected_host": host, "affected_service": service}
    print(f"    ✅ Extracted Host:    '{details['affected_host']}'")
    print(f"    ✅ Affected Service: '{details['affected_service']}'")

    try:
        with open(output_filename, 'w', encoding='utf-8') as out_f:
            json.dump(details, out_f, indent=4)
    except OSError as e:
        print(f"    ❌ An error occurred during keyword extraction: {e}")
        return None

    print(f"    ✅ Successfully saved keywords to '{output_filename}'.")
    return details

def generate_ansible_playbook(host, service, port=80, health_url='http://localhost'):
    """Render a remediation playbook for ``service`` on ``host`` as YAML text.

    The play installs the package, starts and enables the service, opens the
    service port in firewalld, and polls a health URL until it answers 200.

    Args:
        host: Ansible host pattern the play targets.
        service: Package/service name to install and start.
        port: TCP port to open in firewalld (defaults to 80).
        health_url: URL polled to confirm recovery (defaults to
            'http://localhost').

    Returns:
        The playbook as a YAML string, or None when host or service is
        missing or YAML serialization fails.
    """
    print(f"\n--> Step 6: Generating Ansible playbook...")

    # Without both values the play would target `hosts: null` or manage a
    # service named `null`; refuse instead of emitting a broken playbook.
    if not host or not service:
        print("    ❌ Error generating YAML: host and service must both be provided.")
        return None

    # NOTE(review): host and service come from the model-generated report;
    # Ansible would template any "{{ ... }}" inside them -- consider validating
    # them against an allow-list before running the playbook.

    # This structure represents the desired Ansible playbook format.
    playbook_data = [
        {
            # Upper-cased so the default 'httpd' still yields 'Restore HTTPD Service'.
            'name': f'Restore {str(service).upper()} Service',
            'hosts': host,
            'become': True,
            'vars': {
                'max_retries': 3,
                'retry_delay': 10
            },
            'tasks': [
                {
                    'name': f'Ensure {service} is installed',
                    'ansible.builtin.dnf': {
                        'name': service,
                        'state': 'present'
                    }
                },
                {
                    'name': f'Start and enable {service} service',
                    'ansible.builtin.service': {
                        'name': service,
                        'state': 'started',
                        'enabled': True
                    },
                    'register': 'svc_result',
                    'until': 'svc_result is succeeded',
                    'retries': '{{ max_retries }}',
                    'delay': '{{ retry_delay }}'
                },
                {
                    'name': f'Ensure port {port} is open',
                    'ansible.posix.firewalld': {
                        'port': f'{port}/tcp',
                        'state': 'enabled',
                        'permanent': True,
                        'immediate': True
                    }
                },
                {
                    'name': 'Verify service recovery',
                    'ansible.builtin.uri': {
                        'url': health_url,
                        'status_code': 200
                    },
                    'register': 'verify',
                    'until': 'verify.status == 200',
                    'retries': '{{ max_retries }}',
                    'delay': '{{ retry_delay }}'
                }
            ]
        }
    ]

    try:
        # sort_keys=False keeps the conventional name/module/register ordering.
        yaml_output = yaml.dump(playbook_data, sort_keys=False, indent=2)
    except yaml.YAMLError as e:
        print(f"    ❌ Error generating YAML: {e}")
        return None

    print("    ✅ Ansible Playbook generated successfully.")
    return yaml_output


## Running all steps end to end

In [33]:
def main():
    """Drive the pipeline from bucket setup to the uploaded playbook.

    Steps run in order: bucket setup, log download, model query, local report
    save, report upload, keyword extraction, playbook generation, playbook
    upload. The first step that yields a falsy result ends the process via
    ``sys.exit`` with a "Pipeline stopped: ..." message.
    """
    def require(outcome, failure_message):
        # Abort the whole pipeline on the first falsy step result.
        if not outcome:
            sys.exit(failure_message)
        return outcome

    minio = boto3.client(
        's3',
        endpoint_url=f"http://{MINIO_ENDPOINT}",
        aws_access_key_id=MINIO_ACCESS_KEY,
        aws_secret_access_key=MINIO_SECRET_KEY,
    )

    require(
        setup_minio_buckets(minio, [LOGS_BUCKET, REPORTS_BUCKET, PLAYBOOKS_BUCKET]),
        "Pipeline stopped: Failed to set up MinIO buckets.",
    )
    raw_logs = require(
        read_log_from_minio(minio, LOGS_BUCKET, LOG_FILE),
        "Pipeline stopped: Could not retrieve logs.",
    )
    report_text = require(
        query_model_for_report(raw_logs),
        "Pipeline stopped: Could not generate AI report.",
    )
    local_report_path = require(
        save_report_to_json(report_text),
        "Pipeline stopped: Failed to save local report.json.",
    )
    require(
        upload_to_minio(minio, REPORTS_BUCKET, "incident_report.txt", report_text),
        "Pipeline stopped: Failed to upload report to MinIO.",
    )
    extracted = require(
        extract_and_save_keywords(local_report_path),
        "Pipeline stopped: Failed to extract keywords.",
    )
    playbook_yaml = require(
        generate_ansible_playbook(
            extracted.get("affected_host"),
            extracted.get("affected_service"),
        ),
        "Pipeline stopped: Could not generate playbook.",
    )
    require(
        upload_to_minio(minio, PLAYBOOKS_BUCKET, "remediation_playbook.yml", playbook_yaml),
        "Pipeline stopped: Failed to upload playbook.",
    )

    print("\n--- FINAL ACTIONABLE PLAYBOOK ---")
    print(playbook_yaml)

if __name__ == "__main__":
    # Frame the run with a banner: a rule line above and below each title.
    _rule = "=============================================="
    for _line in (_rule, "      AIOps PIPELINE: LOGS TO PLAYBOOK", _rule):
        print(_line)
    main()
    for _line in ("\n" + _rule, "            PIPELINE COMPLETE", _rule):
        print(_line)

      AIOps PIPELINE: LOGS TO PLAYBOOK
--> Step 1: Setting up MinIO buckets...
    ✅ Bucket 'logs' already exists.
    ✅ Bucket 'report' already exists.
    ✅ Bucket 'playbook' already exists.

--> Step 2: Reading 'service_error_down.txt' from bucket 'logs'...
    ✅ Successfully read log content from MinIO.

--> Step 2: Querying AI model for full report...




    ✅ AI analysis complete.

--> Step 3: Saving full report to 'report.json'...
    ✅ Successfully saved full report.
--> Uploading 'incident_report.txt' to bucket 'report'...
    ✅ Successfully uploaded 'incident_report.txt'.

--> Step 6: Extracting keywords from 'report.json'...
    ✅ Extracted Host:    'aiops'
    ✅ Affected Service: 'httpd'
    ✅ Successfully saved keywords to 'extracted_keywords.json'.

--> Step 6: Generating Ansible playbook...
    ✅ Ansible Playbook generated successfully.
--> Uploading 'remediation_playbook.yml' to bucket 'playbook'...
    ✅ Successfully uploaded 'remediation_playbook.yml'.

--- FINAL ACTIONABLE PLAYBOOK ---
- name: Restore HTTPD Service
  hosts: aiops
  become: true
  vars:
    max_retries: 3
    retry_delay: 10
  tasks:
  - name: Ensure httpd is installed
    ansible.builtin.dnf:
      name: httpd
      state: present
  - name: Start and enable httpd service
    ansible.builtin.service:
      name: httpd
      state: started
      enabled: tr