In [3]:
import os
import re
import uuid
import json
from datetime import datetime
from typing import List, Optional, Dict

import pandas as pd
from fastapi import FastAPI, UploadFile, File, Form
from fastapi.responses import JSONResponse
from pydantic import BaseModel
import openai

# The API key comes from the "my_open_ai_key" environment variable so that no
# real key is hard-coded here; the fallback value is only a placeholder.
openai.api_key = os.environ.get("my_open_ai_key", "sk‑demo")

# ASGI application object that the route decorators below register against.
app = FastAPI(version="1.0.0", title="AI Log Summarizer")


# Log level -> triage priority.
PRIORITY_MAP = dict(ERROR="High", WARNING="Medium", INFO="Low")

# E-mail-like token: word/dot/hyphen characters on both sides of a single "@".
EMAIL_RE = r"[\w\.-]+@[\w\.-]+"
EMAIL_PATTERN = re.compile(EMAIL_RE)

# Dotted quad of 1-3 digit groups; the numeric range of each group is not checked.
IP_RE = r"\b(?:\d{1,3}\.){3}\d{1,3}\b"
IP_PATTERN = re.compile(IP_RE)

# Loose pattern for dotted-number fragments with only one or two dots
# (e.g. a truncated IP).
INVALID_IP_RE = r"\b(?:\d{1,3}\.){1,2}\d*\b"


def parse_log_line(line: str):
    """Return timestamp, level, raw message."""
    m = re.match(r"\[(.*?)\]\s+(\w+):\s+(.*)", line)
    if m:
        return m.group(1), m.group(2), m.group(3)
    return None, None, line.strip()


def is_invalid_ip(ip: str) -> bool:
    """Return True when *ip* is not a well-formed dotted-quad IPv4 address.

    An address is valid only if it has exactly four dot-separated groups, each
    made of one to three ASCII digits with a numeric value of at most 255.
    A purely shape-based check (digits and dots only) cannot reject strings
    such as ``999.999.999.999``, so the range of every octet is verified here.
    """
    octets = ip.split(".")
    if len(octets) != 4:
        return True
    for octet in octets:
        # isascii() keeps non-ASCII digit characters, which isdigit() accepts,
        # from being treated as valid octets.
        if not (octet.isascii() and octet.isdigit() and len(octet) <= 3):
            return True
        if int(octet) > 255:
            return True
    return False


def is_invalid_email(email: str) -> bool:
    """Return True when *email* fails the basic e-mail sanity checks.

    The address is accepted only if the whole string matches ``EMAIL_RE``,
    contains no consecutive dots, and has exactly one ``@``.
    """
    well_formed = re.fullmatch(EMAIL_RE, email) is not None
    has_double_dot = ".." in email
    single_at = email.count('@') == 1
    return not (well_formed and not has_double_dot and single_at)


def mask_tokens(message: str, email_map: Dict[str, str], ip_map: Dict[str, str]):
    """Swap e-mail addresses and IPs in *message* for stable placeholder tokens.

    Each distinct e-mail becomes ``USERNAME-NN`` and each distinct IP becomes
    ``IP-ADDRESS-NN``, where ``NN`` is the zero-padded position at which the
    value was first added to its map. Both maps are updated in place, so
    passing the same maps across calls keeps tokens consistent between
    messages. E-mails are substituted before IPs. Returns the masked message.
    """

    def _tokenizer(mapping, prefix):
        # Build a re.sub callback bound to one mapping/prefix pair.
        def _swap(found):
            original = found.group(0)
            if original in mapping:
                return mapping[original]
            mapping[original] = f"{prefix}{len(mapping) + 1:02d}"
            return mapping[original]

        return _swap

    without_emails = re.sub(EMAIL_RE, _tokenizer(email_map, "USERNAME-"), message)
    return re.sub(IP_RE, _tokenizer(ip_map, "IP-ADDRESS-"), without_emails)


class LogSummaryRequest(BaseModel):
    """Request body accepted by the summarize endpoint.

    Both fields are optional. NOTE(review): the visible summarize handler
    takes this model as ``body`` but does not read either field.
    """
    # Optional session identifier; defaults to None. Intended for reuse when
    # refining an earlier summary.
    session_id: Optional[str] = None  # reused for refinement
    # Optional priority supplied by the user; defaults to "Low".
    priority: Optional[str] = "Low"   # user-set if desired

class Feedback(BaseModel):
    """Request body for the feedback endpoint: a rating plus an optional comment."""
    # Identifier of the session the feedback belongs to (required).
    session_id: str
    # Intended as a 1-5 star rating. NOTE(review): the model only requires an
    # int; the 1-5 range is not enforced here.
    rating: int  # 1-5 star
    # Optional free-text comment; defaults to None.
    comment: Optional[str] = None


# In-memory session store keyed by session id. It is a plain module-level
# dict, so entries exist only in this process's memory.
SESSIONS: Dict[str, Dict] = dict()


@app.post("/upload")
async def upload_log(file: UploadFile = File(...)):
    """Parse an uploaded log file, mask identifiers, and open a new session.

    The upload is decoded as UTF-8 (undecodable bytes are dropped) and read
    line by line. Lines that ``parse_log_line`` cannot split into
    timestamp/level/message are skipped. For every remaining line the message
    is masked with ``mask_tokens``, a priority is looked up from the level,
    and the line is flagged ``invalid`` when any IP or e-mail found in the raw
    message fails its validity check.

    The rows are stored as a DataFrame, together with the masking maps and
    empty history/feedback lists, in ``SESSIONS`` under a fresh UUID.

    Returns:
        A dict with the new ``session_id`` and a confirmation ``message``.
    """
    # Codec names are expected to be plain ASCII, so spell it "utf-8" with an
    # ordinary hyphen.
    raw = (await file.read()).decode("utf-8", errors="ignore").splitlines()
    email_map, ip_map, rows = {}, {}, []

    for line in raw:
        ts, lvl, msg = parse_log_line(line)
        if ts is None:
            continue
        masked = mask_tokens(msg, email_map, ip_map)
        # Look the level up case-insensitively so "error" or "Warning" get the
        # same priority as "ERROR"/"WARNING" instead of falling back to "Low".
        priority = PRIORITY_MAP.get(lvl.upper(), "Low")
        # Validity is judged on the raw message, before masking.
        invalid = any(is_invalid_ip(ip) for ip in re.findall(IP_RE, msg)) or any(
            is_invalid_email(em) for em in re.findall(EMAIL_RE, msg)
        )
        rows.append((ts, lvl, masked, priority, invalid))

    df = pd.DataFrame(rows, columns=["timestamp", "level", "masked_message", "priority", "invalid"])
    # Unparseable timestamps become NaT instead of raising.
    df["timestamp"] = pd.to_datetime(df["timestamp"], errors="coerce")

    session_id = str(uuid.uuid4())
    SESSIONS[session_id] = {
        "df": df,
        "email_map": email_map,
        "ip_map": ip_map,
        "history": [],  # GPT conversation history
        "feedback": []
    }
    return {"session_id": session_id, "message": "Log uploaded and parsed."}


@app.post("/summarize/{session_id}")
async def summarize(session_id: str, body: LogSummaryRequest):
    """Generate, or refine, a GPT summary of the session's log clusters.

    Rows of the session DataFrame are grouped by level, masked message and
    priority; each group becomes one ``LEVEL | PRIORITY | MSG | COUNT`` line
    of the prompt. When the session already has history, the most recent
    history entry is placed in front of the new prompt so the model can
    refine its earlier answer.

    Args:
        session_id: Key of the session in ``SESSIONS``.
        body: Request model; its fields are not read by this handler.

    Returns:
        A 404 ``JSONResponse`` for an unknown session. Otherwise a dict with
        the ``summary`` text (or an ``[OpenAI Error] ...`` string if the call
        raised), the number of ``clusters``, and ``priority_count``, the
        number of clusters per priority.
    """
    if session_id not in SESSIONS:
        return JSONResponse({"error": "Invalid session"}, status_code=404)

    session = SESSIONS[session_id]
    df = session["df"]
    # Basic aggregation (could be expanded)
    grouped = df.groupby(["level", "masked_message", "priority"]).size().reset_index(name="count")

    header = (
        "You are LogGPT. Summarize the following log clusters with potential root causes."
        "\nFormat: LEVEL | PRIORITY | MSG | COUNT.\n"
    )
    # Build all cluster lines first and join once rather than growing the
    # prompt string inside the loop.
    cluster_lines = [
        f"{row['level']} | {row['priority']} | {row['masked_message']} | {row['count']}\n"
        for _, row in grouped.iterrows()
    ]
    prompt = header + "".join(cluster_lines)

    # Include the previous iteration for refinement.
    history = session["history"]
    if history:
        prompt = history[-1] + "\nUSER_FEEDBACK: Please refine based on above.\n" + prompt

    # Call OpenAI.
    # NOTE(review): this call is not awaited; if it is synchronous it will
    # hold up the async handler for the duration of the request — confirm.
    try:
        rsp = openai.ChatCompletion.create(
            model="gpt-3.5-turbo", messages=[{"role": "user", "content": prompt}],
            max_tokens=400
        )
        summary = rsp.choices[0].message.content.strip()
    except Exception as e:
        # Best effort: report the failure in the response body and record only
        # the prompt, so an error string is never replayed as a summary.
        summary = f"[OpenAI Error] {e}"
        history.append(prompt)
    else:
        # Store the model's answer next to the prompt. Without it, the next
        # "refine based on above" request would contain no summary to refine.
        history.append(f"{prompt}\nPREVIOUS_SUMMARY:\n{summary}\n")

    return {"summary": summary, "clusters": len(grouped), "priority_count": grouped.priority.value_counts().to_dict()}


@app.post("/feedback")
async def feedback(body: Feedback):
    """Record a user's rating and optional comment against a session.

    Returns a 404 ``JSONResponse`` when the session id is unknown; otherwise
    appends the rating/comment pair to the session's feedback list and
    returns a status dict.
    """
    session = SESSIONS.get(body.session_id)
    if session:
        entry = {"rating": body.rating, "comment": body.comment}
        session["feedback"].append(entry)
        return {"status": "Feedback recorded"}
    return JSONResponse({"error": "Invalid session id"}, status_code=404)


@app.get("/session/{session_id}")
async def session_info(session_id: str):
    """Report basic counters for a session.

    Returns a 404 ``JSONResponse`` for an unknown session id; otherwise a dict
    with the number of parsed rows, history entries and feedback items.
    """
    try:
        session = SESSIONS[session_id]
    except KeyError:
        return JSONResponse({"error": "Invalid session"}, status_code=404)

    row_total = len(session["df"])
    history_total = len(session["history"])
    feedback_total = len(session["feedback"])
    return {
        "rows": row_total,
        "history_entries": history_total,
        "feedback_count": feedback_total,
    }


ModuleNotFoundError: No module named 'openai'