<a href="https://colab.research.google.com/github/idoo25/CloudProject_Unicorn/blob/master/HW3_Unicorn.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 🌿 CloudGarden - Smart Plant Care System

## Layered Architecture Overview

This notebook follows a **Layered Architecture** pattern for maintainability and scalability:

| Layer | Description | Contents |
|-------|-------------|----------|
| **Layer 1** | Dependencies & Configuration | Package installation, imports, API keys, constants |
| **Layer 2** | Data Access Layer (DAL) | Firebase operations, external API clients |
| **Layer 3** | Business Logic Layer (BLL) | Data processing, ML models, analytics |
| **Layer 4** | Service Layer | Microservices, report generation |
| **Layer 5** | Presentation Layer | Gradio UI components for each tab |
| **Layer 6** | Application Layer | App builder and launch |

---

# 📦 LAYER 1: Dependencies & Configuration

This layer contains all package installations, imports, and configuration constants.
All dependencies are centralized here to avoid duplication throughout the notebook.

## 1.1 Package Installation

In [None]:
# ============================================================================
# Package Installation - Run this cell first
# ============================================================================
# Each "!pip" line is a notebook shell command. "-q" keeps pip quiet; the first
# two lines also pass "--upgrade", the last two install without upgrading.
# NOTE(review): `transformers` and `requests` are imported by the next cell but
# are not installed here - presumably preinstalled in the runtime; confirm.
!pip install -q --upgrade gradio pandas matplotlib python-docx
!pip install -q --upgrade firebase-admin plotly gdown beautifulsoup4
!pip install -q cerebras-cloud-sdk google-genai
!pip install -q fastapi uvicorn python-dotenv nltk

## 1.2 Global Imports

All imports are consolidated here to prevent duplication.

In [None]:
# ============================================================================
# GLOBAL IMPORTS - Consolidated to avoid duplication
# ============================================================================

# Standard Library
import os
import re
import sys
import json
import time
import math
import random
import tempfile
import subprocess
import warnings
from io import BytesIO
from datetime import datetime, timedelta, timezone
from zoneinfo import ZoneInfo
from collections import Counter, defaultdict
from typing import List, Dict, Tuple, Any, Optional
from urllib.parse import quote

# Data Processing
import numpy as np
import pandas as pd

# Visualization
import matplotlib.pyplot as plt
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots

# Web & API
import requests
from bs4 import BeautifulSoup

# Document Generation
from docx import Document
from docx.enum.text import WD_ALIGN_PARAGRAPH
from docx.shared import Inches, Pt, RGBColor

# Firebase
import firebase_admin
from firebase_admin import credentials, db

# Machine Learning
import gradio as gr
from transformers import pipeline

# NLP
import nltk
from nltk.stem import PorterStemmer
from nltk.corpus import stopwords

# AI/LLM Services
from cerebras.cloud.sdk import Cerebras
from google import genai
from google.genai import types

# API Services
from fastapi import FastAPI
from fastapi.responses import Response
from pydantic import BaseModel
from dotenv import load_dotenv

# Google Colab
try:
    from google.colab import drive, files
    IN_COLAB = True
except ImportError:
    IN_COLAB = False

import gdown

# Suppress warnings and initialize NLP
# NOTE: 'ignore' with no category silences every warning for the whole session.
warnings.filterwarnings('ignore')
# Download the NLTK stopword corpus (quietly) before it is read two lines below.
nltk.download("stopwords", quiet=True)
# Module-level stemmer and English stopword set shared by the NLP helpers.
stemmer = PorterStemmer()
stop_words = set(stopwords.words("english"))

print("✅ All imports loaded successfully")

## 1.3 Configuration Constants

In [None]:
# ============================================================================
# CONFIGURATION CONSTANTS
# ============================================================================

# --- Server Configuration ---
# FEED and BATCH_LIMIT are sent as the "feed" / "limit" query parameters of the
# IoT server's history endpoint.
FEED = "json"
# NOTE: BASE_URL ends with "/" - strip it before appending a path to avoid "//".
BASE_URL = "https://server-cloud-v645.onrender.com/"
BATCH_LIMIT = 200

# --- Firebase Configuration ---
# FIREBASE_KEY_ID is the Google Drive file ID of the credentials JSON, which is
# downloaded to FIREBASE_KEY_FILE; FIREBASE_URL is the Realtime Database root.
FIREBASE_KEY_ID = '1ESnh8BIbGKrVEijA9nKNgNJNdD5kAaYC'
FIREBASE_KEY_FILE = 'firebase_key.json'
FIREBASE_URL = "https://cloud-81451-default-rtdb.europe-west1.firebasedatabase.app/"

# --- API Key Drive IDs ---
# Google Drive file IDs; each file's text content is used as an API key.
CEREBRAS_KEY_DRIVE_ID = "1zOdWD70pxR_BKBW8vMU3FTN1MqL1fkYD"
RAG_CEREBRAS_KEY_DRIVE_ID = "1vuCquKjZPwCHVNbRoLfAay7MxI7MjOru"
GEMINI_KEY_DRIVE_ID = "1eC4l2drO8dL8S8M50wD7TWs8iLXHlWT-"

# --- Model Configuration ---
REPORT_MODEL_NAME = "llama3.1-8b"
RAG_MODEL_ID = "llama3.1-8b"
GEMINI_MODEL_ID = "gemini-2.5-flash"
# Hugging Face model ID used for the image-classification pipeline.
PLANT_DISEASE_MODEL = "linkanjarad/mobilenet_v2_1.0_224-plant-disease-identification"

# --- Microservice Configuration ---
REPORT_SERVICE_HOST = "127.0.0.1"
REPORT_SERVICE_PORT = 8001
# Full endpoint URL assembled from the host and port above.
REPORT_SERVICE_URL = f"http://{REPORT_SERVICE_HOST}:{REPORT_SERVICE_PORT}/generate-docx"

# --- Timezone ---
TZ_NAME = "Asia/Jerusalem"

# --- Visualization Colors ---
# Per-sensor hex colors; the COLOR_* aliases below expose the same values flat.
COLORS = {
    'temperature': {'color': '#ef4444'},
    'humidity': {'color': '#3b82f6'},
    'soil': {'color': '#8b5cf6'}
}
COLOR_TEMP = COLORS['temperature']['color']
COLOR_HUM = COLORS['humidity']['color']
COLOR_SOIL = COLORS['soil']['color']

# Colors paired with the healthy / warning / not-OK plant statuses.
STATUS_OK_COLOR = "#2ca02c"
STATUS_WARN_COLOR = "#ffbf00"
STATUS_BAD_COLOR = "#d62728"

# --- Sensor Thresholds ---
# 'low'/'high' bound the acceptable range (inclusive); readings within 'margin'
# of either bound are reported as warnings rather than issues.
SENSOR_THRESHOLDS = {
    'temperature': {'low': 18, 'high': 32, 'margin': 1},
    'humidity': {'low': 35, 'high': 75, 'margin': 3},
    'soil': {'low': 20, 'high': 60, 'margin': 3}
}

# --- RAG Document URLs ---
DOC_URLS = [
    "https://doi.org/10.1038/s41598-025-20629-y",
    "https://doi.org/10.3389/fpls.2016.01419",
    "https://doi.org/10.1038/s41598-025-05102-0",
    "https://doi.org/10.1038/s41598-025-04758-y",
    "https://doi.org/10.2174/0118743315321139240627092707",
]

# --- Firebase Index Paths ---
# Realtime Database paths for the inverted index, the doc-id -> URL map and the
# stored document texts.
INDEX_PATH = "indexes/public_index"
MAP_PATH = "indexes/doc_map"
TEXT_PATH = "indexes/doc_text"

print("✅ Configuration constants loaded")

## 1.4 CSS Styling

In [None]:
# ============================================================================
# CUSTOM CSS STYLING
# ============================================================================

# Stylesheet kept as one string. It imports the Inter font and defines the
# classes .kpi-card/.kpi-label/.kpi-value, .trend-up/.trend-down,
# .explanation-card, .status-badge and .status-dot (with a "pulse" animation).
# NOTE(review): presumably handed to Gradio as custom CSS - usage is not
# visible in this part of the file; confirm.
CUSTOM_CSS = """
@import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700&display=swap');
* { font-family: 'Inter', sans-serif; }

.kpi-card {
    background: white;
    padding: 24px;
    border-radius: 12px;
    box-shadow: 0 1px 3px rgba(0,0,0,0.12);
    text-align: center;
    border-left: 4px solid;
}
.kpi-label { color: #6b7280; font-size: 14px; font-weight: 600; }
.kpi-value { font-size: 48px; font-weight: 700; color: #1f2937; }
.trend-up { color: #10b981; }
.trend-down { color: #ef4444; }

.explanation-card {
    padding: 16px;
    border-radius: 10px;
    margin-bottom: 16px;
    color: white;
}
.explanation-card h3 { margin: 0 0 8px 0; font-size: 16px; }
.explanation-card p { margin: 4px 0; font-size: 14px; opacity: 0.95; }

.status-badge {
    display: inline-flex;
    align-items: center;
    padding: 4px 12px;
    background: #10b981;
    color: white;
    border-radius: 20px;
    font-weight: 600;
    font-size: 12px;
}

.status-dot {
    width: 8px;
    height: 8px;
    background: white;
    border-radius: 50%;
    margin-right: 6px;
    animation: pulse 1.5s infinite;
}

@keyframes pulse {
    0%, 100% { opacity: 1; }
    50% { opacity: 0.5; }
}
"""

print("✅ CSS styling loaded")

# 🗄️ LAYER 2: Data Access Layer (DAL)

This layer handles all external data operations:
- Firebase CRUD operations
- External API communication
- Data fetching and persistence

## 2.1 Credential Management

In [None]:
# ============================================================================
# CREDENTIAL MANAGEMENT
# ============================================================================

def download_api_key_from_drive(file_id: str) -> str:
    """Return the stripped text content of the Google Drive file `file_id`.

    Raises:
        RuntimeError: if the HTTP response is not OK.
    """
    resp = requests.get(
        f"https://drive.google.com/uc?id={file_id}&export=download",
        timeout=60,
    )
    if resp.ok:
        return resp.text.strip()
    raise RuntimeError(f"Failed to download API key: {resp.status_code}")

def download_file_from_drive(file_id: str, output_path: str) -> str:
    """Download the Drive file `file_id` to `output_path` with gdown.

    Returns `output_path` unchanged so the call can be chained.
    """
    gdown.download(
        f'https://drive.google.com/uc?id={file_id}',
        output_path,
        quiet=False,
        fuzzy=True,
    )
    return output_path

# Global API key storage (filled in by initialize_api_keys)
CEREBRAS_API_KEY = None
RAG_API_KEY = None
GEMINI_API_KEY = None

def initialize_api_keys():
    """Load all API keys from Google Drive into the module-level globals.

    Downloads the Cerebras, RAG and Gemini keys in that order, printing a
    confirmation after each one.

    Returns:
        True once all three keys have been loaded.

    Raises:
        RuntimeError: if a download fails or a downloaded key is empty.
            An explicit raise is used instead of `assert` so the check still
            runs when Python is started with -O.
    """
    global CEREBRAS_API_KEY, RAG_API_KEY, GEMINI_API_KEY

    def _load(label: str, drive_id: str) -> str:
        # Download one key and refuse to continue with an empty value.
        key = download_api_key_from_drive(drive_id)
        if not key:
            raise RuntimeError(f"{label} API key is empty")
        print(f"✓ {label} API key loaded")
        return key

    print("📥 Downloading API keys...")
    CEREBRAS_API_KEY = _load("Cerebras", CEREBRAS_KEY_DRIVE_ID)
    RAG_API_KEY = _load("RAG", RAG_CEREBRAS_KEY_DRIVE_ID)
    GEMINI_API_KEY = _load("Gemini", GEMINI_KEY_DRIVE_ID)
    return True

print("✅ Credential management loaded")

## 2.2 Firebase Operations

In [None]:
# ============================================================================
# FIREBASE OPERATIONS
# ============================================================================

_firebase_initialized = False

def initialize_firebase():
    """Initialize the Firebase Admin app once per session.

    Downloads the credentials file from Google Drive. If that fails and the
    notebook runs in Colab, the user is asked to upload the key file instead.

    Returns:
        True when Firebase is initialized (now or previously).

    Raises:
        FileNotFoundError: if no credentials file is available after the
            download and the optional upload fallback.
    """
    global _firebase_initialized

    if _firebase_initialized or firebase_admin._apps:
        print("✓ Firebase already initialized")
        return True

    # Remove any stale key so a failed download cannot reuse an old file.
    if os.path.exists(FIREBASE_KEY_FILE):
        os.remove(FIREBASE_KEY_FILE)

    print("📥 Downloading Firebase credentials...")
    try:
        download_file_from_drive(FIREBASE_KEY_ID, FIREBASE_KEY_FILE)
        with open(FIREBASE_KEY_FILE, 'r', encoding='utf-8') as f:
            creds = json.load(f)
        print(f'✓ Project: {creds.get("project_id")}')
    except Exception as e:
        print(f'⚠️ Error: {e}')
        # Manual-upload fallback is only available inside Colab.
        if IN_COLAB:
            uploaded = files.upload()
            if uploaded:
                os.rename(next(iter(uploaded)), FIREBASE_KEY_FILE)

    # Fail with a clear message rather than an opaque error from the SDK.
    if not os.path.exists(FIREBASE_KEY_FILE):
        raise FileNotFoundError(
            f"Firebase credentials file '{FIREBASE_KEY_FILE}' is missing: "
            "download failed and no replacement was uploaded"
        )

    firebase_admin.initialize_app(
        credentials.Certificate(FIREBASE_KEY_FILE),
        {'databaseURL': FIREBASE_URL}
    )
    _firebase_initialized = True
    print("✅ Firebase initialized")
    return True

def firebase_get(path: str) -> Any:
    """Fetch the JSON value stored at `path` through the Firebase REST API.

    Raises:
        RuntimeError: if the response status is not 200.
    """
    base = FIREBASE_URL.rstrip('/')
    resp = requests.get(f"{base}/{path}.json", timeout=30)
    if resp.status_code == 200:
        return resp.json()
    raise RuntimeError(f"GET {path} failed: {resp.status_code}")

def firebase_put(path: str, data: Any) -> Tuple[int, str]:
    """Store `data` as JSON at `path` through the Firebase REST API.

    Returns:
        (status_code, response_text) of the successful request.

    Raises:
        RuntimeError: if the response status is not 200.
    """
    base = FIREBASE_URL.rstrip('/')
    resp = requests.put(f"{base}/{path}.json", json=data, timeout=30)
    if resp.status_code == 200:
        return resp.status_code, resp.text
    raise RuntimeError(f"PUT {path} failed: {resp.status_code}")

def load_sensor_data_from_firebase() -> pd.DataFrame:
    """Read every record under /sensor_data into a time-sorted DataFrame.

    Returns an empty DataFrame when the node holds no data. Humidity and soil
    are clipped to 0..100 and temperature to -50..100.
    """
    snapshot = db.reference('/sensor_data').get()
    if not snapshot:
        return pd.DataFrame()

    rows = []
    for entry in snapshot.values():
        rows.append({
            'timestamp': pd.to_datetime(entry['created_at']),
            'temperature': float(entry['temperature']),
            'humidity': float(entry['humidity']),
            'soil': float(entry['soil']),
        })

    frame = pd.DataFrame(rows).sort_values('timestamp').reset_index(drop=True)

    # Clamp each sensor column to its plausible range.
    clip_ranges = (
        ('humidity', 0, 100),
        ('soil', 0, 100),
        ('temperature', -50, 100),
    )
    for column, lower, upper in clip_ranges:
        frame[column] = frame[column].clip(lower, upper)
    return frame

def get_latest_timestamp_from_firebase() -> Optional[str]:
    """Return the newest 'created_at' under /sensor_data.

    Returns None when there is no data or the query raises.
    """
    try:
        query = db.reference('/sensor_data').order_by_child('created_at').limit_to_last(1)
        newest = query.get()
        if not newest:
            return None
        return list(newest.values())[0]['created_at']
    except Exception:
        return None

def save_sensor_data_to_firebase(data_list: List[Dict]) -> int:
    """Write each sample under /sensor_data and return how many were saved.

    Each sample needs a 'created_at' string and a JSON-encoded 'value' holding
    temperature, humidity and soil. Samples that fail to parse or write are
    skipped (best effort).
    """
    if not data_list:
        return 0

    def _clamp(raw, lower, upper):
        # Convert to float and force the reading into [lower, upper].
        return max(lower, min(upper, float(raw)))

    sensor_ref = db.reference('/sensor_data')
    saved_count = 0
    for sample in data_list:
        try:
            readings = json.loads(sample['value'])
            # ':' and '.' are replaced to build the child key from the timestamp.
            node_key = sample['created_at'].replace(':', '-').replace('.', '-')
            sensor_ref.child(node_key).set({
                'created_at': sample['created_at'],
                'temperature': _clamp(readings['temperature'], -50, 100),
                'humidity': _clamp(readings['humidity'], 0, 100),
                'soil': _clamp(readings['soil'], 0, 100),
            })
        except Exception:
            continue
        saved_count += 1
    return saved_count

print("✅ Firebase operations loaded")

## 2.3 External API Client

In [None]:
# ============================================================================
# EXTERNAL API CLIENT
# ============================================================================

def fetch_iot_data(feed: str, limit: int) -> Optional[pd.DataFrame]:
    """Fetch the history of one sensor feed from the IoT server.

    Args:
        feed: Feed name sent as the "feed" query parameter.
        limit: Maximum number of samples, sent as the "limit" parameter.

    Returns:
        A DataFrame sorted by "created_at" (parsed as UTC datetimes) with a
        numeric "value" column, or None when the request fails, the payload
        lacks usable data, or no valid rows remain after cleaning.
    """
    try:
        # BASE_URL may end with "/"; strip it so the path is not "//history".
        url = f"{BASE_URL.rstrip('/')}/history"
        resp = requests.get(url, params={"feed": feed, "limit": limit}, timeout=30)
        data = resp.json()
        if "data" not in data or not data["data"]:
            return None
        df = pd.DataFrame(data["data"])
        if "created_at" not in df.columns or "value" not in df.columns:
            return None
        # Unparseable timestamps/values become NaT/NaN and are dropped below.
        df["created_at"] = pd.to_datetime(df["created_at"], errors="coerce", utc=True)
        df["value"] = pd.to_numeric(df["value"], errors="coerce")
        df = df.dropna(subset=["created_at", "value"]).sort_values("created_at")
        return None if df.empty else df
    except Exception as e:
        print(f"Error fetching IoT data: {e}")
        return None

def fetch_batch_from_server(before_timestamp: Optional[str] = None) -> Dict:
    """Fetch one batch of FEED history from the IoT server.

    Args:
        before_timestamp: When given, sent as "before_created_at" with the
            request.

    Returns:
        The decoded JSON response, or an empty dict if the request or JSON
        decoding fails.
    """
    params = {"feed": FEED, "limit": BATCH_LIMIT}
    if before_timestamp:
        params["before_created_at"] = before_timestamp
    try:
        # BASE_URL may end with "/"; strip it so the path is not "//history".
        url = f"{BASE_URL.rstrip('/')}/history"
        return requests.get(url, params=params, timeout=180).json()
    except Exception:
        return {}

def sync_new_data_from_server() -> Tuple[str, int]:
    """Copy samples newer than the latest stored timestamp into Firebase.

    Returns:
        (log text with one message per line, number of samples saved).
    """
    log = ["Starting sync..."]
    latest = get_latest_timestamp_from_firebase()
    if latest:
        log.append(f"Latest: {latest}")
    else:
        log.append("No existing data")

    payload = fetch_batch_from_server()
    if "data" not in payload:
        log.append("Error fetching data")
        return "\n".join(log), 0

    # With nothing stored yet, every fetched sample counts as new.
    fresh = [s for s in payload["data"] if not latest or s["created_at"] > latest]
    if not fresh:
        log.append("No new data")
        return "\n".join(log), 0

    saved = save_sensor_data_to_firebase(fresh)
    log.extend([f"Found {len(fresh)} new", f"Saved {saved}!"])
    return "\n".join(log), saved

print("✅ External API client loaded")

## 2.4 HTTP Session & Web Scraping

In [None]:
# ============================================================================
# HTTP SESSION & WEB SCRAPING (for RAG)
# ============================================================================

# One shared requests session for all scraping calls.
http_session = requests.Session()
# Browser-like headers sent with every scraping request.
BROWSER_HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
    "Accept-Language": "en-US,en;q=0.9",
}

# Lower-cased section headings; document post-processing stops at the first
# line that equals one of these.
_STOP_SECTION_TITLES = {
    "references", "reference", "bibliography", "acknowledgements", "acknowledgments",
    "author information", "ethics declarations", "additional information",
    "supplementary information", "rights and permissions", "about this article",
    "availability of data and materials", "data availability", "publisher's note"
}

# Case-insensitive patterns for boilerplate lines to drop. The first two are
# anchored to the whole line; the last two carry no anchors.
_SKIP_LINE_RES = [
    re.compile(r"(?i)^\s*cite this article\s*$"),
    re.compile(r"(?i)^\s*google\s+scholar\s*$"),
    re.compile(r"(?i)creative\s+commons"),
    re.compile(r"(?i)springer\s+nature"),
]

def _normalize_doi(doi_or_url: str) -> str:
    s = (doi_or_url or "").strip()
    return s.replace("https://doi.org/", "").replace("http://doi.org/", "").strip()

def fetch_html(url: str, timeout: int = 15) -> Tuple[Optional[str], str, int]:
    """GET `url` with browser-like headers, following redirects.

    Returns:
        (body text, final URL, status code); on any exception
        (None, original url, 0).
    """
    try:
        resp = http_session.get(
            url,
            headers=BROWSER_HEADERS,
            timeout=timeout,
            allow_redirects=True,
        )
        outcome = (resp.text, resp.url, resp.status_code)
    except Exception:
        outcome = (None, url, 0)
    return outcome

def extract_main_text_from_html(html: str) -> str:
    """Return the visible text of the page's main content area.

    Script, style, nav, header and footer elements are removed first. Text is
    taken from <main>, else <article>, else <body>, else the whole document.
    """
    if not html:
        return ""

    soup = BeautifulSoup(html, "html.parser")
    for tag in soup.find_all(["script", "style", "nav", "header", "footer"]):
        tag.decompose()

    # Use the first container that exists; fall back to the full soup.
    container = soup
    for tag_name in ("main", "article", "body"):
        found = soup.find(tag_name)
        if found:
            container = found
            break
    return container.get_text(separator=" ", strip=True)

def postprocess_document_text(text: str) -> str:
    """Strip boilerplate lines and trailing back-matter from document text.

    The text is processed line by line: blank lines and lines matching any
    pattern in _SKIP_LINE_RES are dropped, and processing stops at the first
    line that equals (case-insensitively) one of _STOP_SECTION_TITLES.

    Returns:
        The kept lines joined with single spaces, or "" for empty input.
    """
    if not text:
        return ""
    clean_lines = []
    for line in text.split("\n"):
        line = line.strip()
        if not line:
            continue
        # search(), not match(): the unanchored patterns must be found anywhere
        # in the line; the "^...$" patterns behave the same under both calls.
        if any(pat.search(line) for pat in _SKIP_LINE_RES):
            continue
        if line.lower() in _STOP_SECTION_TITLES:
            break
        clean_lines.append(line)
    return " ".join(clean_lines)

print("✅ HTTP session & web scraping loaded")

# ⚙️ LAYER 3: Business Logic Layer (BLL)

This layer contains all business logic:
- Data processing and transformations
- ML model operations
- Analytics and calculations
- No direct database access (uses DAL)

## 3.1 Data Processing Utilities

In [None]:
# ============================================================================
# DATA PROCESSING UTILITIES
# ============================================================================

def normalize_series(series: pd.Series) -> pd.Series:
    """Min-max scale `series` into [0, 1]; a constant series maps to zeros."""
    lowest = float(series.min())
    highest = float(series.max())
    span = highest - lowest
    if span == 0:
        # No spread to scale by: return zeros with the same index.
        return series * 0.0
    return (series - lowest) / span

def records_to_df(records: List[Dict[str, Any]]) -> pd.DataFrame:
    """Build a DataFrame from `records`.

    Returns an empty frame with columns created_at/value when `records` is
    empty or lacks either of those columns.
    """
    required = ["created_at", "value"]
    if records:
        frame = pd.DataFrame(records)
        if all(column in frame.columns for column in required):
            return frame
    return pd.DataFrame(columns=required)

def df_to_records(df: pd.DataFrame) -> List[Dict]:
    """Return the created_at/value columns as a list of dicts.

    `created_at` is converted to string. None or an empty frame yields [].
    """
    if df is None or df.empty:
        return []
    subset = df.loc[:, ["created_at", "value"]].copy()
    subset["created_at"] = subset["created_at"].astype(str)
    return subset.to_dict("records")

def unify_sensor_dfs(dfs: Dict[str, pd.DataFrame]) -> pd.DataFrame:
    """Unify multiple sensor DataFrames into one.

    Reads the "temperature", "humidity" and "soil" entries of `dfs`, reduces
    each to a (timestamp, <sensor>) frame and outer-merges them on
    "timestamp". The result is sorted by timestamp with a fresh index.
    """
    def prep(df, col):
        """Return `df` as a two-column frame: "timestamp" and `col`."""
        if df is None or df.empty:
            return pd.DataFrame(columns=["timestamp", col])
        out = df.copy()
        # Accept "created_at" as the timestamp column; otherwise fall back to
        # the frame's index.
        if "timestamp" not in out.columns and "created_at" in out.columns:
            out = out.rename(columns={"created_at": "timestamp"})
        if "timestamp" not in out.columns:
            out = out.reset_index().rename(columns={"index": "timestamp"})
        if "timestamp" not in out.columns or "value" not in out.columns:
            return pd.DataFrame(columns=["timestamp", col])
        out = out[["timestamp", "value"]]
        ts = out["timestamp"]
        # Numeric or all-digit timestamps are treated as epoch values: a median
        # length of 13+ digits is read as milliseconds, anything shorter as
        # seconds. Everything else is parsed as a datetime string.
        # Both branches convert UTC -> Asia/Jerusalem and then drop the tz info.
        if pd.api.types.is_numeric_dtype(ts) or ts.astype(str).str.fullmatch(r"\d+").all():
            ts_num = pd.to_numeric(ts, errors="coerce")
            unit = "ms" if ts_num.dropna().astype(int).astype(str).str.len().median() >= 13 else "s"
            out["timestamp"] = pd.to_datetime(ts_num, errors="coerce", unit=unit, utc=True).dt.tz_convert("Asia/Jerusalem").dt.tz_localize(None)
        else:
            out["timestamp"] = pd.to_datetime(ts, errors="coerce", utc=True).dt.tz_convert("Asia/Jerusalem").dt.tz_localize(None)
        # Rows whose timestamp or value could not be parsed are discarded.
        out = out.dropna(subset=["timestamp"])
        out["value"] = pd.to_numeric(out["value"], errors="coerce")
        out = out.dropna(subset=["value"])
        return out.rename(columns={"value": col})
    
    t = prep(dfs.get("temperature"), "temperature")
    h = prep(dfs.get("humidity"), "humidity")
    s = prep(dfs.get("soil"), "soil")
    # Outer merges keep timestamps that appear for only some of the sensors.
    df = t.merge(h, on="timestamp", how="outer").merge(s, on="timestamp", how="outer")
    return df.sort_values("timestamp").reset_index(drop=True)

print("✅ Data processing utilities loaded")

## 3.2 Plant Status Analysis

In [None]:
# ============================================================================
# PLANT STATUS ANALYSIS
# ============================================================================

def analyze_sensor_values(temp: float, hum: float, soil: float) -> Tuple[List[str], List[str]]:
    """Compare the three readings against SENSOR_THRESHOLDS.

    Returns:
        (issues, warnings): an issue for every reading outside [low, high],
        a warning for every in-range reading within `margin` of a bound.
    """
    readings = [
        ("Temperature", temp, SENSOR_THRESHOLDS['temperature']),
        ("Air humidity", hum, SENSOR_THRESHOLDS['humidity']),
        ("Soil moisture", soil, SENSOR_THRESHOLDS['soil']),
    ]
    issues: List[str] = []
    warnings_list: List[str] = []
    for label, reading, limits in readings:
        lower = limits['low']
        upper = limits['high']
        margin = limits['margin']
        in_range = lower <= reading <= upper
        if not in_range:
            issues.append(f"{label} out of range ({reading:.1f})")
            continue
        near_bound = reading <= lower + margin or reading >= upper - margin
        if near_bound:
            warnings_list.append(f"{label} near threshold ({reading:.1f})")
    return issues, warnings_list

def get_plant_status(issues: List[str], warnings_list: List[str]) -> Tuple[str, str]:
    """Map issues/warnings to a (status label, status color) pair.

    Issues take precedence over warnings; with neither, the plant is healthy.
    """
    if not issues and not warnings_list:
        return "🟢 Healthy", STATUS_OK_COLOR
    if issues:
        return "🔴 Not OK", STATUS_BAD_COLOR
    return "🟡 Warning", STATUS_WARN_COLOR

def plant_dashboard(limit: int):
    """Fetch the three sensor feeds and summarise the plant's condition.

    Returns a 6-tuple: (status, details, temperature figure, humidity figure,
    soil figure, None). If any feed is missing, a "Partial Data" status is
    returned with no figures; any exception is reported in the status text.
    """
    sensor_names = ("temperature", "humidity", "soil")
    try:
        dfs = {name: fetch_iot_data(name, limit) for name in sensor_names}

        missing = [name for name in sensor_names if dfs[name] is None]
        if missing:
            return "⚠️ Partial Data", f"Missing sensors: {', '.join(missing)}", None, None, None, None

        # The latest sample of each feed is its last row.
        temp, hum, soil = (float(dfs[name]["value"].iloc[-1]) for name in sensor_names)

        issues, warnings_list = analyze_sensor_values(temp, hum, soil)
        status, _ = get_plant_status(issues, warnings_list)

        if issues:
            summary = "Issues: " + ", ".join(issues)
        elif warnings_list:
            summary = "Warnings: " + ", ".join(warnings_list)
        else:
            summary = "All readings normal"
        details = f"Temp: {temp:.1f}°C | Humidity: {hum:.1f}% | Soil: {soil:.1f}%\n" + summary

        # Create plots (simplified)
        import plotly.graph_objects as go
        plots = {}
        for name, df in dfs.items():
            if df is None:
                continue
            fig = go.Figure()
            fig.add_trace(go.Scatter(x=df["created_at"], y=df["value"], mode='lines', name=name))
            fig.update_layout(title=f"{name.title()} Over Time", height=300)
            plots[name] = fig

        return (
            status,
            details,
            plots.get("temperature"),
            plots.get("humidity"),
            plots.get("soil"),
            None,
        )
    except Exception as e:
        return f"❌ Error: {str(e)}", "", None, None, None, None

print("✅ Plant status analysis loaded")

## 3.3 Plant Disease Detection

In [None]:
# ============================================================================
# PLANT DISEASE DETECTION
# ============================================================================

# Initialize ML classifier
# Built once when this cell runs: an image-classification pipeline for the
# model named by PLANT_DISEASE_MODEL, shared by analyze_plant_image.
clf = pipeline("image-classification", model=PLANT_DISEASE_MODEL)

def analyze_plant_image(image, temp: float, humidity: float, soil: float) -> Tuple[str, str, str, str]:
    """Classify a plant image and check the readings against the thresholds.

    Returns:
        (detection text, status HTML, alerts joined by newlines,
        advice joined by newlines).
    """
    top = clf(image)[0]
    label = top["label"]
    score = top["score"]

    # (reading, threshold key, (low alert, low advice), (high alert, high advice))
    checks = [
        (temp, 'temperature',
         ("Low temperature", "Move plant to warmer environment"),
         ("High temperature", "Move plant to shaded area")),
        (humidity, 'humidity',
         ("Low air humidity", "Increase humidity (e.g. misting)"),
         ("High air humidity", "Improve ventilation")),
        (soil, 'soil',
         ("Low soil moisture", "Water the plant"),
         ("High soil moisture", "Reduce watering")),
    ]

    alerts, advice = [], []
    for reading, key, low_msgs, high_msgs in checks:
        limits = SENSOR_THRESHOLDS[key]
        if reading < limits['low']:
            alert, tip = low_msgs
        elif reading > limits['high']:
            alert, tip = high_msgs
        else:
            continue
        alerts.append(alert)
        advice.append(tip)

    # Any label that does not contain "healthy" counts as a bad status.
    is_bad = "healthy" not in label.lower()
    status_html = f"""<div style='padding:10px;border-radius:10px;
        background:{'#ffdddd' if is_bad else '#ddffdd'};
        border:1px solid {'#ff0000' if is_bad else '#00aa00'};
        font-weight:700;'>
        {'🔴 Plant status: BAD' if is_bad else '🟢 Plant status: GOOD'}
    </div>"""

    if not alerts:
        alerts.append("Status looks normal")

    detection = f"Detected: {label} ({score:.2%})"
    return detection, status_html, "\n".join(alerts), "\n".join(advice)

print("✅ Plant disease detection loaded")

## 3.4 NLP Processing

In [None]:
# ============================================================================
# NLP PROCESSING (for RAG)
# ============================================================================

def tokenize(text: str) -> List[str]:
    """Lower-case `text` and return its runs of word characters."""
    lowered = (text or "").lower()
    return re.findall(r"\w+", lowered)

def remove_stopwords(tokens: List[str], stop_words: set) -> List[str]:
    """Return the tokens that are not in `stop_words`, keeping their order."""
    kept = []
    for token in tokens:
        if token in stop_words:
            continue
        kept.append(token)
    return kept

def apply_stemming(tokens: List[str]) -> List[str]:
    """Stem every token with the module-level `stemmer`."""
    stems = []
    for token in tokens:
        stems.append(stemmer.stem(token))
    return stems

def preprocess_query(query: str) -> List[str]:
    """Turn a query into search terms: tokenize, drop stopwords, then stem."""
    return apply_stemming(remove_stopwords(tokenize(query), stop_words))

print("✅ NLP processing loaded")

## 3.5 RAG Index Operations

In [None]:
# ============================================================================
# RAG INDEX OPERATIONS
# ============================================================================

# Global stores
# The first three stay None until load_store_from_firebase() fills them.
public_index = None  # term -> doc ids
doc_map = None       # doc id -> URL
doc_text = None      # document texts; only loaded on request
# Per-document text cache filled by get_doc_text().
_DOC_TEXT_CACHE: Dict[int, str] = {}

def build_inverted_index(urls: List[str], stop_words: set, doc_text_map: Dict) -> Tuple[Dict, Dict]:
    """Build a term -> sorted doc-id list index over the given documents.

    Document ids are the positions of `urls`. Text is looked up in
    `doc_text_map` under the int id first, then under its string form.

    Returns:
        (inverted index, {doc_id: url}).
    """
    postings = defaultdict(set)
    for doc_id, _url in enumerate(urls):
        text = doc_text_map.get(doc_id) or doc_text_map.get(str(doc_id)) or ""
        terms = apply_stemming(remove_stopwords(tokenize(text), stop_words))
        for term in set(terms):
            postings[term].add(doc_id)

    index = {term: sorted(ids) for term, ids in postings.items()}
    id_to_url = dict(enumerate(urls))
    return index, id_to_url

def load_store_from_firebase(load_text: bool = False):
    """Load the index and document map from Firebase into the globals.

    Document texts are loaded only when `load_text` is true. A list-shaped
    document map is converted to a dict keyed by the string position.

    Returns:
        (public_index, doc_map, doc_text).
    """
    global public_index, doc_map, doc_text

    public_index = firebase_get(INDEX_PATH) or {}

    doc_map = firebase_get(MAP_PATH) or {}
    if isinstance(doc_map, list):
        doc_map = {str(position): url for position, url in enumerate(doc_map)}

    if load_text:
        doc_text = firebase_get(TEXT_PATH) or {}

    print(f"Loaded: terms={len(public_index)} | docs={len(doc_map)}")
    return public_index, doc_map, doc_text

def get_doc_text(doc_id: int) -> str:
    """Return the stored text of `doc_id`, using the in-memory cache.

    Returns "" when the document has no text or the fetch fails; failures
    are printed and not cached.
    """
    try:
        return _DOC_TEXT_CACHE[doc_id]
    except KeyError:
        pass

    try:
        raw = firebase_get(f"indexes/doc_text/{str(int(doc_id))}")
        if raw:
            _DOC_TEXT_CACHE[doc_id] = str(raw).strip()
            return _DOC_TEXT_CACHE[doc_id]
    except Exception as e:
        print(f"Error getting doc text: {e}")
    return ""

def search_top_k(query: str, k: int = 3) -> Tuple[List[str], List[Dict]]:
    """Rank documents by how many query terms they contain.

    Loads the index from Firebase on first use.

    Returns:
        (query terms, up to `k` dicts with doc_id, score and url).
    """
    if public_index is None or doc_map is None:
        load_store_from_firebase(load_text=False)

    q_terms = preprocess_query(query)

    hits = defaultdict(int)
    for term in q_terms:
        postings = public_index.get(term, []) or []
        for raw_id in postings:
            hits[int(raw_id)] += 1

    top = sorted(hits.items(), key=lambda item: item[1], reverse=True)[:k]
    results = []
    for doc_id, score in top:
        results.append({
            "doc_id": doc_id,
            "score": score,
            "url": doc_map.get(str(doc_id)),
        })
    return q_terms, results

print("✅ RAG index operations loaded")

## 3.6 BM25 Ranking

In [None]:
# ============================================================================
# BM25 RANKING
# ============================================================================

def bm25_rank(query: str, k: int = 3, b: float = 0.75, k1: float = 1.5) -> Tuple[List[str], List[Tuple[int, float]]]:
    """Rank documents for `query` with a simplified BM25 score.

    Term frequency is fixed at 1 because the index only records which
    documents contain a term. Loads the index from Firebase on first use.

    Args:
        query: Free-text query; it is tokenized, stop-word filtered and stemmed.
        k: Maximum number of results.
        b: BM25 length-normalization strength.
        k1: BM25 term-frequency saturation parameter.

    Returns:
        (query terms, up to `k` (doc_id, score) pairs, best first).
    """
    if public_index is None or doc_map is None:
        load_store_from_firebase(load_text=False)

    q_terms = preprocess_query(query)
    # `key`, not `k`, so the loop variable does not shadow the result limit.
    all_ids = [int(key) for key in doc_map if str(key).isdigit()]
    N = len(all_ids)

    if N == 0:
        return q_terms, []

    # Document lengths in tokens
    doc_lens = {doc_id: len(tokenize(get_doc_text(doc_id))) for doc_id in all_ids}
    avgdl = sum(doc_lens.values()) / N
    scores = defaultdict(float)

    for term in q_terms:
        doc_ids = public_index.get(term, []) or []
        df = len(doc_ids)
        if df == 0:
            continue
        idf = math.log((N - df + 0.5) / (df + 0.5) + 1)

        for raw_id in doc_ids:
            doc_id = int(raw_id)
            dl = doc_lens.get(doc_id, 1)
            # avgdl is 0 when no document text could be loaded; use a neutral
            # length ratio then instead of dividing by zero.
            length_ratio = dl / avgdl if avgdl > 0 else 1.0
            tf = 1  # Simplified
            numerator = tf * (k1 + 1)
            denominator = tf + k1 * (1 - b + b * length_ratio)
            scores[doc_id] += idf * numerator / denominator

    ranked = sorted(scores.items(), key=lambda x: x[1], reverse=True)[:k]
    return q_terms, ranked

print("✅ BM25 ranking loaded")

## 3.7 LLM Integration

In [None]:
# ============================================================================
# LLM INTEGRATION
# ============================================================================

# Cerebras client for RAG
c_client = None

def init_cerebras_client():
    """Create the global Cerebras client when a RAG API key is available.

    Returns the client, or the unchanged current value when no key is set.
    """
    global c_client
    if not RAG_API_KEY:
        return c_client
    c_client = Cerebras(api_key=RAG_API_KEY)
    print("✓ Cerebras client initialized")
    return c_client

def llm_generate(prompt: str, temperature: float = 0.3, max_tokens: int = 500) -> str:
    """Send `prompt` as a single user message to the Cerebras chat model.

    The client is created lazily. Errors are returned as a string starting
    with "Error:" rather than raised.
    """
    if c_client is None:
        init_cerebras_client()
        if c_client is None:
            return "Error: LLM client not initialized"

    request = {
        "model": RAG_MODEL_ID,
        "messages": [{"role": "user", "content": prompt}],
        "temperature": temperature,
        "max_tokens": max_tokens,
    }
    try:
        completion = c_client.chat.completions.create(**request)
        return completion.choices[0].message.content
    except Exception as e:
        return f"Error: {str(e)}"

print("✅ LLM integration loaded")

## 3.8 RAG Answer Generation

In [None]:
# ============================================================================
# RAG ANSWER GENERATION
# ============================================================================

def _chunk_text(text: str, max_chars: int = 4000) -> List[str]:
    """Split text into overlapping chunks."""
    if not text:
        return []
    overlap = 400
    chunks = []
    for i in range(0, len(text), max_chars - overlap):
        chunks.append(text[i : i + max_chars])
    return chunks

def _extract_evidence_from_chunk(question: str, doc_id: int, chunk_id: int, chunk_text: str) -> Dict[str, Any]:
    """Ask the LLM whether a text chunk answers the question.

    The model is asked for a JSON object; the first ``{...}`` span in its
    reply is parsed and its keys are lower-cased.

    Args:
        question: The user's question.
        doc_id: Identifier of the source document, echoed in the result.
        chunk_id: Index of the chunk within the document, echoed in the result.
        chunk_text: The text to analyze.

    Returns:
        The parsed object with ``doc_id``, ``chunk_id`` and a boolean
        ``found`` added. When the reply has no parseable JSON object the
        result is ``{"found": False, "doc_id": ..., "chunk_id": ...}``.
    """
    prompt = f"""You are an expert researcher. Analyze the TEXT to answer the QUESTION.
Return ONLY a JSON object with:
- 'found': boolean (true if answer found)
- 'answer': string (summary of findings)
- 'evidence': array of strings (supporting quotes)

QUESTION: {question}

TEXT: {chunk_text}"""

    not_found = {"found": False, "doc_id": doc_id, "chunk_id": chunk_id}

    raw = llm_generate(prompt, temperature=0.0)
    match = re.search(r"\{.*\}", raw or "", flags=re.DOTALL)
    if not match:
        return not_found

    try:
        parsed = json.loads(match.group(0))
    except ValueError:
        # json.JSONDecodeError is a ValueError: the reply was not valid JSON.
        return not_found

    res = {k.lower(): v for k, v in parsed.items()}
    found = res.get("found")
    if isinstance(found, str):
        # Models sometimes quote booleans; "false" must not count as truthy.
        found = found.strip().lower() == "true"
    res["found"] = bool(found)
    res.update({"doc_id": doc_id, "chunk_id": chunk_id})
    return res

def rag_answer_with_model(query: str, k: int = 3) -> Dict[str, Any]:
    """Answer a question from the top-ranked documents.

    Ranks documents with ``bm25_rank``, extracts evidence from the first
    three chunks of each document that has text, and asks the LLM to
    synthesize the findings.

    Args:
        query: The user's question.
        k: Number of ranked documents to consult.

    Returns:
        A dict with ``"answer"`` (text) and ``"sources"`` (one URL string,
        possibly empty, per document that had text).
    """
    _, hits = bm25_rank(query, k=k)
    if not hits:
        return {"answer": "No relevant documents found.", "sources": []}

    findings = []
    source_urls = []

    for doc_id, _score in hits:
        doc_text = get_doc_text(doc_id)
        if not doc_text:
            continue

        source_urls.append(doc_map.get(str(doc_id), "") if doc_map else "")

        # Only the first three chunks of each document are sent to the LLM.
        for chunk_no, piece in enumerate(_chunk_text(doc_text)[:3]):
            packet = _extract_evidence_from_chunk(query, doc_id, chunk_no, piece)
            if packet.get("found"):
                findings.append(packet)

    if not findings:
        return {"answer": "Could not find specific information.", "sources": source_urls}

    # Build the bullet list of per-chunk answers for the synthesis prompt.
    bullets = []
    for packet in findings:
        if packet.get('answer'):
            bullets.append(f"- {packet.get('answer', '')}")
    context = "\n".join(bullets)

    synthesis_prompt = f"""Based on these findings, provide a comprehensive answer:

QUESTION: {query}

FINDINGS:
{context}

Provide a clear, professional answer in 2-3 paragraphs."""

    return {
        "answer": llm_generate(synthesis_prompt, temperature=0.3, max_tokens=800),
        "sources": source_urls,
    }

print("✅ RAG answer generation loaded")

## 3.9 Gemini Chat

In [None]:
# ============================================================================
# GEMINI CHAT
# ============================================================================

# Shared Gemini client used by the chat tab; created on first use.
gemini_client = None


def init_gemini_client():
    """Create the shared Gemini client when an API key is configured.

    Returns:
        The module-level ``gemini_client``. It is left unchanged (``None``
        unless set earlier) when ``GEMINI_API_KEY`` is falsy.
    """
    global gemini_client
    if not GEMINI_API_KEY:
        return gemini_client
    gemini_client = genai.Client(api_key=GEMINI_API_KEY)
    print("✓ Gemini client initialized")
    return gemini_client

def build_system_instruction() -> str:
    """Return the fixed system prompt sent with every Gemini chat request."""
    instruction = "You are a helpful, friendly chatbot. Answer clearly. Keep a natural conversation."
    return instruction

def gemini_turn(user_message: str, history: List[Dict], temperature: float = 0.7):
    """Run one chat turn against Gemini.

    Args:
        user_message: Raw text from the message box; blank input is a no-op.
        history: Prior turns as ``{"role", "content"}`` dicts.
        temperature: Sampling temperature for the request.

    Returns:
        A 3-tuple ``(cleared_textbox, chat_history, state_history)``. The
        first item is always ``""``. When no client is available the chat
        history carries an error message while the state history is returned
        unchanged.
    """
    cleaned = (user_message or "").strip()
    if not cleaned:
        return "", history, history

    user_entry = {"role": "user", "content": cleaned}

    client = gemini_client if gemini_client is not None else init_gemini_client()
    if client is None:
        failure = {"role": "assistant", "content": "Error: Client not initialized"}
        return "", history + [user_entry, failure], history

    def _as_content(speaker, body):
        # Wrap plain text in the request's Content/Part structure.
        return types.Content(role=speaker, parts=[types.Part.from_text(text=body)])

    # Replay earlier turns; anything that is not a user turn is sent as "model".
    contents = []
    for entry in history:
        body = (entry.get("content") or "").strip()
        if body:
            speaker = "user" if entry.get("role") == "user" else "model"
            contents.append(_as_content(speaker, body))
    contents.append(_as_content("user", cleaned))

    try:
        resp = client.models.generate_content(
            model=GEMINI_MODEL_ID,
            contents=contents,
            config=types.GenerateContentConfig(
                system_instruction=build_system_instruction(),
                temperature=temperature,
                max_output_tokens=512,
            ),
        )
        answer = (resp.text or "").strip() or "Could not generate answer."
    except Exception as e:
        answer = f"Error: {str(e)}"

    new_history = history + [user_entry, {"role": "assistant", "content": answer}]
    return "", new_history, new_history

print("✅ Gemini chat loaded")

## 3.10 Gamification Logic

In [None]:
# ============================================================================
# GAMIFICATION LOGIC
# ============================================================================

# Cached database reference for the shared gamification profile.
# Stays None until init_gamification_ref() runs.
GAMIFICATION_REF = None

def init_gamification_ref():
    """Bind ``GAMIFICATION_REF`` to the ``gamification/global`` path and return it."""
    global GAMIFICATION_REF
    GAMIFICATION_REF = db.reference("gamification/global")
    return GAMIFICATION_REF

# Template for a gamification profile. _get_profile() iterates the keys of
# "missions" here to decide which missions a loaded profile contains.
DEFAULT_PROFILE = {
    "points": 0,
    "spins_available": 0,
    "missions": {
        "sync_data": {"last_completed": None, "total_completed": 0},
        "analyze_plant": {"last_completed": None, "total_completed": 0},
        "generate_report": {"last_completed": None, "total_completed": 0},
    },
    "coupons": []
}

def _today_key() -> str:
    """Return today's date in the ``TZ_NAME`` time zone as ``YYYY-MM-DD``."""
    return datetime.now(ZoneInfo(TZ_NAME)).strftime("%Y-%m-%d")

def _now_iso() -> str:
    """Return the current time in the ``TZ_NAME`` time zone as an ISO 8601 string."""
    return datetime.now(ZoneInfo(TZ_NAME)).isoformat()

def _get_profile() -> Dict:
    """Load the stored gamification profile and normalize its shape.

    Missing values fall back to zero or empty containers, and the missions
    mapping is rebuilt so it contains exactly the missions listed in
    ``DEFAULT_PROFILE``.

    Returns:
        A dict with ``points``, ``spins_available``, ``missions`` and
        ``coupons``.
    """
    if GAMIFICATION_REF is None:
        init_gamification_ref()
    stored = GAMIFICATION_REF.get() or {}

    points = int(stored.get("points", 0))
    spins = int(stored.get("spins_available", 0))
    stored_missions = stored.get("missions", {}) or {}
    coupons = stored.get("coupons", []) or []

    # Keep only known missions; anything else in storage is dropped.
    missions = {}
    for mission_id in DEFAULT_PROFILE["missions"]:
        saved = stored_missions.get(mission_id) or {}
        missions[mission_id] = {
            "last_completed": saved.get("last_completed"),
            "total_completed": int(saved.get("total_completed", 0)),
        }

    return {
        "points": points,
        "spins_available": spins,
        "missions": missions,
        "coupons": coupons,
    }

def _save_profile(profile: Dict):
    """Overwrite the stored gamification profile with ``profile``."""
    # Create the database reference on first use.
    if GAMIFICATION_REF is None:
        init_gamification_ref()
    GAMIFICATION_REF.set(profile)

def complete_mission(mission_id: str, points: int = 50) -> Tuple[str, int]:
    """Mark a mission as done for today and grant its reward.

    A mission can be completed at most once per calendar day. Completing it
    adds ``points`` and one spin to the profile and saves the profile.

    Args:
        mission_id: Key of the mission in the profile's ``missions`` mapping.
        points: Points granted on completion.

    Returns:
        ``(message, points_awarded)``; the second item is 0 when the mission
        is unknown or was already completed today.
    """
    current_day = _today_key()
    state = _get_profile()
    record = state["missions"].get(mission_id)

    if not record:
        return f"Unknown mission: {mission_id}", 0
    if record["last_completed"] == current_day:
        return f"Mission '{mission_id}' already completed today!", 0

    record.update(
        last_completed=current_day,
        total_completed=record["total_completed"] + 1,
    )
    state["points"] = state["points"] + points
    state["spins_available"] = state["spins_available"] + 1

    _save_profile(state)
    return f"✅ +{points} points! Spin earned!", points

def spin_wheel() -> Tuple[str, int]:
    """Spend one spin and grant a random point reward.

    Rewards of 50 points or more also add a coupon to the profile. The
    updated profile is saved before returning.

    Returns:
        ``(message, reward)``; the reward is 0 when no spins are available.
    """
    state = _get_profile()
    if state["spins_available"] <= 0:
        return "No spins available!", 0

    prize = random.choice([10, 20, 30, 50, 100, 0, 25, 15])
    state["spins_available"] = state["spins_available"] - 1
    state["points"] = state["points"] + prize

    if prize >= 50:
        coupon = {
            "code": f"COUPON-{random.randint(1000,9999)}",
            "label": f"{prize}pts",
            "created_at": _now_iso(),
        }
        state["coupons"].append(coupon)

    _save_profile(state)
    return f"🎰 You won {prize} points!", prize

def rewards_refresh():
    """Build the values shown on the rewards panel.

    Returns:
        ``(points, spins, missions_md, coupons_txt)`` where ``missions_md``
        has one Markdown line per mission (checked when completed today) and
        ``coupons_txt`` lists the five most recent coupons, or ``"None"``.
    """
    profile = _get_profile()
    points = profile["points"]
    spins = profile["spins_available"]

    # Resolve "today" once so every mission is judged against the same date,
    # even if the call straddles midnight.
    today = _today_key()
    missions_md = "".join(
        f"{'✅' if m.get('last_completed') == today else '⬜'} **{mid}** "
        f"(Total: {m.get('total_completed', 0)})\n"
        for mid, m in profile["missions"].items()
    )

    # Skip entries that are not dicts so one bad record cannot break the panel.
    recent = [c for c in profile["coupons"][-5:] if isinstance(c, dict)]
    coupons_txt = "\n".join(
        f"- {c.get('code', '?')}: {c.get('label', '')}" for c in recent
    ) or "None"

    return points, spins, missions_md, coupons_txt

print("✅ Gamification logic loaded")

## 3.11 Gamified Wrappers

In [None]:
# ============================================================================
# GAMIFIED WRAPPERS
# ============================================================================

def sync_screen_gamified() -> str:
    """Sync sensor data and reward the ``sync_data`` mission when rows arrived.

    Returns:
        The sync status message, followed by the reward message on a new
        line when the sync reported a positive count.
    """
    status_text, synced = sync_new_data_from_server()
    if synced > 0:
        reward_text = complete_mission("sync_data")[0]
        return status_text + f"\n{reward_text}"
    return status_text

def analyze_plant_gamified(image, temp, humidity, soil):
    """Run plant analysis and record the ``analyze_plant`` mission.

    Reward bookkeeping is best-effort: if recording the mission fails, the
    analysis result is still returned and a warning is printed.

    Args:
        image: Image input forwarded to ``analyze_plant_image``.
        temp: Temperature reading forwarded to the analysis.
        humidity: Humidity reading forwarded to the analysis.
        soil: Soil-moisture reading forwarded to the analysis.

    Returns:
        Whatever ``analyze_plant_image`` returns.
    """
    result = analyze_plant_image(image, temp, humidity, soil)
    try:
        complete_mission("analyze_plant")
    except Exception as e:
        # Never lose a finished diagnosis because the reward store failed.
        print(f"⚠️ Could not record 'analyze_plant' mission: {e}")
    return result

def generate_report_screen_gamified(limit: int) -> Tuple[str, Optional[str]]:
    """Generate the DOCX report and record the ``generate_report`` mission.

    Reward bookkeeping is best-effort: once the report file exists, a
    failure while recording the mission only prints a warning.

    Args:
        limit: Number of samples to fetch per sensor.

    Returns:
        ``(status_message, path)`` where ``path`` is the report file, or
        ``None`` when there was no data or report generation failed.
    """
    try:
        dfs = {
            "temperature": fetch_iot_data("temperature", limit),
            "humidity": fetch_iot_data("humidity", limit),
            "soil": fetch_iot_data("soil", limit),
        }
        if all(df is None or df.empty for df in dfs.values()):
            return "No data available.", None

        out_path = call_report_microservice(dfs, limit)
    except Exception as e:
        return f"❌ Error: {str(e)}", None

    try:
        complete_mission("generate_report")
    except Exception as e:
        # The report is already on disk; do not report it as failed.
        print(f"⚠️ Could not record 'generate_report' mission: {e}")
    return "✅ Report generated successfully!", out_path

def call_report_microservice(dfs: Dict, limit: int) -> str:
    """POST sensor data to the report service and save the returned DOCX.

    Args:
        dfs: Mapping with ``temperature``, ``humidity`` and ``soil`` frames.
        limit: Sample limit, sent to the service as an integer.

    Returns:
        Path of a temporary ``.docx`` file holding the response body.

    Raises:
        RuntimeError: If the service answers with a non-success status.
    """
    payload = {"limit": int(limit)}
    for sensor in ("temperature", "humidity", "soil"):
        payload[sensor] = df_to_records(dfs.get(sensor))

    response = requests.post(REPORT_SERVICE_URL, json=payload, timeout=120)
    if not response.ok:
        raise RuntimeError(f"Error: {response.status_code} | {response.text}")

    # Write through the descriptor mkstemp already opened.
    handle, docx_path = tempfile.mkstemp(suffix=".docx", prefix="daily_report_")
    with os.fdopen(handle, "wb") as out:
        out.write(response.content)
    return docx_path

print("✅ Gamified wrappers loaded")

## 3.12 Visualization Functions

In [None]:
# ============================================================================
# VISUALIZATION FUNCTIONS
# ============================================================================

# One tuple per sensor: (DataFrame column, unit label, plot colour, display name).
# The visualization helpers below unpack the tuples in this order.
SENSORS = [
    ('temperature', '°C', COLORS['temperature']['color'], 'TEMPERATURE'),
    ('humidity', '%', COLORS['humidity']['color'], 'HUMIDITY'),
    ('soil', '%', COLORS['soil']['color'], 'SOIL MOISTURE')
]

def create_kpi_card(label: str, value: float, unit: str, change: float, trend: str = "up", color: str = None) -> str:
    """Render one KPI card as an HTML snippet.

    Args:
        label: Caption shown above the value.
        value: Reading, shown with one decimal place.
        unit: Unit text appended to the value.
        change: Percentage shown in the trend line, one decimal place.
        trend: ``"up"`` or ``"down"``; any other value renders a flat arrow.
        color: Border colour; falls back to the temperature colour when falsy.

    Returns:
        The card's HTML.
    """
    border = color if color else COLORS['temperature']['color']
    arrow = {"up": "↑", "down": "↓"}.get(trend, "→")
    return (
        f'<div class="kpi-card" style="border-left-color: {border};">\n'
        f'        <p class="kpi-label">{label}</p>\n'
        f'        <p class="kpi-value">{value:.1f}<span style="font-size: 24px;">{unit}</span></p>\n'
        f'        <p class="kpi-change trend-{trend}"><span>{arrow}</span><span>{change:.1f}%</span></p>\n'
        f'    </div>'
    )

def create_kpi_cards(df: pd.DataFrame) -> str:
    """Render a row of KPI cards, one per sensor column present in ``df``.

    Each card shows the most recent non-missing reading and how far it is,
    in percent, from the column mean. Columns with no readings are skipped.

    Args:
        df: Sensor data with one column per sensor.

    Returns:
        HTML for the card row, or ``"<p>No data</p>"`` for an empty frame.
    """
    if df.empty:
        return "<p>No data</p>"
    cards = []
    for col, unit, color, name in SENSORS:
        if col not in df.columns:
            continue
        # The newest row may lack this sensor, so use the last real reading
        # instead of rendering "nan".
        readings = df[col].dropna()
        if readings.empty:
            continue
        current = readings.iloc[-1]
        mean = readings.mean()
        change = ((current - mean) / mean * 100) if mean != 0 else 0
        trend = "up" if change > 0 else ("down" if change < 0 else "neutral")
        cards.append(create_kpi_card(name, current, unit, abs(change), trend, color))
    return f"<div style='display:flex;gap:16px;'>{' '.join(cards)}</div>"

def create_stat_cards_html(df: pd.DataFrame) -> str:
    """Render mean and standard deviation for each sensor column in ``df``.

    Returns:
        Concatenated ``<div>`` snippets, or ``"<p>No data</p>"`` for an
        empty frame.
    """
    if df.empty:
        return "<p>No data</p>"
    snippets = [
        f"<div><b>{name}</b>: Mean={df[col].mean():.1f}{unit}, Std={df[col].std():.1f}</div>"
        for col, unit, _color, name in SENSORS
        if col in df.columns
    ]
    return "".join(snippets)

def create_time_series_plot(df: pd.DataFrame):
    """Plot each sensor against ``timestamp`` in its own stacked subplot.

    Returns:
        A three-row figure, or ``None`` for an empty frame.
    """
    if df.empty:
        return None

    titles = ['Temperature', 'Humidity', 'Soil']
    fig = make_subplots(rows=3, cols=1, shared_xaxes=True, subplot_titles=titles)

    row = 0
    for col, unit, color, name in SENSORS:
        row += 1  # each sensor keeps its own row even when its column is absent
        if col not in df.columns:
            continue
        trace = go.Scatter(x=df['timestamp'], y=df[col], name=name, line=dict(color=color))
        fig.add_trace(trace, row=row, col=1)

    fig.update_layout(height=600, showlegend=True)
    return fig

def calculate_correlations(df: pd.DataFrame):
    """Compute the correlation matrix of the three sensor columns.

    Args:
        df: Sensor data; needs ``temperature``, ``humidity`` and ``soil``
            columns and at least three rows.

    Returns:
        ``(html_card, figure)``. When the frame is too small or a sensor
        column is missing, returns ``("<p>Not enough data</p>", None)``.
    """
    required = ['temperature', 'humidity', 'soil']
    # Guard on missing columns too, as the other helpers do; indexing a
    # missing column would raise KeyError.
    if df.empty or len(df) < 3 or any(col not in df.columns for col in required):
        return "<p>Not enough data</p>", None
    corr = df[required].corr()
    fig = px.imshow(corr, text_auto=True, title="Correlation Matrix")
    card = f"<div>Temp-Humidity: {corr.loc['temperature','humidity']:.2f}</div>"
    return card, fig

def hourly_patterns(df: pd.DataFrame):
    """Plot the mean of each sensor grouped by hour of day.

    The input frame is not modified.

    Args:
        df: Sensor data with a datetime ``timestamp`` column and the three
            sensor columns.

    Returns:
        ``(html_card, figure)``, or ``("<p>No data</p>", None)`` for an
        empty frame.
    """
    if df.empty:
        return "<p>No data</p>", None
    # Group by a derived Series rather than writing an 'hour' column into the
    # caller's frame, which is shared with the other dashboard helpers.
    hours = df['timestamp'].dt.hour.rename('hour')
    hourly = df.groupby(hours)[['temperature', 'humidity', 'soil']].mean()
    fig = go.Figure()
    for col, _, color, name in SENSORS:
        fig.add_trace(go.Scatter(x=hourly.index, y=hourly[col], name=name, line=dict(color=color)))
    fig.update_layout(title="Hourly Patterns", xaxis_title="Hour", yaxis_title="Value")
    return "<div>Hourly averages</div>", fig

def daily_patterns(df: pd.DataFrame):
    """Plot the mean of each sensor grouped by calendar date.

    The input frame is not modified.

    Args:
        df: Sensor data with a datetime ``timestamp`` column and the three
            sensor columns.

    Returns:
        ``(html_card, figure)``, or ``("<p>No data</p>", None)`` for an
        empty frame.
    """
    if df.empty:
        return "<p>No data</p>", None
    # Group by a derived Series rather than writing a 'date' column into the
    # caller's frame, which is shared with the other dashboard helpers.
    dates = df['timestamp'].dt.date.rename('date')
    daily = df.groupby(dates)[['temperature', 'humidity', 'soil']].mean()
    fig = go.Figure()
    for col, _, color, name in SENSORS:
        fig.add_trace(go.Scatter(x=daily.index, y=daily[col], name=name, line=dict(color=color)))
    fig.update_layout(title="Daily Patterns")
    return "<div>Daily averages</div>", fig

def distribution_analysis(df: pd.DataFrame):
    """Plot a histogram per sensor column, side by side.

    Returns:
        ``(html_card, figure)``, or ``("<p>No data</p>", None)`` for an
        empty frame.
    """
    if df.empty:
        return "<p>No data</p>", None

    fig = make_subplots(rows=1, cols=3, subplot_titles=['Temp', 'Humidity', 'Soil'])
    for position, sensor in enumerate(SENSORS, start=1):
        column, shade = sensor[0], sensor[2]
        if column in df.columns:
            histogram = go.Histogram(x=df[column], marker_color=shade)
            fig.add_trace(histogram, row=1, col=position)
    fig.update_layout(height=300, showlegend=False)
    return "<div>Distribution analysis</div>", fig

def time_series_decomposition(df: pd.DataFrame, variable: str):
    """Plot one sensor together with its 24-point moving average.

    Args:
        df: Sensor data with a ``timestamp`` column.
        variable: Name of the column to plot.

    Returns:
        ``(html_card, figure)``, or ``("<p>No data</p>", None)`` when the
        frame is empty or lacks ``variable``.
    """
    if df.empty or variable not in df.columns:
        return "<p>No data</p>", None

    ordered = df.sort_values('timestamp')
    ordered['MA_24'] = ordered[variable].rolling(window=24, min_periods=1).mean()

    fig = go.Figure()
    trace_specs = (
        (variable, dict(name='Raw', opacity=0.5)),
        ('MA_24', dict(name='24-pt MA', line=dict(width=2))),
    )
    for column, options in trace_specs:
        fig.add_trace(go.Scatter(x=ordered['timestamp'], y=ordered[column], **options))
    fig.update_layout(title=f"{variable.title()} with Moving Average")
    return "<div>Moving average analysis</div>", fig

print("✅ Visualization functions loaded")

# 🔧 LAYER 4: Service Layer

This layer contains microservices and external service integrations:
- Report generation microservice
- Service orchestration

## 4.1 Report Microservice Definition

In [None]:
%%writefile report_service.py
# ============================================================================
# REPORT GENERATION MICROSERVICE
# Note: This file runs as a separate process, so it requires its own imports
# and utility functions (unify_sensor_dfs, records_to_df) that are also
# defined in the main notebook. This duplication is intentional for isolation.
# ============================================================================
from fastapi import FastAPI
from fastapi.responses import Response
from pydantic import BaseModel
import os
import tempfile
from datetime import datetime, timedelta
from typing import List, Dict, Any
import pandas as pd
from docx import Document
from docx.enum.text import WD_ALIGN_PARAGRAPH
from cerebras.cloud.sdk import Cerebras

app = FastAPI(title="Report Generation Microservice")

class ReportPayload(BaseModel):
    """Request body for ``/generate-docx``: a limit plus one record list per sensor."""
    limit: int
    # Each list holds record dicts; records_to_df() keeps them only when
    # both "created_at" and "value" keys are present.
    temperature: List[Dict[str, Any]]
    humidity: List[Dict[str, Any]]
    soil: List[Dict[str, Any]]

@app.get("/health")
def health():
    """Liveness probe; always answers ``{"ok": True}``."""
    return {"ok": True}

def records_to_df(records: List[Dict[str, Any]]) -> pd.DataFrame:
    """Convert a list of record dicts into a DataFrame.

    Returns:
        A frame built from ``records`` when it contains both ``created_at``
        and ``value`` columns; otherwise an empty frame with exactly those
        two columns.
    """
    required = ["created_at", "value"]
    if records:
        frame = pd.DataFrame(records)
        if all(name in frame.columns for name in required):
            return frame
    return pd.DataFrame(columns=required)

def unify_sensor_dfs(dfs: dict) -> pd.DataFrame:
    """Merge the per-sensor frames into one frame keyed by ``timestamp``.

    Args:
        dfs: Mapping with optional ``temperature``, ``humidity`` and ``soil``
            frames, each expected to carry a time column and a ``value``
            column.

    Returns:
        An outer join of the three sensors on ``timestamp``, sorted by
        timestamp, with columns ``timestamp``, ``temperature``, ``humidity``
        and ``soil``.
    """
    def prep(df, col):
        # Normalize one sensor frame to two columns: "timestamp" and `col`.
        if df is None or df.empty:
            return pd.DataFrame(columns=["timestamp", col])
        out = df.copy()
        # Accept "created_at" or, failing that, the index as the time column.
        if "timestamp" not in out.columns and "created_at" in out.columns:
            out = out.rename(columns={"created_at": "timestamp"})
        if "timestamp" not in out.columns:
            out = out.reset_index().rename(columns={"index": "timestamp"})
        if "timestamp" not in out.columns or "value" not in out.columns:
            return pd.DataFrame(columns=["timestamp", col])
        out = out[["timestamp", "value"]]
        ts = out["timestamp"]
        # Numeric (or all-digit string) timestamps are treated as epoch values.
        if pd.api.types.is_numeric_dtype(ts) or ts.astype(str).str.fullmatch(r"\d+").all():
            ts_num = pd.to_numeric(ts, errors="coerce")
            # Digit-count heuristic: a median of 13 or more digits is read as
            # milliseconds, anything shorter as seconds.
            unit = "ms" if ts_num.dropna().astype(int).astype(str).str.len().median() >= 13 else "s"
            out["timestamp"] = pd.to_datetime(ts_num, errors="coerce", unit=unit, utc=True).dt.tz_convert("Asia/Jerusalem").dt.tz_localize(None)
        else:
            # Other values are parsed as UTC date strings, converted to
            # Asia/Jerusalem and made timezone-naive.
            out["timestamp"] = pd.to_datetime(ts, errors="coerce", utc=True).dt.tz_convert("Asia/Jerusalem").dt.tz_localize(None)
        # Drop rows whose timestamp or value could not be parsed.
        out = out.dropna(subset=["timestamp"])
        out["value"] = pd.to_numeric(out["value"], errors="coerce")
        out = out.dropna(subset=["value"])
        return out.rename(columns={"value": col})

    t = prep(dfs.get("temperature"), "temperature")
    h = prep(dfs.get("humidity"), "humidity")
    s = prep(dfs.get("soil"), "soil")
    # Outer joins keep timestamps that only some sensors reported.
    df = t.merge(h, on="timestamp", how="outer").merge(s, on="timestamp", how="outer")
    return df.sort_values("timestamp").reset_index(drop=True)

class AutomatedReportGenerator:
    """Builds the daily plant-health report text and its DOCX document."""

    def __init__(self, cerebras_client, model_name: str):
        """Store the chat client and the model name used for report text."""
        self.client = cerebras_client
        self.model_name = model_name

    def generate_daily_report(self, df: pd.DataFrame) -> str:
        """Summarize the latest 24 hours of readings with the LLM.

        Args:
            df: Unified sensor frame with ``timestamp``, ``temperature``,
                ``humidity`` and ``soil`` columns.

        Returns:
            The model's report text, or ``"No data available."`` for an
            empty frame.
        """
        if df.empty:
            return "No data available."
        try:
            # Window: the 24 hours ending at the newest reading.
            cutoff = df["timestamp"].max() - timedelta(hours=24)
            daily = df[df["timestamp"] > cutoff]
            if daily.empty:
                daily = df.tail(100)
        except Exception:
            # Fall back to the last 100 rows if the time window cannot be built.
            daily = df.tail(100)

        stats = {
            "date": daily["timestamp"].max().strftime("%Y-%m-%d"),
            "readings": len(daily),
            "temp_avg": daily["temperature"].mean(),
            "temp_min": daily["temperature"].min(),
            "temp_max": daily["temperature"].max(),
            "humidity_avg": daily["humidity"].mean(),
            "humidity_min": daily["humidity"].min(),
            "humidity_max": daily["humidity"].max(),
            "soil_avg": daily["soil"].mean(),
            "soil_min": daily["soil"].min(),
            "soil_max": daily["soil"].max(),
        }

        prompt = f"""Generate a professional daily plant health report:

DATE: {stats['date']}
READINGS: {stats['readings']}

CONDITIONS:
- Temperature: {stats['temp_avg']:.1f}°C (range: {stats['temp_min']:.1f}-{stats['temp_max']:.1f}°C)
- Humidity: {stats['humidity_avg']:.1f}% (range: {stats['humidity_min']:.1f}-{stats['humidity_max']:.1f}%)
- Soil: {stats['soil_avg']:.1f}% (range: {stats['soil_min']:.1f}-{stats['soil_max']:.1f}%)

Generate a concise summary (3-4 paragraphs) covering:
1) Overall conditions
2) Risks and stress factors
3) Care recommendations"""

        response = self.client.chat.completions.create(
            model=self.model_name,
            messages=[
                {"role": "system", "content": "You are an agricultural consultant."},
                {"role": "user", "content": prompt},
            ],
            temperature=0.3,
            max_tokens=800,
        )
        return response.choices[0].message.content

    def create_docx_report(self, df: pd.DataFrame, output_path: str) -> str:
        """Write the report document to ``output_path`` and return that path.

        The document holds a centered title and generation time, the LLM
        summary from ``generate_daily_report``, and the reading count with
        the covered date range.
        """
        doc = Document()
        title = doc.add_heading("🌱 Daily Plant Health Report", 0)
        title.alignment = WD_ALIGN_PARAGRAPH.CENTER
        date_para = doc.add_paragraph()
        date_run = date_para.add_run(f"Generated: {datetime.now().strftime('%Y-%m-%d %H:%M')}\n")
        date_run.bold = True
        date_para.alignment = WD_ALIGN_PARAGRAPH.CENTER
        doc.add_heading("Executive Summary", 1)
        doc.add_paragraph(self.generate_daily_report(df))
        doc.add_heading("Statistical Summary", 1)
        stats_text = f"Total Readings: {len(df)}\nPeriod: {df['timestamp'].min().strftime('%Y-%m-%d')} to {df['timestamp'].max().strftime('%Y-%m-%d')}"
        doc.add_paragraph(stats_text)
        doc.save(output_path)
        return output_path

@app.post("/generate-docx")
def generate_docx(payload: ReportPayload):
    """Build the DOCX report for the posted sensor records.

    Reads ``CEREBRAS_API_KEY`` and ``REPORT_MODEL_NAME`` from the
    environment. Answers 500 when the key is missing and 400 when the
    payload yields no usable rows; otherwise returns the document bytes as
    an attachment.
    """
    env = os.environ
    api_key = env.get("CEREBRAS_API_KEY", "").strip()
    model_name = env.get("REPORT_MODEL_NAME", "llama3.1-8b").strip()
    if not api_key:
        return Response(content=b"Missing CEREBRAS_API_KEY", status_code=500)

    report_gen = AutomatedReportGenerator(Cerebras(api_key=api_key), model_name)

    sensor_frames = {
        sensor: records_to_df(getattr(payload, sensor))
        for sensor in ("temperature", "humidity", "soil")
    }
    merged = unify_sensor_dfs(sensor_frames)
    merged = merged.dropna(subset=["timestamp"]).sort_values("timestamp").reset_index(drop=True)
    if merged.empty:
        return Response(content=b"No data available.", status_code=400)

    # Render to a temporary file, read it back, and always remove it.
    handle, tmp_path = tempfile.mkstemp(suffix=".docx", prefix="report_")
    os.close(handle)
    try:
        report_gen.create_docx_report(merged, output_path=tmp_path)
        with open(tmp_path, "rb") as docx_file:
            docx_bytes = docx_file.read()
    finally:
        try:
            os.remove(tmp_path)
        except Exception:
            pass

    docx_mime = "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
    return Response(
        content=docx_bytes,
        media_type=docx_mime,
        headers={"Content-Disposition": "attachment; filename=report.docx"},
    )

## 4.2 Microservice Launcher

In [None]:
# ============================================================================
# MICROSERVICE LAUNCHER
# ============================================================================

# Handle of the uvicorn child process started by start_report_microservice().
_microservice_proc = None


def start_report_microservice():
    """Start the report microservice (if not already running) and wait for it.

    The child's output is appended to ``report_service.log`` in the working
    directory. An unread pipe would eventually fill up and block the
    service, so a file is used instead.

    Returns:
        ``True`` once ``/health`` answers ``{"ok": true}``; ``False`` when
        the service does not come up within about 15 seconds or the child
        process exits first.
    """
    global _microservice_proc
    log_path = "report_service.log"

    # Set environment variables (inherited by the child process)
    os.environ["CEREBRAS_API_KEY"] = CEREBRAS_API_KEY or ""
    os.environ["REPORT_MODEL_NAME"] = REPORT_MODEL_NAME

    # Re-running the cell must not spawn a second server on the same port.
    already_running = _microservice_proc is not None and _microservice_proc.poll() is None
    if not already_running:
        cmd = [sys.executable, "-m", "uvicorn", "report_service:app",
               "--host", REPORT_SERVICE_HOST, "--port", str(REPORT_SERVICE_PORT)]
        with open(log_path, "a", encoding="utf-8") as log_file:
            _microservice_proc = subprocess.Popen(cmd, stdout=log_file, stderr=subprocess.STDOUT)

    # Health check
    health_url = f"http://{REPORT_SERVICE_HOST}:{REPORT_SERVICE_PORT}/health"
    for _ in range(30):
        try:
            r = requests.get(health_url, timeout=2)
            if r.ok and r.json().get("ok"):
                print(f"✅ Microservice UP: {health_url}")
                return True
        except Exception:
            pass
        if _microservice_proc.poll() is not None:
            # The child exited and nothing is answering; stop waiting.
            break
        time.sleep(0.5)

    print(f"❌ Microservice failed to start (see {log_path})")
    return False

print("✅ Microservice launcher loaded")

# 🖥️ LAYER 5: Presentation Layer

This layer contains all Gradio UI components organized by tab.
Each tab builder function creates the UI for one feature.

## 5.1 Realtime Dashboard Tab

In [None]:
# ============================================================================
# TAB: REALTIME DASHBOARD
# ============================================================================

def build_realtime_dashboard_tab():
    """Create the real-time dashboard tab: controls, legends, four plots and the update handler."""
    gr.Markdown("<h3 style='margin:0; font-size:22px;'>🌿 Overall Plant Status (Real-Time)</h3>")

    samples = gr.Slider(1, 200, value=20, step=1, label="Number of Samples")
    overall_btn = gr.Button("Update Plant Dashboard", variant="primary")

    overall_status = gr.Textbox(label="Overall Status", lines=1)
    overall_info = gr.Textbox(label="Status Details", lines=4)

    # Static legends: status colours on the left, valid sensor ranges on the right.
    with gr.Row():
        gr.Markdown(f"""<div style="padding:14px;border:1px solid #ccc;border-radius:10px;">
            <h4>🌿 Plant Status Legend</h4>
            <span style="color:{STATUS_OK_COLOR};font-size:26px;">●</span> <b>Healthy</b> - All values normal<br>
            <span style="color:{STATUS_WARN_COLOR};font-size:26px;">●</span> <b>Warning</b> - Near threshold<br>
            <span style="color:{STATUS_BAD_COLOR};font-size:26px;">●</span> <b>Not OK</b> - Out of range
        </div>""")
        gr.Markdown(f"""<div style="padding:14px;border:1px solid #ccc;border-radius:10px;">
            <h4>ℹ️ Valid Ranges</h4>
            <span style="color:{COLOR_TEMP};font-size:26px;">●</span> 🌡️ Temperature: 18-32°C<br>
            <span style="color:{COLOR_HUM};font-size:26px;">●</span> 💧 Humidity: 35-75%<br>
            <span style="color:{COLOR_SOIL};font-size:26px;">●</span> 🌱 Soil: 20-60%
        </div>""")

    with gr.Row():
        temp_plot = gr.Plot(label="Temperature")
        humidity_plot = gr.Plot(label="Humidity")
    with gr.Row():
        soil_plot = gr.Plot(label="Soil Moisture")
        combined_plot = gr.Plot(label="Combined")

    # plant_dashboard must return six values in the order listed in `outputs`.
    overall_btn.click(
        fn=plant_dashboard,
        inputs=[samples],
        outputs=[overall_status, overall_info, temp_plot, humidity_plot, soil_plot, combined_plot]
    )

print("✅ Realtime dashboard tab loaded")

## 5.2 Analytics Dashboard Tab

In [None]:
# ============================================================================
# TAB: ANALYTICS DASHBOARD
# ============================================================================

def dashboard_screen():
    """Load sensor data and build every analytics-dashboard output.

    Returns:
        An 11-tuple: KPI HTML, stats HTML, time-series figure, then a
        (card, figure) pair each for correlations, hourly patterns, daily
        patterns and distributions. With no data, the first item is a
        notice and the remaining ten are ``None``.
    """
    df = load_sensor_data_from_firebase()
    if df.empty:
        notice = "<div style='padding:20px;text-align:center;'>No data. Click Sync!</div>"
        return (notice, *([None] * 10))

    outputs = [
        create_kpi_cards(df),
        create_stat_cards_html(df),
        create_time_series_plot(df),
    ]
    # Each analysis returns a (card, figure) pair; flatten them in UI order.
    for analysis in (calculate_correlations, hourly_patterns, daily_patterns, distribution_analysis):
        card, figure = analysis(df)
        outputs.extend((card, figure))

    return tuple(outputs)

def dashboard_moving_avg(variable):
    """Build the moving-average card and plot for one sensor.

    Args:
        variable: Sensor column name chosen in the dropdown.

    Returns:
        ``(html_card, figure)``, matching the ``[ma_card, ma_plot]`` output
        order of the tab. With no data the card is ``"<p>No data</p>"`` and
        the figure is ``None``.
    """
    df = load_sensor_data_from_firebase()
    if df.empty:
        # Card first, plot second, the same order time_series_decomposition
        # uses; the previous order sent a string to the plot component.
        return "<p>No data</p>", None
    return time_series_decomposition(df, variable)

def build_iot_dashboard_tab():
    """Create the analytics tab: one section per analysis, plus a moving-average control."""
    gr.Markdown('### 📈 Comprehensive Sensor Analytics')

    refresh_btn = gr.Button('🔄 Refresh All Data', variant='primary')

    gr.Markdown('#### 📌 Current Readings')
    kpi_html = gr.HTML()

    gr.Markdown('#### 📊 Statistical Summary')
    stats_html = gr.HTML()

    gr.Markdown('#### 📈 Time Series')
    ts_plot = gr.Plot()

    gr.Markdown('#### 🔗 Correlations')
    corr_card = gr.HTML()
    corr_plot = gr.Plot()

    gr.Markdown('#### ⏰ Hourly Patterns')
    hourly_card = gr.HTML()
    hourly_plot = gr.Plot()

    gr.Markdown('#### 📅 Daily Trends')
    daily_card = gr.HTML()
    daily_plot = gr.Plot()

    gr.Markdown('#### 📊 Distributions')
    dist_card = gr.HTML()
    dist_plot = gr.Plot()

    gr.Markdown('#### 📉 Moving Averages')
    with gr.Row():
        ma_variable = gr.Dropdown(choices=['temperature', 'humidity', 'soil'], value='temperature', label='Variable')
        ma_btn = gr.Button('Generate')
    ma_card = gr.HTML()
    ma_plot = gr.Plot()

    # dashboard_screen returns 11 values in exactly this output order.
    refresh_btn.click(
        dashboard_screen,
        outputs=[kpi_html, stats_html, ts_plot, corr_card, corr_plot,
                 hourly_card, hourly_plot, daily_card, daily_plot, dist_card, dist_plot]
    )
    # The handler's return values are mapped to (card, plot) in that order.
    ma_btn.click(dashboard_moving_avg, inputs=ma_variable, outputs=[ma_card, ma_plot])

print("✅ Analytics dashboard tab loaded")

## 5.3 Generate Report Tab

In [None]:
# ============================================================================
# TAB: GENERATE REPORT
# ============================================================================

def build_generate_report_tab():
    """Create the report tab: sample-count slider, generate button, status box and file download."""
    gr.Markdown("## 📄 Generate Report")
    gr.Markdown("Generate a Word report based on sensor data with AI-powered analysis.")

    report_samples = gr.Slider(minimum=5, maximum=200, value=20, step=1, label="Samples per sensor")
    report_btn = gr.Button("📥 Generate & Download Report", variant="primary")
    report_status = gr.Textbox(label="Status", lines=2)
    report_file = gr.File(label="Download DOCX")

    # The handler returns (status message, file path or None).
    report_btn.click(
        fn=generate_report_screen_gamified,
        inputs=[report_samples],
        outputs=[report_status, report_file]
    )

print("✅ Generate report tab loaded")

## 5.4 Plant Disease Detection Tab

In [None]:
# ============================================================================
# TAB: PLANT DISEASE DETECTION
# ============================================================================

def build_plant_disease_detection_tab():
    """Create the disease-detection tab: image and sensor inputs on the left, results on the right."""
    gr.Markdown("## 🖼️ Plant Disease Detection")

    with gr.Row():
        with gr.Column(scale=2):
            # The image is passed to the handler as a file path (type="filepath").
            image = gr.Image(type="filepath", label="Upload plant image", sources=["upload"])
            temp = gr.Slider(0, 45, value=25, label="Temperature (°C)")
            humidity = gr.Slider(0, 100, value=50, label="Humidity (%)")
            soil = gr.Slider(0, 100, value=50, label="Soil Moisture (%)")
            run_btn = gr.Button("Analyze Plant", variant="primary")

        with gr.Column(scale=2):
            diagnosis = gr.Textbox(label="Diagnosis")
            status = gr.HTML(label="Status")
            alerts = gr.Textbox(label="Alerts", lines=5)
            recommendations = gr.Textbox(label="Recommendations", lines=5)

    # The handler's result is mapped onto the four output components in order.
    run_btn.click(
        fn=analyze_plant_gamified,
        inputs=[image, temp, humidity, soil],
        outputs=[diagnosis, status, alerts, recommendations]
    )

print("✅ Plant disease detection tab loaded")

## 5.5 RAG Chat Tab

In [None]:
# ============================================================================
# TAB: RAG CHAT
# ============================================================================

def rag_ui(query: str, k: int = 3):
    """Gradio adapter for the RAG system.

    Args:
        query: The user's question; ``None`` or blank input is rejected.
        k: Number of documents to retrieve (cast to ``int``).

    Returns:
        ``(answer_text, sources_text)``. The sources are de-duplicated and
        listed in retrieval order. On failure the answer starts with
        ``"Error:"`` and the sources text is empty.
    """
    if not (query or "").strip():
        return "Please enter a question.", ""
    try:
        result = rag_answer_with_model(query, k=int(k))
        answer = result.get("answer", "No answer found")
        sources = result.get("sources", [])
        # dict.fromkeys removes duplicates but keeps rank order; a set would
        # list the citations in arbitrary order.
        unique_sources = [s for s in dict.fromkeys(sources) if s]
        source_text = "Sources:\n" + "\n".join(f"- {url}" for url in unique_sources)
        return answer, source_text
    except Exception as e:
        return f"Error: {str(e)}", ""

def build_rag_chat_tab():
    """Lay out the RAG research-assistant tab.

    A question box, a 1-5 "Search Depth" slider and a button sit in the
    first row; the answer and citation boxes sit in the second. The button
    routes the question and slider value through ``rag_ui``.
    """
    gr.Markdown("### 🔍 Plant Disease Research Assistant")

    # Row 1: query controls.
    with gr.Row():
        with gr.Column():
            question_box = gr.Textbox(
                label="Question",
                placeholder="Ask about plant pathology...",
                lines=2,
            )
            depth_slider = gr.Slider(
                minimum=1, maximum=5, value=3, step=1, label="Search Depth"
            )
            analyze_button = gr.Button("Analyze Documents", variant="primary")

    # Row 2: read-only result panes.
    with gr.Row():
        answer_box = gr.Textbox(label="Analysis Result", lines=8, interactive=False)
        citations_box = gr.Textbox(label="Citations", lines=4, interactive=False)

    analyze_button.click(
        fn=rag_ui,
        inputs=[question_box, depth_slider],
        outputs=[answer_box, citations_box],
    )


print("✅ RAG chat tab loaded")

## 5.6 Gemini Chat Tab

In [None]:
# ============================================================================
# TAB: GEMINI CHAT
# ============================================================================

def build_gemini_chat_tab():
    """Lay out the free-form Gemini chat tab.

    Sending a message (button click or Enter in the textbox) calls
    ``gemini_turn`` with the message, the stored history state and the
    creativity slider; its three outputs go to the textbox, the chatbot and
    the state. The clear button resets both the chatbot and the state.
    """
    gr.Markdown("## 💬 Gemini Free Chat")
    gr.Markdown("Free conversation powered by Gemini (multi-turn history).")

    chat_view = gr.Chatbot(label="Chat")
    history_state = gr.State([])

    message_box = gr.Textbox(label="Message", placeholder="Type here...", lines=2)
    creativity = gr.Slider(
        minimum=0.0, maximum=1.0, value=0.7, step=0.05, label="Creativity"
    )

    with gr.Row():
        send_button = gr.Button("Send", variant="primary")
        clear_button = gr.Button("Clear")

    # Both triggers share the same handler and wiring; the button click is
    # registered first, then the textbox submit.
    for register_turn in (send_button.click, message_box.submit):
        register_turn(
            fn=gemini_turn,
            inputs=[message_box, history_state, creativity],
            outputs=[message_box, chat_view, history_state],
        )

    def clear_chat():
        """Return empty values for the chatbot and the history state."""
        return [], []

    clear_button.click(fn=clear_chat, inputs=[], outputs=[chat_view, history_state])


print("✅ Gemini chat tab loaded")

## 5.7 Sync Data Tab

In [None]:
# ============================================================================
# TAB: SYNC DATA
# ============================================================================

def build_sync_data_tab():
    """Lay out the sync tab: one button whose click runs
    ``sync_screen_gamified`` and shows its result in a status textbox.
    """
    gr.Markdown("## 🔄 Sync Data")
    gr.Markdown("Upload IoT Data from Server to Firebase")

    trigger_button = gr.Button("🔄 Sync New Data", variant="primary", size="lg")
    status_box = gr.Textbox(label="Status", lines=5)

    # The handler takes no inputs; its return value fills the status box.
    trigger_button.click(fn=sync_screen_gamified, outputs=status_box)


print("✅ Sync data tab loaded")

## 5.8 Rewards Tab

In [None]:
# ============================================================================
# TAB: FARM REWARDS (GAMIFICATION)
# ============================================================================

def build_rewards_tab():
    """Lay out the gamification tab and return its refreshable widgets.

    Returns:
        ``(points_box, spins_box, missions_md, coupons_txt)`` - the four
        components that ``rewards_refresh`` populates, handed back so the
        caller can attach further refresh triggers to them.
    """
    gr.Markdown("## 🎮 Farm Rewards")
    gr.Markdown("Complete daily missions to earn points and spins!")

    with gr.Row():
        total_points = gr.Number(label="🏆 Total Points", interactive=False)
        spins_left = gr.Number(label="🎰 Spins Available", interactive=False)

    gr.Markdown("### 📋 Daily Missions")
    missions_view = gr.Markdown()

    gr.Markdown("### 🎫 Recent Coupons")
    coupons_view = gr.Textbox(label="Coupons", lines=3, interactive=False)

    with gr.Row():
        spin_button = gr.Button("🎰 Spin Wheel!", variant="primary")
        refresh_button = gr.Button("🔄 Refresh")

    spin_outcome = gr.Textbox(label="Spin Result", lines=1)

    # Widgets refreshed after every spin and on manual refresh.
    refreshable = (total_points, spins_left, missions_view, coupons_view)

    # Spin first, then append the refreshed stats so the counters shown
    # next to the spin result already reflect that spin.
    spin_button.click(
        fn=lambda: (spin_wheel()[0],) + rewards_refresh(),
        outputs=[spin_outcome, *refreshable],
    )
    refresh_button.click(fn=rewards_refresh, outputs=list(refreshable))

    return refreshable


print("✅ Rewards tab loaded")

# 🚀 LAYER 6: Application Layer

This layer contains the application orchestration:
- Tab registry
- App builder
- Initialization and launch

## 6.1 Tab Registry

In [None]:
# ============================================================================
# TAB REGISTRY
# ============================================================================

# Central registry of all tabs - add/remove tabs here only.
# Each entry is a (tab label, builder function) pair. build_app() walks this
# list in order, opens a gr.Tab with the label and calls the builder inside it.
# NOTE: build_app() identifies the rewards tab by its label starting with
# "🎮" and keeps that builder's return value, so keep the emoji prefix if the
# label is ever reworded.
TABS = [
    ("🌱 Realtime Dashboard", build_realtime_dashboard_tab),
    ("📊 Analytics Dashboard", build_iot_dashboard_tab),
    ("📄 Generate Report", build_generate_report_tab),
    ("🖼️ Plant Disease Detection", build_plant_disease_detection_tab),
    ("💬 RAG Chat", build_rag_chat_tab),
    ("💬 Gemini Chat", build_gemini_chat_tab),
    ("🔄 Sync Data", build_sync_data_tab),
    ("🎮 Farm Rewards", build_rewards_tab),
]

print("✅ Tab registry loaded")

## 6.2 Application Builder

In [None]:
# ============================================================================
# APPLICATION BUILDER
# ============================================================================

def build_app():
    """Assemble the Gradio Blocks application from the ``TABS`` registry.

    Every registry entry becomes one tab whose content is produced by its
    builder. The tab whose label starts with "🎮" is remembered together
    with the components its builder returns, so those components can be
    refreshed on page load and whenever that tab is selected.

    Returns:
        The constructed ``gr.Blocks`` instance (not yet launched).
    """
    with gr.Blocks(css=CUSTOM_CSS, title="Cloud Garden - IoT & AI") as demo:
        gr.Markdown("# 🌿 Cloud Garden - IoT & AI")

        rewards_tab_ref = rewards_outputs = None

        with gr.Tabs():
            for tab_name, tab_builder in TABS:
                with gr.Tab(tab_name) as current_tab:
                    built = tab_builder()
                # Only the rewards tab's handle and outputs are needed later.
                if tab_name.startswith("🎮"):
                    rewards_tab_ref, rewards_outputs = current_tab, built

        # Auto-refresh rewards on load and each time the tab is opened.
        if rewards_tab_ref and rewards_outputs:
            points_box, spins_box, missions_md, coupons_txt = rewards_outputs
            for register_refresh in (demo.load, rewards_tab_ref.select):
                register_refresh(
                    fn=rewards_refresh,
                    outputs=[points_box, spins_box, missions_md, coupons_txt],
                )

    return demo


print("✅ Application builder loaded")

## 6.3 Initialization

In [None]:
# ============================================================================
# INITIALIZATION
# ============================================================================

def initialize_all():
    """Run every startup step, in order, before the UI is built.

    Steps: API keys, Firebase, the Cerebras and Gemini clients, the
    gamification reference, the RAG index (best effort - a failure is
    printed as a warning and startup continues) and the report microservice.
    """
    banner = "=" * 50
    print(banner, "🚀 Initializing CloudGarden Application", banner, sep="\n")

    # Credentials first, then the backing store.
    initialize_api_keys()
    initialize_firebase()

    # LLM clients.
    init_cerebras_client()
    init_gemini_client()

    # Gamification state reference.
    init_gamification_ref()

    # RAG index: optional, so a load failure must not abort startup.
    try:
        load_store_from_firebase(load_text=False)
    except Exception as load_error:
        print(f"⚠️ Could not load RAG index: {load_error}")

    # Report microservice.
    start_report_microservice()

    print(banner, "✅ Initialization Complete!", banner, sep="\n")


print("✅ Initialization function defined (call initialize_all() to start)")

## 6.4 Data Ingestion Example

In [None]:
# ============================================================================
# DATA INGESTION EXAMPLE
# ============================================================================

# Load data from Firebase (Data Lake pattern).
# `df` is kept as a notebook-level variable; the MapReduce example cell that
# follows reads it.
df = load_sensor_data_from_firebase()
print(f"Data shape: {df.shape}")
# Last expression of the cell, so the notebook renders a preview of the rows.
df.head()

## 6.5 MapReduce Analysis Example

In [None]:
# ============================================================================
# MAPREDUCE ANALYSIS EXAMPLE
# ============================================================================

# Hourly aggregation of the sensor readings loaded into `df`, followed by a
# line chart of the average temperature. Skipped entirely when `df` is empty.
# NOTE(review): assumes df['timestamp'] is already a datetime column (the
# `.dt` accessor requires it) - confirm against load_sensor_data_from_firebase.
if not df.empty:
    # Map: create hourly buckets.
    # Lowercase 'h' is the current pandas alias for hours; the uppercase 'H'
    # spelling is deprecated since pandas 2.2.
    df['hour_bucket'] = df['timestamp'].dt.floor('h')

    # Reduce: aggregate per hour
    agg_hourly = df.groupby('hour_bucket', as_index=False).agg(
        avg_temperature=('temperature', 'mean'),
        avg_humidity=('humidity', 'mean'),
        avg_soil=('soil', 'mean')
    )

    # Visualize
    plt.figure(figsize=(10, 4))
    plt.plot(agg_hourly['hour_bucket'], agg_hourly['avg_temperature'])
    plt.xlabel('Time (hourly)')
    plt.ylabel('Average Temperature')
    plt.title('Average Temperature per Hour')
    plt.xticks(rotation=45)
    plt.tight_layout()
    # Save before show(): the figure may be cleared once it has been shown.
    plt.savefig("avg_temperature_per_hour.png")
    plt.show()

    print("Graph saved to: avg_temperature_per_hour.png")
    # A bare expression inside an `if` body is not rendered by the notebook,
    # so print the preview explicitly.
    print(agg_hourly.head())

## 6.6 Launch Application

In [None]:
# ============================================================================
# LAUNCH APPLICATION
# ============================================================================

# Entry point: runs when this file/notebook is executed directly rather than
# imported as a module.
if __name__ == "__main__":
    # Initialize all services before launching
    initialize_all()
    
    # Build and launch the app
    # `app` is left as a notebook-level variable after launch.
    app = build_app()
    # NOTE(review): per Gradio's launch() docs, share=True requests a public
    # share link and debug=True keeps the cell running so errors are printed -
    # confirm this is intended outside of Colab.
    app.launch(share=True, debug=True)