In [1]:
!pip install google-generativeai

Collecting google-ai-generativelanguage==0.4.0 (from google-generativeai)
  Using cached google_ai_generativelanguage-0.4.0-py3-none-any.whl.metadata (5.1 kB)
Using cached google_ai_generativelanguage-0.4.0-py3-none-any.whl (598 kB)
Installing collected packages: google-ai-generativelanguage
  Attempting uninstall: google-ai-generativelanguage
    Found existing installation: google-ai-generativelanguage 0.6.18
    Uninstalling google-ai-generativelanguage-0.6.18:
      Successfully uninstalled google-ai-generativelanguage-0.6.18
Successfully installed google-ai-generativelanguage-0.4.0



[notice] A new release of pip is available: 25.2 -> 25.3
[notice] To update, run: python.exe -m pip install --upgrade pip


In [2]:
import os

import google.generativeai as genai

# SECURITY: never hard-code the API key in the notebook. Notebooks are shared and
# committed together with their source, so a literal key is leaked to every reader.
# Any key that was previously committed here must be treated as compromised and
# revoked in the Google Cloud / AI Studio console.
# The key is read from the environment so the notebook itself contains no secret.
if not os.environ.get("GOOGLE_API_KEY"):
    raise RuntimeError(
        "Set the GOOGLE_API_KEY environment variable before running this notebook."
    )

genai.configure(api_key=os.environ["GOOGLE_API_KEY"])

In [9]:
import google.generativeai as genai

def _summarize_timeline_locally(timeline_json):
    """Build the formal attendance sentence from the records without any API call.

    Entries are classified by their lower-cased ``status``. Only entries whose status
    is ``absent`` count as absences, so the medical / other / no-reason breakdown
    always adds up to the reported absence total. Entries with any other status
    (for example ``late``) are neither present nor absent; when such entries exist
    they are reported in one extra trailing sentence.

    Args:
        timeline_json: Iterable of attendance dicts with ``status`` and optional
            ``reason`` keys, or ``None`` / empty for "no data".

    Returns:
        str: The summary sentence(s).
    """
    entries = list(timeline_json or [])
    total = len(entries)
    present = absent = medical = other = no_reason = 0

    for entry in entries:
        # `or ''` covers an explicit null status, which would otherwise break .lower().
        status = str(entry.get('status') or '').lower()
        if status == 'present':
            present += 1
        elif status == 'absent':
            absent += 1
            reason = entry.get('reason')
            if not reason:
                no_reason += 1
            elif 'medical' in str(reason).lower():
                medical += 1
            else:
                other += 1

    summary = (f"The officer was assigned {total} hearings this month, of which they were present for {present}. "
               f"They were absent for {absent} hearings: {medical} due to medical reasons, {other} due to other reasons, "
               f"and {no_reason} without providing a reason.")

    # Anything that is neither present nor absent must not be silently folded into
    # the absence count; mention it separately so the numbers still add up.
    other_status = total - present - absent
    if other_status:
        summary += f" A further {other_status} hearings were recorded with a status other than present or absent."
    return summary


def summarize_timeline_gemini(timeline_json, api_key=None):
    """
    Summarizes officer attendance timeline data in a formal report format using Gemini.

    The function tries three strategies in order and returns the first non-empty result:

    1. ``genai.GenerativeModel('gemini-1.5-flash').generate_content``.
    2. ``genai.generate_text`` with model ``'text-bison@001'``.
       NOTE(review): ``generate_text`` belongs to the legacy text API and
       ``'text-bison@001'`` looks like a Vertex-AI style model id -- confirm that this
       fallback can still succeed for the installed SDK version.
    3. A deterministic local summary computed directly from ``timeline_json``.

    A failure of step 1 or 2 is logged as a warning (logger named after this module)
    instead of being swallowed silently, so it is visible when the returned text did
    not come from the model.

    Args:
        timeline_json: JSON object containing attendance records with date, status (present/absent),
                      and reason (if absent)
        api_key: Google API key (optional if already configured)

    Returns:
        str: Formal summary of the monthly activities
    """
    import logging

    logger = logging.getLogger(__name__)

    # Configure API key if provided
    if api_key:
        genai.configure(api_key=api_key)

    # Convert JSON to human-readable text
    text = timeline_to_human_text(timeline_json)

    prompt = f"""You are a professional assistant that creates formal attendance summaries for official records. Maintain a professional, objective tone.

Analyze the following monthly attendance data and provide a formal summary.

Format the summary as:
"The officer was assigned [X] hearings this month, of which they were present for [Y]. They were absent for [Z] hearings: [A] due to medical reasons, [B] due to [other reasons], and [C] without providing a reason."

Attendance Data:
{text}

Provide only the summary, no additional commentary."""

    # Strategy 1: preferred Gemini model.
    preferred_model = 'gemini-1.5-flash'
    try:
        model = genai.GenerativeModel(preferred_model)
        response = model.generate_content(
            prompt,
            generation_config=genai.GenerationConfig(
                max_output_tokens=250,
                temperature=0.5,
            )
        )
        summary = response.text.strip()
        if summary:
            return summary
        logger.warning("Model %r returned an empty summary; trying fallback.", preferred_model)
    except Exception as exc:
        # Deliberately broad: any API/model problem should degrade to the fallbacks,
        # but the reason is recorded so the degradation is not invisible.
        logger.warning("Model %r failed (%s: %s); trying fallback.",
                       preferred_model, type(exc).__name__, exc)

    # Strategy 2: simpler text-generation call (if available).
    try:
        resp = genai.generate_text(
            model='text-bison@001',
            prompt=prompt,
            max_output_tokens=250,
            temperature=0.5,
        )
        # The SDK's completion object exposes the generated text as `.result`;
        # `.text` and `.output` are kept as additional locations to probe.
        summary = getattr(resp, "result", None) or getattr(resp, "text", None)
        if not summary and getattr(resp, "output", None):
            # some responses expose output as a list of dicts
            first = resp.output[0]
            summary = first.get("content") if isinstance(first, dict) else str(first)
        summary = (summary or "").strip()
        if summary:
            return summary
        logger.warning("Text-generation fallback returned no usable text; using local summary.")
    except Exception as exc:
        logger.warning("Text-generation fallback failed (%s: %s); using local summary.",
                       type(exc).__name__, exc)

    # Strategy 3: local deterministic summary (no external API).
    return _summarize_timeline_locally(timeline_json)


def timeline_to_human_text(timeline_json):
    """
    Converts JSON timeline data to human-readable text format.

    Each entry becomes one bullet line:

    * ``present``            -> ``- <date>: Present``
    * ``absent`` with reason -> ``- <date>: Absent (Reason: <reason>)``
    * ``absent`` w/o reason  -> ``- <date>: Absent (No reason provided)``
    * any other status       -> ``- <date>: <status>`` (status shown as given)

    Status matching is case-insensitive. A missing *or null* ``date`` / ``status``
    is rendered as ``Unknown date`` / ``Unknown status`` instead of raising.

    Expected JSON format:
    [
        {
            "date": "2025-01-15",
            "status": "present",
            "reason": null
        },
        {
            "date": "2025-01-16",
            "status": "absent",
            "reason": "medical"
        }
    ]

    Args:
        timeline_json: List of attendance dicts as shown above; ``None`` or an
            empty list means there is no data.

    Returns:
        str: Newline-joined bullet lines, or "No attendance data available."
    """
    if not timeline_json:
        return "No attendance data available."

    lines = []
    for entry in timeline_json:
        # `.get(key, default)` only applies the default when the key is absent; a JSON
        # null arrives as None, so `or` is used to cover both cases.
        date = entry.get('date') or 'Unknown date'
        status = entry.get('status') or 'Unknown status'
        reason = entry.get('reason')

        # str() keeps the comparison safe even for a non-string status value.
        normalized = str(status).lower()

        if normalized == 'present':
            lines.append(f"- {date}: Present")
        elif normalized == 'absent':
            if reason:
                lines.append(f"- {date}: Absent (Reason: {reason})")
            else:
                lines.append(f"- {date}: Absent (No reason provided)")
        else:
            lines.append(f"- {date}: {status}")

    return "\n".join(lines)


# Example usage: summarize a small hand-written timeline.
if __name__ == "__main__":
    # The API key is expected to be configured already, e.g.
    # genai.configure(api_key="YOUR_API_KEY")

    # (date, status, reason) triples expanded into the record dicts the summarizer expects.
    timeline_data = [
        {"date": day, "status": status, "reason": reason}
        for day, status, reason in (
            ("2025-01-15", "present", None),
            ("2025-01-16", "absent", "medical"),
            ("2025-01-17", "present", None),
            ("2025-01-18", "absent", None),
            ("2025-01-19", "present", None),
        )
    ]

    summary = summarize_timeline_gemini(timeline_data)
    print(summary)

The officer was assigned 5 hearings this month, of which they were present for 3. They were absent for 2 hearings: 1 due to medical reasons, 0 due to other reasons, and 1 without providing a reason.


In [11]:
# Comprehensive Dummy Data for Monthly Report Generation
# Based on H4S (Hearing for Success) Backend Schema

import json
from datetime import datetime, timedelta
import random

# Generate dummy data for 20 Investigating Officers for January 2025
def generate_monthly_report_data(seed=None):
    """
    Generate comprehensive monthly report data for all investigating officers

    For each of the 20 hard-coded officers the function:

    * assigns 1-4 randomly sampled cases,
    * walks every weekday (Mon-Fri) of January 2025 and, with probability 0.6,
      schedules one hearing on that day,
    * marks each hearing as present (85%), absent (10%, with a random reason that
      may be ``None``) or late (5%),
    * aggregates the counts and an attendance rate
      (``present / total_hearings * 100``, rounded to 2 decimals, ``0`` if no hearings).

    Args:
        seed: Optional seed for reproducible output. When given, a private
            ``random.Random(seed)`` is used, so the same seed always yields the same
            data and the module-level generator is left untouched. When ``None``
            (default) the module-level ``random`` functions are used, exactly as before.

    Returns:
        list[dict]: One report per officer with the keys ``officer``, ``month``,
        ``cases``, ``attendance``, ``timeline`` and ``summary``.
    """
    # Both the `random` module and a `random.Random` instance expose
    # randint / sample / random / choice, so either can serve as the generator.
    rng = random if seed is None else random.Random(seed)

    officers = [
        {"id": "IO001", "name": "SI Rajesh Kumar", "rank": "Sub-Inspector", "station": "Bhubaneswar PS"},
        {"id": "IO002", "name": "SI Priya Singh", "rank": "Sub-Inspector", "station": "Cuttack PS"},
        {"id": "IO003", "name": "ASI Suresh Nayak", "rank": "Assistant Sub-Inspector", "station": "Puri PS"},
        {"id": "IO004", "name": "SI Anjali Das", "rank": "Sub-Inspector", "station": "Berhampur PS"},
        {"id": "IO005", "name": "ASI Kavita Rath", "rank": "Assistant Sub-Inspector", "station": "Rourkela PS"},
        {"id": "IO006", "name": "SI Bikash Mohanty", "rank": "Sub-Inspector", "station": "Sambalpur PS"},
        {"id": "IO007", "name": "ASI Sujata Panda", "rank": "Assistant Sub-Inspector", "station": "Balasore PS"},
        {"id": "IO008", "name": "SI Ramesh Behera", "rank": "Sub-Inspector", "station": "Kendrapara PS"},
        {"id": "IO009", "name": "ASI Mina Swain", "rank": "Assistant Sub-Inspector", "station": "Jagatsinghpur PS"},
        {"id": "IO010", "name": "SI Prakash Sahoo", "rank": "Sub-Inspector", "station": "Khordha PS"},
        {"id": "IO011", "name": "ASI Dipti Mallick", "rank": "Assistant Sub-Inspector", "station": "Nayagarh PS"},
        {"id": "IO012", "name": "SI Subash Jena", "rank": "Sub-Inspector", "station": "Ganjam PS"},
        {"id": "IO013", "name": "ASI Laxmi Padhi", "rank": "Assistant Sub-Inspector", "station": "Angul PS"},
        {"id": "IO014", "name": "SI Debasis Parida", "rank": "Sub-Inspector", "station": "Dhenkanal PS"},
        {"id": "IO015", "name": "ASI Rinku Sahu", "rank": "Assistant Sub-Inspector", "station": "Sundargarh PS"},
        {"id": "IO016", "name": "SI Mamata Biswal", "rank": "Sub-Inspector", "station": "Bargarh PS"},
        {"id": "IO017", "name": "ASI Jitendra Patra", "rank": "Assistant Sub-Inspector", "station": "Jharsuguda PS"},
        {"id": "IO018", "name": "SI Srikant Dash", "rank": "Sub-Inspector", "station": "Bolangir PS"},
        {"id": "IO019", "name": "ASI Puspa Sethy", "rank": "Assistant Sub-Inspector", "station": "Nuapada PS"},
        {"id": "IO020", "name": "SI Prasanta Barik", "rank": "Sub-Inspector", "station": "Kalahandi PS"},
    ]

    cases = [
        {"caseId": "CR/001/2025", "firNumber": "FIR001", "type": "Theft", "status": "ongoing"},
        {"caseId": "CR/002/2025", "firNumber": "FIR002", "type": "Fraud", "status": "ongoing"},
        {"caseId": "CR/003/2025", "firNumber": "FIR003", "type": "Assault", "status": "ongoing"},
        {"caseId": "CR/004/2025", "firNumber": "FIR004", "type": "Robbery", "status": "disposed"},
        {"caseId": "CR/005/2025", "firNumber": "FIR005", "type": "Cybercrime", "status": "ongoing"},
        {"caseId": "CR/006/2025", "firNumber": "FIR006", "type": "Theft", "status": "adjourned"},
        {"caseId": "CR/007/2025", "firNumber": "FIR007", "type": "Fraud", "status": "ongoing"},
        {"caseId": "CR/008/2025", "firNumber": "FIR008", "type": "Murder", "status": "ongoing"},
    ]

    # `None` is a valid choice: it models an absence without a stated reason.
    absence_reasons = ["medical", "emergency", "official_duty", "family_emergency", "court_duty", None]

    # Reporting window: January 2025 (identical for every officer, so computed once).
    start_date = datetime(2025, 1, 1)
    end_date = datetime(2025, 1, 31)
    num_days = (end_date - start_date).days + 1

    report_data = []

    for officer in officers:
        # Random number of cases assigned (1-4)
        num_cases = rng.randint(1, 4)
        assigned_cases = rng.sample(cases, num_cases)

        attendance_records = []
        counts = {"present": 0, "absent": 0, "late": 0}

        for offset in range(num_days):
            current_date = start_date + timedelta(days=offset)

            # Skip weekends (Monday = 0, Friday = 4)
            if current_date.weekday() >= 5:
                continue
            # 60% chance of a hearing on any given weekday
            if rng.random() >= 0.6:
                continue

            # 85% present, 10% absent, 5% late
            rand_val = rng.random()
            if rand_val < 0.85:
                status, reason = "present", None
            elif rand_val < 0.95:
                status, reason = "absent", rng.choice(absence_reasons)
            else:
                status, reason = "late", None
            counts[status] += 1

            attendance_records.append({
                "date": current_date.strftime("%Y-%m-%d"),
                "status": status,
                "reason": reason,
                "caseId": rng.choice(assigned_cases)["caseId"],
                "hearingTime": f"{rng.randint(9, 15)}:00"
            })

        # Calculate statistics
        total_hearings = len(attendance_records)
        present_count = counts["present"]
        absent_count = counts["absent"]
        late_count = counts["late"]
        # Only "present" counts towards the rate; guard against a month with no hearings.
        attendance_rate = round((present_count / total_hearings * 100), 2) if total_hearings > 0 else 0

        officer_report = {
            "officer": officer,
            "month": "January 2025",
            "cases": assigned_cases,
            "attendance": {
                "total_hearings": total_hearings,
                "present": present_count,
                "absent": absent_count,
                "late": late_count,
                "attendance_rate": attendance_rate
            },
            "timeline": attendance_records,
            "summary": f"The officer was assigned {total_hearings} hearings this month, of which they were present for {present_count}. They were absent for {absent_count} hearings and late for {late_count} hearings. Overall attendance rate: {attendance_rate}%"
        }

        report_data.append(officer_report)

    return report_data

# Seed the module-level generator first: generate_monthly_report_data() is called
# without arguments and draws from `random`, so without a fixed seed every
# "Restart & Run All" would write different data and print different statistics.
RANDOM_SEED = 42
random.seed(RANDOM_SEED)

# Generate and save the data
monthly_data = generate_monthly_report_data()

# Save to JSON file (explicit encoding so the result does not depend on the OS default)
with open('monthly_report_data.json', 'w', encoding='utf-8') as f:
    json.dump(monthly_data, f, indent=2)

print(f"Generated data for {len(monthly_data)} officers")
if monthly_data:
    print("\nSample officer data:")
    print(json.dumps(monthly_data[0], indent=2))

# Statistics
total_hearings = sum(officer['attendance']['total_hearings'] for officer in monthly_data)
total_present = sum(officer['attendance']['present'] for officer in monthly_data)
total_absent = sum(officer['attendance']['absent'] for officer in monthly_data)
total_late = sum(officer['attendance']['late'] for officer in monthly_data)

# Same zero-hearings guard as the per-officer attendance rate, to avoid ZeroDivisionError.
overall_attendance_rate = round(total_present / total_hearings * 100, 2) if total_hearings > 0 else 0

print("\n=== Overall Statistics ===")
print(f"Total Officers: {len(monthly_data)}")
print(f"Total Hearings: {total_hearings}")
print(f"Total Present: {total_present}")
print(f"Total Absent: {total_absent}")
# Late hearings are part of Total Hearings; showing them lets the totals add up.
print(f"Total Late: {total_late}")
print(f"Overall Attendance Rate: {overall_attendance_rate}%")

Generated data for 20 officers

Sample officer data:
{
  "officer": {
    "id": "IO001",
    "name": "SI Rajesh Kumar",
    "rank": "Sub-Inspector",
    "station": "Bhubaneswar PS"
  },
  "month": "January 2025",
  "cases": [
    {
      "caseId": "CR/007/2025",
      "firNumber": "FIR007",
      "type": "Fraud",
      "status": "ongoing"
    }
  ],
  "attendance": {
    "total_hearings": 17,
    "present": 13,
    "absent": 1,
    "late": 3,
    "attendance_rate": 76.47
  },
  "timeline": [
    {
      "date": "2025-01-01",
      "status": "present",
      "reason": null,
      "caseId": "CR/007/2025",
      "hearingTime": "11:00"
    },
    {
      "date": "2025-01-07",
      "status": "present",
      "reason": null,
      "caseId": "CR/007/2025",
      "hearingTime": "9:00"
    },
    {
      "date": "2025-01-08",
      "status": "late",
      "reason": null,
      "caseId": "CR/007/2025",
      "hearingTime": "15:00"
    },
    {
      "date": "2025-01-09",
      "status": "presen