In [None]:
# @title üöÄ Project: Intelligent Parking Violation Analytics (End-to-End Pipeline)

# ==========================================
# PART 1: INSTALL DEPENDENCIES
# ==========================================
print("‚è≥ Installing libraries... (Please wait)")
!pip install -q transformers torch scikit-learn pandas numpy seqeval gradio
!pip install -q indic-nlp-library
print("‚úÖ Libraries installed.")

import os
import re
import random
import torch
import datetime
import pandas as pd
import numpy as np
import gradio as gr
from sklearn.model_selection import train_test_split
from transformers import XLMRobertaTokenizer, XLMRobertaForSequenceClassification, Trainer, TrainingArguments
from torch.utils.data import Dataset

# ==========================================
# PART 2: GENERATE MULTILINGUAL DATASET
# ==========================================
print("⏳ Generating Synthetic Multilingual Data...")

# Sentence templates per violation class, covering English, Hinglish,
# Hindi (Devanagari) and simulated South Indian styles.
# Every template carries a "{plate}" placeholder that is filled in with
# str.format(plate=...) when the dataset is generated.
templates = {
    "No Parking": [
        "{plate} no parking mein khadi hai.",
        "{plate} park ki gayi hai no parking zone mein.",
        "Illegal parking by {plate} near the shop.",
        "{plate} is parked in a no parking area.",
        "{plate} यहाँ नो पार्किंग में खड़ी है।",  # Hindi
        "{plate} गलत जगह पार्क की गयी है।",      # Hindi
        "{plate} no parking la iruku.",         # Tamil Style
        "Wrong parking by {plate}.",
    ],
    "Wrong Side": [
        "{plate} galat side se aa rahi thi.",
        "{plate} driving on the wrong side.",
        "{plate} गलत दिशा से आ रही थी।",         # Hindi
        "{plate} रोंग साइड ड्राइव कर रहा है।",     # Hindi
        "{plate} wrong route lo vastundi.",     # Telugu Style
        "Wrong way driving by {plate}.",
    ],
    "Obstruction": [
        "{plate} gate rok ke khadi hai.",
        "{plate} blocking the main gate.",
        "{plate} गेट रोक कर खड़ी है।",            # Hindi
        "{plate} रास्ता जाम कर रही है।",          # Hindi
        "{plate} gate block chesindi.",         # Telugu Style
        "{plate} causing traffic jam.",
    ],
}

def get_plate():
    """Return a random registration number shaped like ``MH12AB1234``.

    The string is a state code, a two-digit number (10-99), a two-letter
    series and a four-digit number (1000-9999), concatenated without
    separators. Values are drawn from the module-level ``random`` generator
    in exactly that order.
    """
    state_codes = ["MH", "DL", "KA", "TN", "AP", "TS", "UP", "PB"]
    series_codes = ["AA", "AB", "ZZ", "XY", "CC"]
    # Tuple elements are evaluated left to right, which fixes the draw order.
    pieces = (
        random.choice(state_codes),
        random.randint(10, 99),
        random.choice(series_codes),
        random.randint(1000, 9999),
    )
    return "".join(str(piece) for piece in pieces)

# Generate 400 samples
# label_map is the single source of truth for the class names: labels are
# drawn from its keys, so every generated row is guaranteed to have an id
# (a name missing from `templates` fails loudly with KeyError instead of
# silently producing NaN in `label_id`).
label_map = {"No Parking": 0, "Wrong Side": 1, "Obstruction": 2}
class_names = list(label_map)

data = []
for _ in range(400):
    label = random.choice(class_names)
    template = random.choice(templates[label])
    text = template.format(plate=get_plate())
    data.append([text, label])

df = pd.DataFrame(data, columns=['text', 'label'])
df['label_id'] = df['label'].map(label_map)
print(f"✅ Dataset created with {len(df)} samples.")

# ==========================================
# PART 3: TRAIN THE AI MODEL (XLM-RoBERTa)
# ==========================================
print("⏳ Initializing Model Training... (This takes 2-4 mins on GPU)")

class PatrolDataset(Dataset):
    """Map-style dataset that tokenizes one patrol note per item.

    Args:
        texts: Indexable collection of notes; each item is passed through
            ``str()`` before tokenization.
        labels: Indexable collection of integer class ids, parallel to
            ``texts``.
        tokenizer: Callable invoked with the text plus padding/truncation
            keyword arguments; its result must expose ``'input_ids'`` and
            ``'attention_mask'`` tensors.
        max_len: Length every sequence is padded or truncated to.
    """

    def __init__(self, texts, labels, tokenizer, max_len=64):
        self.texts, self.labels = texts, labels
        self.tokenizer, self.max_len = tokenizer, max_len

    def __len__(self):
        # One sample per note.
        return len(self.texts)

    def __getitem__(self, idx):
        """Return the ``input_ids``, ``attention_mask`` and ``labels`` tensors for sample ``idx``."""
        note = str(self.texts[idx])
        target = self.labels[idx]

        tokenizer_options = dict(
            add_special_tokens=True,
            max_length=self.max_len,
            return_token_type_ids=False,
            padding='max_length',
            truncation=True,
            return_attention_mask=True,
            return_tensors='pt',
        )
        encoded = self.tokenizer(note, **tokenizer_options)

        # flatten() turns each returned tensor into a 1-D tensor per sample.
        sample = {
            field: encoded[field].flatten()
            for field in ('input_ids', 'attention_mask')
        }
        sample['labels'] = torch.tensor(target, dtype=torch.long)
        return sample

# Model Setup
# The classification head is newly initialised on top of the pretrained
# encoder; its size follows the number of classes in label_map.
model_name = "xlm-roberta-base"
tokenizer = XLMRobertaTokenizer.from_pretrained(model_name)
model = XLMRobertaForSequenceClassification.from_pretrained(model_name, num_labels=len(label_map))

# Data Split
# A fixed random_state makes the split repeatable for a given dataframe, and
# stratifying on the label keeps the class proportions equal in both subsets.
X_train, X_val, y_train, y_val = train_test_split(
    df['text'],
    df['label_id'],
    test_size=0.2,
    random_state=42,
    stratify=df['label_id'],
)
# to_numpy() drops the shuffled pandas index so PatrolDataset can index 0..n-1.
train_dataset = PatrolDataset(X_train.to_numpy(), y_train.to_numpy(), tokenizer)
val_dataset = PatrolDataset(X_val.to_numpy(), y_val.to_numpy(), tokenizer)

# Training Config
# NOTE(review): the recorded run of this cell warns that `logging_dir` is
# deprecated in favour of TENSORBOARD_LOGGING_DIR - confirm before upgrading.
training_args = TrainingArguments(
    output_dir='./results',
    num_train_epochs=3,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    warmup_steps=10,
    weight_decay=0.01,
    logging_dir='./logs',
    logging_steps=10,
    eval_strategy="epoch",
    save_strategy="epoch"
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset
)

trainer.train()
print("✅ AI Model Trained Successfully!")


⏳ Installing libraries... (Please wait)
[2K     [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m43.6/43.6 kB[0m [31m3.5 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
  Building wheel for seqeval (setup.py) ... [?25l[?25hdone
[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m40.3/40.3 kB[0m [31m2.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m7.7/7.7 MB[0m [31m55.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m121.1/121.1 kB[0m [31m5.4 MB/s[0m eta [36m0:00:00[0m
[?25h✅ Libraries installed.
‚è≥ Generati

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

sentencepiece.bpe.model:   0%|          | 0.00/5.07M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.10M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/615 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.12G [00:00<?, ?B/s]

Loading weights:   0%|          | 0/197 [00:00<?, ?it/s]

XLMRobertaForSequenceClassification LOAD REPORT from: xlm-roberta-base
Key                         | Status     | 
----------------------------+------------+-
lm_head.dense.bias          | UNEXPECTED | 
lm_head.layer_norm.weight   | UNEXPECTED | 
lm_head.dense.weight        | UNEXPECTED | 
roberta.pooler.dense.weight | UNEXPECTED | 
lm_head.layer_norm.bias     | UNEXPECTED | 
roberta.pooler.dense.bias   | UNEXPECTED | 
lm_head.bias                | UNEXPECTED | 
classifier.out_proj.weight  | MISSING    | 
classifier.out_proj.bias    | MISSING    | 
classifier.dense.weight     | MISSING    | 
classifier.dense.bias       | MISSING    | 

Notes:
- UNEXPECTED	:can be ignored when loading from different task/architecture; not ok if you expect identical arch.
- MISSING	:those params were newly initialized because missing from the checkpoint. Consider training on your downstream task.
`logging_dir` is deprecated and will be removed in v5.2. Please set `TENSORBOARD_LOGGING_DIR` instead.


Epoch,Training Loss,Validation Loss
1,0.920622,0.706896
2,0.04083,0.012016
3,0.00526,0.002264


Writing model shards:   0%|          | 0/1 [00:00<?, ?it/s]

Writing model shards:   0%|          | 0/1 [00:00<?, ?it/s]

Writing model shards:   0%|          | 0/1 [00:00<?, ?it/s]

✅ AI Model Trained Successfully!


In [None]:
# ==========================================
# PART 4: INTELLIGENCE LAYERS (Logic & Rules)
# ==========================================

# A. Traffic Rules Database
# Lookup table keyed by violation label. Each entry holds the display strings
# shown on the challan: fine amount, legal section, priority and action.
TRAFFIC_RULES = {
    "No Parking": {
        "Fine": "₹500",
        "Section": "Sec 177 MV Act",
        "Priority": "Low",
        "Action": "Issue E-Challan"
    },
    "Wrong Side": {
        "Fine": "₹1500",
        "Section": "Sec 184 MV Act",
        "Priority": "High",
        "Action": "Seize License & Challan"
    },
    "Obstruction": {
        "Fine": "₹500 + Towing Charges",
        "Section": "Sec 201 MV Act",
        "Priority": "Critical",
        "Action": "Tow Vehicle Immediately"
    }
}

# B. Extract License Plate (Regex)
# Two letters, 1-2 digits, 1-3 letters, 4 digits, each group optionally
# separated by a space or hyphen. The look-arounds require the plate to be a
# standalone token, so it cannot start in the middle of a word
# ("Sector 21 C 1234") or be cut out of a longer number ("MH12AB12345").
_PLATE_PATTERN = re.compile(
    r'(?<![A-Z0-9])[A-Z]{2}[ -]?[0-9]{1,2}[ -]?[A-Z]{1,3}[ -]?[0-9]{4}(?![A-Z0-9])'
)


def extract_license_plate(text):
    """Return the first licence plate found in ``text``, upper-cased.

    Matching is case-insensitive because the text is upper-cased first; any
    space or hyphen separators inside the plate are kept as written.

    Args:
        text: Free-form patrol note. ``None`` or an empty string is allowed.

    Returns:
        The matched plate, or the string ``"Not Found"`` when there is none.
    """
    if not text:
        return "Not Found"
    match = _PLATE_PATTERN.search(text.upper())
    return match.group(0) if match else "Not Found"

# C. Extract Location (Heuristic NER)
# Indicator phrases, tried in this priority order.
_LOCATION_INDICATORS = ("near", "at", "opposite", "opp", "behind", "in front of", "next to")
# Whole-word, case-insensitive patterns: a plain substring test would fire on
# the "at" inside "gate" or "that".
_LOCATION_PATTERNS = tuple(
    re.compile(rf"\b{re.escape(indicator)}\b", re.IGNORECASE)
    for indicator in _LOCATION_INDICATORS
)
# Punctuation trimmed from the edges of each location word (incl. the danda).
_EDGE_PUNCTUATION = ".,;:!?\"'()\u0964"


def extract_location(text):
    """Guess a location from the words that follow an indicator such as "near".

    Indicators are tried in list order; the first one that appears as a whole
    word and is followed by at least one word wins. Up to three following
    words are kept, stripped of surrounding punctuation and title-cased.

    Args:
        text: Free-form patrol note. ``None`` or an empty string is allowed.

    Returns:
        The title-cased location, or ``"Unknown Location"`` when no indicator
        with trailing words is found.
    """
    if not text:
        return "Unknown Location"

    for pattern in _LOCATION_PATTERNS:
        match = pattern.search(text)
        if match is None:
            continue
        trailing = (word.strip(_EDGE_PUNCTUATION) for word in text[match.end():].split())
        location_words = [word for word in trailing if word][:3]
        if location_words:
            return " ".join(location_words).title()
        # Indicator was the last thing in the note: try the next indicator.

    return "Unknown Location"

# D. Predict Function
def predict_full_logic(text):
    """Classify a patrol note and attach the extracted entities and rule.

    Relies on the module-level ``tokenizer``, ``model``, ``TRAFFIC_RULES``,
    ``extract_license_plate`` and ``extract_location``.

    Args:
        text: Free-form patrol note. ``None`` is treated as an empty note.

    Returns:
        dict with keys ``"violation"`` (label string), ``"confidence"``
        (softmax probability of that label, a float), ``"plate"``,
        ``"location"`` and ``"rule"`` (the ``TRAFFIC_RULES`` entry).
    """
    text = "" if text is None else str(text)

    # 1. AI Prediction
    # eval() switches dropout off so the same note always gets the same score;
    # no_grad() skips building the autograd graph, which inference never uses.
    model.eval()
    inputs = tokenizer(
        text, return_tensors="pt", truncation=True, padding=True, max_length=64
    ).to(model.device)
    with torch.no_grad():
        logits = model(**inputs).logits
    # Single note in, so row 0 holds the only probability vector.
    probs = torch.nn.functional.softmax(logits, dim=-1)[0]
    pred_idx = int(torch.argmax(probs, dim=-1).item())
    confidence = probs[pred_idx].item()

    # Must mirror the label ids the classifier was trained with.
    id_to_label = {0: "No Parking", 1: "Wrong Side", 2: "Obstruction"}
    violation = id_to_label[pred_idx]

    # 2. Extract Entities
    plate = extract_license_plate(text)
    location = extract_location(text)

    # 3. Get Rules
    rule = TRAFFIC_RULES[violation]

    return {
        "violation": violation,
        "confidence": confidence,
        "plate": plate,
        "location": location,
        "rule": rule
    }




In [None]:
# @title 5. Launch The "Pro" Police Interface (HTML Receipt + Quick Buttons)

# --- 1. Define the "Visual Receipt" Generator ---
def generate_html_challan(patrol_note):
    """Render the e-challan receipt for a patrol note as an HTML string.

    Args:
        patrol_note: Free-form observation typed by the officer.

    Returns:
        HTML markup for the receipt. When the note is ``None`` or blank, a
        short prompt is returned instead and no prediction is made, so an
        empty submission can never produce a challan.
    """
    # Local import keeps this cell independent of the notebook's import cell.
    from html import escape

    if patrol_note is None or not str(patrol_note).strip():
        return (
            "<div style=\"font-family: 'Courier New', monospace; padding: 20px; "
            "text-align: center; color: #777;\">"
            "Enter a patrol observation to generate an e-challan."
            "</div>"
        )

    # Get Data from AI
    result = predict_full_logic(patrol_note)
    rule = result['rule']

    # Determine Color based on Priority
    color_map = {"Low": "green", "High": "orange", "Critical": "red"}
    priority_color = color_map.get(rule['Priority'], "black")

    # Escape everything interpolated into the markup so characters such as
    # "&" (e.g. in "Seize License & Challan") render as text, not as HTML.
    plate = escape(str(result['plate']))
    violation = escape(str(result['violation']))
    section = escape(str(rule['Section']))
    priority = escape(str(rule['Priority']))
    fine = escape(str(rule['Fine']))
    action = escape(str(rule['Action']))
    confidence = f"{result['confidence']:.2%}"

    issued_at = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    # Display-only reference number; random, so not guaranteed to be unique.
    challan_id = random.randint(10000, 99999)

    # Create Professional HTML Receipt
    html_content = f"""
    <div style="font-family: 'Courier New', monospace; border: 2px dashed #333; padding: 20px; background-color: #f9f9f9; width: 100%; max-width: 400px; margin: auto;">
        <div style="text-align: center; border-bottom: 2px solid #333; padding-bottom: 10px; margin-bottom: 15px;">
            <h2 style="margin:0;">🚨 E-CHALLAN 🚨</h2>
            <p style="margin:5px; font-size: 12px;">TRAFFIC ENFORCEMENT SYSTEM</p>
            <p style="font-size: 10px; color: #555;">{issued_at} | ID: #{challan_id}</p>
        </div>


        <p><strong>🚗 VEHICLE NO:</strong> <br> <span style="font-size: 18px; font-weight: bold; letter-spacing: 2px;">{plate}</span></p>
        <p><strong>⚠️ VIOLATION:</strong> <br> {violation}</p>

        <hr style="border-top: 1px dashed #bbb;">

        <div style="background-color: #eee; padding: 10px; border-radius: 5px;">
            <p style="margin:5px;"><strong>📜 SECTION:</strong> {section}</p>
            <p style="margin:5px;"><strong>🚨 PRIORITY:</strong> <span style="color: {priority_color}; font-weight: bold;">{priority}</span></p>
            <p style="margin:5px; font-size: 18px;"><strong>💰 FINE: <span style="color: #d9534f;">{fine}</span></strong></p>
        </div>

        <div style="text-align: center; margin-top: 20px; color: #777;">
            <p style="font-size: 10px;">ACTION REQUIRED: {action}</p>
            <p style="font-size: 9px;">AI Confidence: {confidence}</p>
        </div>
    </div>
    """
    return html_content

# --- 2. Build the Advanced Dashboard ---
# Imported here as well: the recorded run of this cell failed with
# "NameError: name 'gr' is not defined". This lets the UI be built on its own;
# the submit handler still needs the model and helper cells to have been run.
import gradio as gr

# Define a professional theme
theme = gr.themes.Soft(
    primary_hue="slate",
    secondary_hue="blue",
    text_size="lg",
    spacing_size="sm",
)

with gr.Blocks(theme=theme, title="Police AI Dashboard") as app:
    # Header
    with gr.Row():
        gr.Markdown(
            """
            # 🚓 Traffic Violation Analytics Platform
            **AI-Powered Patrol Assistant for Low-Resource Languages**
            """
        )

    # Main Content Area
    with gr.Row():
        # LEFT COLUMN: Input
        with gr.Column(scale=1):
            gr.Markdown("### 📝 Officer's Input")
            input_text = gr.Textbox(
                label="Voice Note / Patrol Observation",
                placeholder="Type here or use examples below...",
                lines=5,
                show_label=False
            )

            submit_btn = gr.Button("🚀 GENERATE CHALLAN", variant="primary", size="lg")
            clear_btn = gr.Button("🗑️ Clear", variant="secondary")

            # Quick Examples (Clicking these auto-fills the input)
            gr.Markdown("### ⚡ Quick Test Examples")
            gr.Examples(
                examples=[
                    ["MH12AB1234 no parking mein khadi hai near Main Market."],
                    ["DL3C9999 wrong side aa raha hai opposite City Hospital."],
                    ["TN07ZZ5555 gate rok ke khadi hai at MG Road."],
                    ["KA01XY1111 driving on wrong side near School Zone."],
                    ["AP21CC7777 no parking la iruku."]
                ],
                inputs=input_text
            )

        # RIGHT COLUMN: Output (The Digital Receipt)
        with gr.Column(scale=1):
            gr.Markdown("### 🧾 E-Challan Preview")
            # We use HTML component for the receipt look
            output_area = gr.HTML(label="Generated Ticket")

    # Logic Connections
    submit_btn.click(fn=generate_html_challan, inputs=input_text, outputs=output_area)
    # Reset both widgets to empty strings so the textbox keeps holding a str.
    clear_btn.click(lambda: ("", ""), outputs=[input_text, output_area])

# --- 3. Launch ---
# share=True requests a public Gradio link for the app.
print("🚀 Launching Pro Interface... Click the link below!")
app.launch(share=True, debug=False)

NameError: name 'gr' is not defined