In [None]:
!pip install sentence-transformers faiss-cpu transformers feedparser streamlit
!pip install langchain langchain-community langgraph
!pip install crewai
!pip install --upgrade gradio


Collecting faiss-cpu
  Downloading faiss_cpu-1.11.0.post1-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (5.0 kB)
Collecting feedparser
  Downloading feedparser-6.0.11-py3-none-any.whl.metadata (2.4 kB)
Collecting streamlit
  Downloading streamlit-1.48.0-py3-none-any.whl.metadata (9.5 kB)
Collecting sgmllib3k (from feedparser)
  Downloading sgmllib3k-1.0.0.tar.gz (5.8 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting watchdog<7,>=2.1.5 (from streamlit)
  Downloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl.metadata (44 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.3/44.3 kB[0m [31m3.5 MB/s[0m eta [36m0:00:00[0m
Collecting pydeck<1,>=0.8.0b4 (from streamlit)
  Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Coll

In [None]:
import pandas as pd
import numpy as np
import faiss
from sentence_transformers import SentenceTransformer
from transformers import pipeline
import gradio as gr
import os
import sys
from langgraph.graph import StateGraph
from langchain.llms import HuggingFacePipeline
from langchain_community.llms import HuggingFacePipeline
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
from crewai import Agent, Task, Crew
from crewai.tools import BaseTool
from pydantic import BaseModel, Field


In [None]:
class MCPServer:
    """In-memory log of processed queries together with their results."""

    def __init__(self):
        # Records are kept in insertion order, so the newest one is last.
        self.context_history = []

    def save_context(self, query, summary, recommendations):
        """Append one record holding the query, its summary and its recommendations."""
        record = dict(query=query, summary=summary, recommendations=recommendations)
        self.context_history.append(record)

    def get_latest(self):
        """Return the most recently saved record, or None when nothing has been saved."""
        if not self.context_history:
            return None
        return self.context_history[-1]


# Single shared instance used by the rest of the notebook
mcp_server = MCPServer()

In [None]:
import feedparser
from urllib.parse import quote

def search_arxiv(query, max_results=20):
    """Search the arXiv export API and return the parsed feed entries.

    Args:
        query: Free-text search string; it is URL-quoted and searched in the
            ``all:`` field.
        max_results: Value sent as the ``max_results`` query parameter.

    Returns:
        The ``entries`` attribute of the feed parsed by ``feedparser``.
    """
    endpoint = 'http://export.arxiv.org/api/query?'
    params = "&".join([
        f'search_query=all:{quote(query)}',
        'start=0',
        f'max_results={max_results}',
    ])
    return feedparser.parse(endpoint + params).entries




In [None]:
from tqdm import tqdm

# Two-level legal taxonomy: each key is a domain name and each value is the
# list of subdomain names belonging to it (ten per domain).
# NOTE(review): an identical `domain_structure` literal is assigned again in the
# zero-shot classifier cell further down; keep the two in sync or remove one.
domain_structure = {
    "Constitutional & Administrative Law": [
        "Administrative Tribunals", "Constitutional Amendments", "Emergency Powers", "Federalism",
        "Fundamental Rights", "Judicial Review", "Legislative Powers", "Public Interest Litigation",
        "Separation of Powers", "Writ Jurisdiction"
    ],
    "Corporate & Commercial Law": [
        "Banking Law", "Commercial Arbitration", "Company Law", "Competition Law", "Consumer Protection",
        "Contract Law", "E-Commerce Law", "Insolvency & Bankruptcy", "Mergers & Acquisitions", "Securities Regulation"
    ],
    "Criminal Law & Procedure": [
        "Bail & Sentencing", "Criminal Procedure", "Cyber Crime", "Double Jeopardy", "Evidence in Criminal Cases",
        "Juvenile Justice", "Search and Seizure", "Substantive Criminal Law", "Victim Rights", "White-Collar Crime"
    ],
    "Environmental & Energy Law": [
        "Air Pollution Regulation", "Climate Change Law", "Environmental Impact Assessment", "Environmental Litigation",
        "Forest Law", "Mining & Natural Resources Law", "Renewable Energy Law", "Sustainable Development", "Water Law", "Wildlife Protection"
    ],
    "Intellectual Property & Technology Law": [
        "AI & Law", "Copyright Law", "Data Privacy Law", "Digital Rights Management", "IP Licensing", "Internet Governance",
        "Patent Law", "Software Licensing", "Trade Secrets", "Trademark Law"
    ],
    "International & Human Rights Law": [
        "Climate Agreements", "Diplomatic Immunity", "Gender Rights under International Law", "Global Health Law",
        "Humanitarian Law", "International Criminal Court", "International Treaties", "Refugee Law", "UN Conventions", "War Crimes"
    ]
}

In [None]:

# New function: fetch papers and summarize using MCP tools
#def fetch_and_summarize_papers(domain_structure, max_results=20):
    #all_papers = []
    #for domain, subdomains in domain_structure.items():
        #for sub in subdomains:
#             print(f"🔍 Fetching papers for: {domain} → {sub}")

#             # Use arxiv_tool to fetch papers
#             papers = arxiv_tool.run(sub)

#             for paper in papers:
#                 abstract_text = paper.get("summary", "")
#                 if abstract_text:
#                     # Use summarizer_tool to summarize abstract
#                     summarized = summarizer_tool.run(abstract_text)
#                 else:
#                     summarized = "No abstract available"

#                 paper['summary'] = summarized
#                 paper['domain'] = domain
#                 paper['subdomain'] = sub
#             all_papers.extend(papers)

#     df_all = pd.DataFrame(all_papers)
#     return df_all



# df_all = fetch_and_summarize_papers(domain_structure)
# df_all.to_csv("arxiv_1200_papers.csv", index=False)
# print("✅ Papers fetched, summarized, and saved to CSV!")

In [None]:
# # Check paper counts per subdomain (optional - uncomment when needed)
# if 'df_all' in globals():
#     subdomain_counts = df_all.groupby(['domain', 'subdomain']).size().reset_index(name='num_papers')
#     print(subdomain_counts.sort_values('num_papers'))
# else:
#     print("⚠️ Dataframe 'df_all' not loaded yet. Run fetch_and_summarize_papers() or load CSV first.")


In [None]:
import pandas as pd

# Load the prepared legal-case dataset. The read is unconditional: the CSV must
# already exist in the working directory, otherwise pd.read_csv raises.
df_loaded = pd.read_csv('Final_Realistic_Legal_Cases.csv')
print(f"✅ Loaded CSV with {df_loaded.shape[0]} case papers.")
# Preview the first rows using the notebook's rich display.
display(df_loaded.head())

✅ Loaded CSV with 1200 case papers.


Unnamed: 0,title,summary,link,published,court,domain,subdomain,clean_text,token_count,case_year,case_id,jurisdiction,clean_title
0,"Davis v. IRS, 769 F.Supp.2d (2004)",This opinion addresses legal questions related...,https://www.courtlistener.com/opinion/5423652/,2019-07-05,"U.S. Court of Appeals, 9th Circuit",Corporate & Commercial Law,E-Commerce Law,The case deals with e-commerce law ruling. thi...,36,2019,2ab8b6e71b,US-State,"Davis v. IRS, 769 F.Supp.2d (2004)"
1,"Taylor v. EPA, 909 Fed. Appx. (2017)",This opinion addresses legal questions related...,https://www.courtlistener.com/opinion/5535264/,2024-04-15,"U.S. District Court, Southern District of New ...",Intellectual Property & Technology Law,Trademark Law,The opinion explores legal issues surrounding ...,33,2024,f6a3e6586a,US-State,"Taylor v. EPA, 909 Fed. Appx. (2017)"
2,"Roe v. Wade, 639 F.Supp.2d (2020)",This opinion addresses legal questions related...,https://www.courtlistener.com/opinion/7732493/,2021-07-12,Supreme Court of the United States,Intellectual Property & Technology Law,Digital Rights Management,This judgment concerns digital rights manageme...,40,2021,5ad5eeff4f,US-State,"Roe v. Wade, 639 F.Supp.2d (2020)"
3,"Martin v. Google Inc., 396 F.Supp.2d (2006)",This opinion addresses legal questions related...,https://www.courtlistener.com/opinion/9890303/,2023-05-08,Supreme Court of the United States,Environmental & Energy Law,Air Pollution Regulation,The case deals with air pollution regulation r...,35,2023,ad0e59b9ec,US-State,"Martin v. Google Inc., 396 F.Supp.2d (2006)"
4,"Davis v. IRS, 674 F.3d (2011)",This opinion addresses legal questions related...,https://www.courtlistener.com/opinion/6632109/,2018-06-02,"U.S. Court of Appeals, 9th Circuit",Constitutional & Administrative Law,Judicial Review,This ruling pertains to judicial review ruling...,32,2018,8cdfa9f5bf,US-State,"Davis v. IRS, 674 F.3d (2011)"


In [None]:
# Load the sentence-embedding model. The same `embed_model` object is used below
# to embed the dataset for the FAISS index and later to embed user queries.
embed_model = SentenceTransformer('all-MiniLM-L6-v2')
print("✅ Embedding model loaded!")

# Embedding helper built on the shared `embed_model`
def embed_text(text):
    """Encode one text with `embed_model` and return the result as Python lists.

    The text is wrapped in a one-element list before encoding, so the value
    returned is whatever `encode` yields for a single-item batch, converted
    with `.tolist()` (presumably one inner list of floats — TODO confirm).
    """
    encoded = embed_model.encode([text])
    return encoded.tolist()

# Embed every case's cleaned text (missing values become empty strings) and
# store the vectors as float32 for FAISS.
abstract_texts = df_loaded['clean_text'].fillna("").tolist()
abstract_embeddings = np.array(
    embed_model.encode(abstract_texts, show_progress_bar=True)
).astype('float32')

# Flat L2 index sized to the embedding width; row i of the index corresponds
# to row i of df_loaded.
dimension = abstract_embeddings.shape[1]
index = faiss.IndexFlatL2(dimension)
index.add(abstract_embeddings)
print(f"✅ FAISS index created with {index.ntotal} papers!")


✅ Embedding model loaded!


Batches:   0%|          | 0/38 [00:00<?, ?it/s]

✅ FAISS index created with 1200 papers!


In [None]:
def summarize_text_tool(text: str) -> str:
    """
    Summarize legal text, chunking long inputs.

    Inputs shorter than 150 whitespace-separated words are sent to
    ``hf_pipeline`` in a single call. Longer inputs are split into consecutive
    150-word chunks, each chunk is summarized on its own, and the joined chunk
    summaries are summarized once more for coherence.

    Args:
        text: Raw case text. ``None`` or whitespace-only text is treated as empty.

    Returns:
        The generated summary, or an empty string when there is nothing to
        summarize (the model is not called in that case).

    Notes:
        A chunk whose summarization raises is replaced by a
        "[Chunk N] Error: ..." placeholder, which then becomes part of the text
        given to the final summarization call. Exceptions raised by the
        single-call path or by the final call propagate to the caller.
    """
    # Split once and reuse the word list for both the length check and chunking.
    words = text.split() if text else []

    # Nothing to summarize: do not ask the model to generate from an empty prompt.
    if not words:
        return ""

    if len(words) < 150:
        return hf_pipeline(text, max_length=256, do_sample=False)[0]["generated_text"]

    # Chunk input and summarize each chunk
    chunk_size = 150
    summaries = []
    for chunk_no, start in enumerate(range(0, len(words), chunk_size), start=1):
        chunk = " ".join(words[start:start + chunk_size])
        try:
            output = hf_pipeline(chunk, max_length=256, do_sample=False)[0]["generated_text"]
            summaries.append(output)
        except Exception as e:
            summaries.append(f"[Chunk {chunk_no}] Error: {str(e)}")

    # Combine all chunk summaries and summarize again for coherence
    combined = " ".join(summaries)
    return hf_pipeline(combined, max_length=256, do_sample=False)[0]["generated_text"]


In [None]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline, AutoModelForPreTraining, AutoModelForCausalLM

# Load the Flan-T5-Large checkpoint and expose it as `hf_pipeline`, the
# text2text-generation pipeline that the helper functions in this notebook call.
# NOTE(review): the same checkpoint is loaded again in later cells
# (`flan_pipeline`, and a second assignment of `hf_pipeline`); consider loading once.
model_name ="google/flan-t5-large"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
hf_pipeline = pipeline("text2text-generation", model=model, tokenizer=tokenizer)
print("✅Model loaded for structured case extraction!")

def legal_structured_output(text: str) -> str:
    """Ask the model to fill a fixed legal-case template for ``text``.

    The prompt lists the fields Case Title, Court, Date, Facts, Issue, Rule,
    Holding and Disposition, followed by the case text. The "Holding:" line is
    part of the prompt because the completeness check below expects it; without
    it the model was never asked for a holding and the field was practically
    always reported as missing.

    Args:
        text: The legal case text to analyse.

    Returns:
        The generated text. If any expected field label does not occur in it,
        a "⚠️ Missing fields: ..." line naming those labels is appended.
    """
    prompt = f"""You are a legal assistant. Fill in each field based on the case below. If a field is missing, write "Not found".

Case Title:
Court:
Date:
Facts:
Issue:
Rule:
Holding:
Disposition:

Legal Case Text:
{text}
"""

    result = hf_pipeline(prompt, max_new_tokens=512, do_sample=False)[0]["generated_text"]

    # Plain substring check: a label counts as present if it appears anywhere in the output.
    expected_fields = ["Case Title:", "Court:", "Date:", "Facts:", "Issue:", "Rule:", "Holding:", "Disposition:"]
    missing = [field for field in expected_fields if field not in result]
    if missing:
        result += f"\n⚠️ Missing fields: {', '.join(missing)}"

    return result


def legal_structured_output_with_chunking(text: str) -> str:
    """
    Summarize long legal text chunk by chunk, then extract a structured summary.

    The text is stripped and its newlines are replaced by spaces. Texts shorter
    than 100 words are passed straight to ``legal_structured_output``. Longer
    texts are cut into consecutive chunks of up to 200 words; each chunk is
    summarized with ``hf_pipeline`` and the joined summaries are used as the
    case text of the structured-extraction prompt.

    The prompt includes a "Holding:" line so that it matches the list of
    expected fields checked at the end (it was previously absent, so that field
    was practically always reported as missing).

    Args:
        text: The legal case text to analyse.

    Returns:
        The generated structured text. If any expected field label does not
        occur in it, a "⚠️ Missing fields: ..." line is appended.

    Notes:
        A chunk whose summarization raises is replaced by a
        "[Chunk N]: Error summarizing chunk - ..." placeholder in the combined
        summary. Exceptions from the final extraction call propagate.
    """

    text = text.strip().replace("\n", " ")
    words = text.split()

    # 🔹 Short case? Just use regular output
    if len(words) < 100:
        return legal_structured_output(text)

    # 🔹 Split into ~200-word chunks (the last chunk may be shorter)
    chunk_size = 200
    chunks = [
        " ".join(words[start:start + chunk_size])
        for start in range(0, len(words), chunk_size)
    ]

    # 🔹 Summarize each chunk
    chunk_summaries = []
    for i, chunk in enumerate(chunks):
        try:
            summary = hf_pipeline(chunk, max_length=150, do_sample=False)[0]["generated_text"]
            chunk_summaries.append(summary)
        except Exception as e:
            chunk_summaries.append(f"[Chunk {i+1}]: Error summarizing chunk - {str(e)}")

    # 🔹 Combine all summaries
    combined_summary = " ".join(chunk_summaries)

    # 🔹 Now run structured extraction on the **combined summary**
    structured_prompt = f"""You are a legal assistant. Fill in each field based on the case below. If a field is missing, write "Not found".

Case Title:
Court:
Date:
Facts:
Issue:
Rule:
Holding:
Disposition:

Legal Case Text:
{combined_summary}
"""

    result = hf_pipeline(structured_prompt, max_new_tokens=512, do_sample=False)[0]["generated_text"]

    # Plain substring check: a label counts as present if it appears anywhere in the output.
    expected_fields = ["Case Title:", "Court:", "Date:", "Facts:", "Issue:", "Rule:", "Holding:", "Disposition:"]
    missing = [field for field in expected_fields if field not in result]
    if missing:
        result += f"\n⚠️ Missing fields: {', '.join(missing)}"

    return result


Device set to use cuda:0


✅Model loaded for structured case extraction!


In [None]:
from transformers import AutoModelForSeq2SeqLM as AutoSeq2Seq, AutoTokenizer as AutoToken

# Build `flan_pipeline`, the text2text-generation pipeline used by
# flan_field_by_field_extraction below.
# NOTE(review): this loads "google/flan-t5-large" a second time; the checkpoint
# was already loaded for `hf_pipeline` in the previous cell. Consider reusing it.
flan_model_name = "google/flan-t5-large"
flan_tokenizer = AutoToken.from_pretrained(flan_model_name)
flan_model = AutoSeq2Seq.from_pretrained(flan_model_name)

flan_pipeline = pipeline("text2text-generation", model=flan_model, tokenizer=flan_tokenizer)

print("✅ Flan-T5-Large loaded.")


def flan_field_by_field_extraction(text: str) -> str:
    """Extract case fields by asking the model one question per field.

    Each question is sent to ``flan_pipeline`` as "<question>\\n\\n<text>".
    The answers are returned as "<HEADING>: <answer>" blocks separated by blank
    lines, in the fixed order CASE TITLE, COURT, DATE, FACTS, ISSUE, RULE,
    DISPOSITION. If a call raises, that field's answer is "Error: <message>".
    """
    # (heading, question) pairs in output order.
    fields = [
        ("CASE TITLE", "What is the full legal title of the case, including the names of the petitioner and respondent (e.g., X v. Y)? Exclude the case number or court name."),
        ("COURT", "Which court heard the case?"),
        ("DATE", "What is the year of the case?"),
        ("FACTS", "Summarize the facts of the case."),
        ("ISSUE", "What is the legal issue?"),
        ("RULE", "What legal rule was applied?"),
        ("DISPOSITION", "What was the final disposition?"),
    ]

    sections = []
    for heading, question in fields:
        prompt = f"{question}\n\n{text}"
        try:
            generated = flan_pipeline(prompt, max_new_tokens=150, do_sample=False)
            answer = generated[0]["generated_text"].strip()
        except Exception as exc:
            # One failed field must not abort the remaining questions.
            answer = f"Error: {str(exc)}"
        sections.append(f"{heading}: {answer}\n\n")

    return "".join(sections)

Device set to use cuda:0


✅ Flan-T5-Large loaded.


In [None]:
def find_related_papers(user_input, top_k=5, domain_filter="All", subdomain_filter="All"):
    """Return the cases most similar to ``user_input``, optionally filtered.

    The query is embedded with ``embed_model`` and candidates are fetched from
    the FAISS ``index``. Each surviving candidate is scored by cosine similarity
    (clipped to [0, 1], rounded to 3 decimals) between the query vector and the
    vector stored in the index for that row, then the best ``top_k`` are returned.

    Changes compared with the previous implementation:
      * The stored vector is read back with ``index.reconstruct`` instead of
        re-encoding every candidate text with the model.
      * When a domain/subdomain filter is active the whole index is searched,
        so a filtered query is no longer limited to whatever happens to be in
        the global top 50; without filters the pool is ``max(50, top_k)``.
      * FAISS pads missing results with id -1; those are now skipped instead of
        silently selecting the last dataframe row.
      * The dot product is computed on 1-D vectors, which avoids the NumPy
        deprecation warning raised by ``float()`` on a 1x1 array, and a
        zero-norm vector yields a score of 0.0 instead of NaN.

    Args:
        user_input: Query text.
        top_k: Maximum number of results; values <= 0 return an empty list.
        domain_filter: Exact ``domain`` value to keep, or "All".
        subdomain_filter: Exact ``subdomain`` value to keep, or "All".

    Returns:
        A list of row dicts from ``df_loaded`` with an added
        ``similarity_score`` key, sorted by that score in descending order.
    """
    if top_k <= 0:
        return []
    if index.ntotal == 0:
        return []

    user_vec = embed_model.encode([user_input]).astype('float32')

    # Filters are applied after the search, so a filtered query needs the full
    # candidate set to be able to find top_k matching rows.
    filtering = domain_filter != "All" or subdomain_filter != "All"
    n_candidates = index.ntotal if filtering else min(index.ntotal, max(50, top_k))
    _, I = index.search(user_vec, n_candidates)

    query = user_vec[0]
    query_norm = float(np.linalg.norm(query))

    results = []
    for idx in I[0]:
        idx = int(idx)
        # -1 marks "no result" in FAISS output; also guard against ids beyond the dataframe.
        if idx < 0 or idx >= len(df_loaded):
            continue

        paper = df_loaded.iloc[idx].to_dict()

        # Apply filters AFTER search
        if domain_filter != "All" and paper['domain'] != domain_filter:
            continue
        if subdomain_filter != "All" and paper['subdomain'] != subdomain_filter:
            continue

        # Row idx of the index holds the embedding of row idx of df_loaded.
        doc = index.reconstruct(idx)
        denom = query_norm * float(np.linalg.norm(doc))
        cos_sim = float(np.dot(query, doc)) / denom if denom > 0 else 0.0
        cos_sim = min(max(cos_sim, 0.0), 1.0)  # Clip for safety
        paper['similarity_score'] = round(cos_sim, 3)
        results.append(paper)

    results.sort(key=lambda rec: rec['similarity_score'], reverse=True)

    # ✅ Return only top_k
    return results[:top_k]

In [None]:
from transformers import pipeline
from crewai.tools import BaseTool
from pydantic import BaseModel, Field


# NOTE(review): this is the third load of "google/flan-t5-large" in this notebook
# (after the earlier `hf_pipeline` and `flan_pipeline` cells). It rebinds
# `model_name` and `hf_pipeline`; consider reusing the already-loaded objects.
model_name = "google/flan-t5-large"
hf_tokenizer = AutoTokenizer.from_pretrained(model_name)
hf_model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
hf_pipeline = pipeline("text2text-generation", model=hf_model, tokenizer=hf_tokenizer)

# LangChain wrapper around the pipeline; passed as `llm=` to the agents defined below.
hf_llm = HuggingFacePipeline(pipeline=hf_pipeline)


# Remove any OpenAI key from the environment and set the CrewAI provider variable
# (presumably so CrewAI does not default to OpenAI — TODO confirm the variable is honoured).
os.environ.pop("OPENAI_API_KEY", None)
os.environ["CREWAI_LLM_PROVIDER"] = "langchain"

# CrewAI tool that forwards the given text to the shared `hf_pipeline`.
class SummarizerTool(BaseTool):
    name: str = "summarizer_tool"
    description: str = "Summarizes legal case text to generate embeddings for recommendations."

    def _run(self, text: str):
        # Deterministic decoding (no sampling), at most 512 new tokens;
        # only the first generated candidate is returned.
        generations = hf_pipeline(text, max_new_tokens=512, do_sample=False)
        first = generations[0]
        return first["generated_text"]

summarizer_tool_instance = SummarizerTool()

# CrewAI tool that asks the shared `hf_pipeline` to fill a fixed legal-case template.
class LegalStructuringTool(BaseTool):
    name: str = "legal_structuring_tool"
    description: str = "Extracts structured fields (title, facts, issue, rule, etc.) from a legal case."

    def _run(self, text: str) -> str:
        # The template (including its leading indentation) is sent to the model verbatim.
        prompt = f"""
        You are a legal assistant. Fill in each field based on the case below.
        If a field is missing, write "Not found".

        Case Title:
        Court:
        Date:
        Facts:
        Issue:
        Rule:
        Holding:
        Disposition:

        Legal Case Text:
        {text}
        """
        generations = hf_pipeline(prompt, max_new_tokens=512, do_sample=False)
        return generations[0]["generated_text"]

legal_structurer_tool_instance = LegalStructuringTool()

def embed_text_tool(text: str) -> list:
    """Encode ``text`` with ``embed_model`` (as a one-item batch) and return it via ``.tolist()``."""
    batch = [text]
    encoded = embed_model.encode(batch)
    return encoded.tolist()

# CrewAI tool exposing the shared sentence-embedding model.
class EmbedderTool(BaseTool):
    name: str = "embedder_tool"
    description: str = "Embeds academic input into a vector representation."

    def _run(self, text: str):
        # Encode as a one-item batch and convert the result to plain Python lists.
        encoded = embed_model.encode([text])
        return encoded.tolist()

embedder_tool_instance = EmbedderTool()

# Argument schema for RecommenderTool; the fields mirror the parameters of
# find_related_papers, and their defaults match that function's defaults.
class RecommenderToolArgs(BaseModel):
    # Required query text.
    user_input: str = Field(..., description="User query for finding related papers")
    # Maximum number of results (default 5).
    top_k: int = Field(5, description="Number of papers to return")
    # "All" means no filtering on the corresponding column.
    domain_filter: str = Field("All", description="Domain to filter papers")
    subdomain_filter: str = Field("All", description="Subdomain to filter papers")

# CrewAI tool that delegates to find_related_papers.
class RecommenderTool(BaseTool):
    name: str = "recommender_tool"
    description: str = "Recommends related papers with similarity scores and metadata"
    # Field overrides on a pydantic model need a type annotation, like `name`
    # and `description` above; without one the override is not registered as a
    # field and the argument schema may not be picked up.
    args_schema: type[BaseModel] = RecommenderToolArgs

    def _run(self, user_input: str, top_k: int = 5, domain_filter: str = "All", subdomain_filter: str = "All"):
        """Return the list of record dicts produced by find_related_papers for these arguments."""
        return find_related_papers(user_input, top_k, domain_filter, subdomain_filter)

recommender_tool_instance = RecommenderTool()


# Agent equipped with the summarizer tool; uses the local Flan-T5 LLM wrapper.
summarizer_agent = Agent(
    role="Legal Case Summarizer",
    goal="Summarize legal text to aid downstream case recommendations.",
    backstory="An expert in legal language who compresses complex legal cases into concise summaries.",
    verbose=True,
    tools=[summarizer_tool_instance],
    llm=hf_llm
)


# Agent equipped with the legal structuring tool.
# Unlike the other agents, this one does not pass verbose=True.
legal_structurer_agent = Agent(
    role="Legal Case Analyzer",
    goal="Extract structured summaries from legal cases including facts, issues, rules, and holdings.",
    backstory="An expert in legal reasoning and judgment formatting, trained to organize legal content clearly.",
    tools=[legal_structurer_tool_instance],
    llm=hf_llm
)


# Agent equipped with the embedder tool.
# NOTE(review): role/goal text speaks of "academic abstracts" although the
# dataset loaded in this notebook contains legal cases — confirm the wording.
embedder_agent = Agent(
    role="Embedding Generator",
    goal="Convert academic abstracts into meaningful vector representations.",
    backstory="A specialist agent trained to map academic language to dense vector spaces for similarity search.",
    verbose=True,
    tools=[embedder_tool_instance],
    llm=hf_llm
)

# Agent equipped with the recommender tool (wrapper around find_related_papers).
recommender_agent = Agent(
    role="Paper Recommender",
    goal="Find top 5 academic papers based on input relevance.",
    backstory="Specialist in vector similarity and embeddings.",
    verbose=True,
    tools=[recommender_tool_instance],
    llm=hf_llm
)

Device set to use cuda:0


In [None]:
# Define tasks. Each description contains the `{input_text}` placeholder that is
# filled from the `inputs` dict given to Crew.kickoff.
# NOTE(review): callers in this notebook also pass domain_filter, subdomain_filter
# and top_k to kickoff, but no description references them, so they presumably
# never reach the agents — verify against CrewAI's interpolation behaviour.
summarizer_task = Task(
    description="Generate a summary of the legal case for similarity search: {input_text}",
    expected_output="A short summary of the case.",
    agent=summarizer_agent
)

legal_structurer_task = Task(
    description="Extract structured legal elements from this case: {input_text}",
    expected_output="A structured summary with title, court, date, facts, issue, rule, holding, and disposition.",
    agent=legal_structurer_agent
)
# Single-agent crew used by run_structured_summary_agent.
legal_structurer_crew = Crew(
    agents=[legal_structurer_agent],
    tasks=[legal_structurer_task],
    verbose=True
)

embedder_task = Task(
    description="Generate an embedding vector for the input text: {input_text}",
    expected_output="A vector representation of the input.",
    agent=embedder_agent
)

# The expected output is "a list of paper titles"; run_recommender_agent tries to
# parse the crew result as a list and falls back to find_related_papers otherwise.
recommender_task = Task(
    description="Recommend 5 relevant papers for: {input_text}",
    expected_output="A list of paper titles.",
    agent=recommender_agent
)
# Single-agent crew used by run_recommender_agent.
recommender_crew = Crew(
    agents=[recommender_agent],
    tasks=[recommender_task],
    verbose=True
)

# Multi-agent crew for combined task (summarize, embed, recommend — in that task order)
combined_crew = Crew(
    agents=[summarizer_agent, embedder_agent, recommender_agent],
    tasks=[summarizer_task, embedder_task, recommender_task],
    verbose=True
)


In [None]:
import ast, json
from typing import List, Dict, Any

def _parse_list_of_dicts_maybe(text: str) -> List[Dict[str, Any]]:
    # Try Python literal
    try:
        parsed = ast.literal_eval(text)
        if isinstance(parsed, list):
            return parsed
    except Exception:
        pass
    # Try JSON
    try:
        parsed = json.loads(text)
        if isinstance(parsed, list):
            return parsed
    except Exception:
        pass
    return []

def run_structured_summary_agent(user_input: str) -> str:
    """Run the legal structurer crew on ``user_input`` and return its result as stripped text."""
    crew_inputs = {"input_text": user_input}
    crew_output = legal_structurer_crew.kickoff(inputs=crew_inputs)
    return str(crew_output).strip()

def run_recommender_agent(user_input: str, domain_filter: str, subdomain_filter: str, top_k: int = 5) -> List[Dict[str, Any]]:
    """Get recommendations from the recommender crew, falling back to local retrieval.

    The crew result is converted to text and parsed as a list. A non-empty
    parse result is returned as is; otherwise ``find_related_papers`` is called
    with the same query, filters and ``top_k``.
    """
    crew_inputs = {
        "input_text": user_input,
        "domain_filter": domain_filter,
        "subdomain_filter": subdomain_filter,
        "top_k": top_k,
    }
    crew_output = recommender_crew.kickoff(inputs=crew_inputs)
    agent_recs = _parse_list_of_dicts_maybe(str(crew_output).strip())
    if not agent_recs:
        # Agent output was not usable: use the deterministic local retriever.
        return find_related_papers(
            user_input,
            top_k=top_k,
            domain_filter=domain_filter,
            subdomain_filter=subdomain_filter,
        )
    return agent_recs

def run_combined_crew(user_input: str, domain_filter: str, subdomain_filter: str, top_k: int = 5):
    """Run the combined crew, then return ``(structured_summary, recommendations)``.

    The combined crew's own result is not used; the returned values come from
    ``run_structured_summary_agent`` and ``run_recommender_agent``.
    """
    crew_inputs = {
        "input_text": user_input,
        "domain_filter": domain_filter,
        "subdomain_filter": subdomain_filter,
        "top_k": top_k,
    }
    # Executed for its run only; the output is discarded.
    combined_crew.kickoff(inputs=crew_inputs)

    structured = run_structured_summary_agent(user_input)
    recs = run_recommender_agent(user_input, domain_filter, subdomain_filter, top_k=top_k)
    return structured, recs

def _recs_to_html(recommendations: List[Dict[str, Any]]) -> str:
    if not recommendations:
        return "<p>No recommended cases found for this input.</p>"
    html = "<h3>Recommended Legal Cases</h3>"
    for rec in recommendations:
        html += f"""
        <div style="margin-bottom: 15px; padding: 10px; border: 1px solid #ddd; border-radius: 5px;">
          <b>Title:</b> {rec.get('title', 'N/A')}<br>
          <b>Domain:</b> {rec.get('domain', 'N/A')}<br>
          <b>Subdomain:</b> {rec.get('subdomain', 'N/A')}<br>
          <b>Published:</b> {rec.get('published', 'N/A')}<br>
          <b>Link:</b> <a href="{rec.get('link', '#')}" target="_blank">View Case</a><br>
          <b>Similarity Score:</b> {rec.get('similarity_score', 0):.2f}<br>
          <b>Summary:</b> {rec.get('clean_text', 'No summary available')}<br>
        </div>
        """
    return html


In [None]:
from transformers import pipeline

# Zero-shot classification pipeline used by classify_subdomain below.
classifier = pipeline(
    "zero-shot-classification",
    model="facebook/bart-large-mnli"
)
print("✅ Zero-shot classifier loaded!")
# NOTE(review): this re-assigns `domain_structure` with the same content as the
# literal defined in an earlier cell; keep a single definition if possible.
domain_structure = {
    "Constitutional & Administrative Law": [
        "Administrative Tribunals", "Constitutional Amendments", "Emergency Powers", "Federalism",
        "Fundamental Rights", "Judicial Review", "Legislative Powers", "Public Interest Litigation",
        "Separation of Powers", "Writ Jurisdiction"
    ],
    "Corporate & Commercial Law": [
        "Banking Law", "Commercial Arbitration", "Company Law", "Competition Law", "Consumer Protection",
        "Contract Law", "E-Commerce Law", "Insolvency & Bankruptcy", "Mergers & Acquisitions", "Securities Regulation"
    ],
    "Criminal Law & Procedure": [
        "Bail & Sentencing", "Criminal Procedure", "Cyber Crime", "Double Jeopardy", "Evidence in Criminal Cases",
        "Juvenile Justice", "Search and Seizure", "Substantive Criminal Law", "Victim Rights", "White-Collar Crime"
    ],
    "Environmental & Energy Law": [
        "Air Pollution Regulation", "Climate Change Law", "Environmental Impact Assessment", "Environmental Litigation",
        "Forest Law", "Mining & Natural Resources Law", "Renewable Energy Law", "Sustainable Development", "Water Law", "Wildlife Protection"
    ],
    "Intellectual Property & Technology Law": [
        "AI & Law", "Copyright Law", "Data Privacy Law", "Digital Rights Management", "IP Licensing", "Internet Governance",
        "Patent Law", "Software Licensing", "Trade Secrets", "Trademark Law"
    ],
    "International & Human Rights Law": [
        "Climate Agreements", "Diplomatic Immunity", "Gender Rights under International Law", "Global Health Law",
        "Humanitarian Law", "International Criminal Court", "International Treaties", "Refugee Law", "UN Conventions", "War Crimes"
    ]
}

# Flat list of every subdomain across all domains, in dictionary order;
# used as the candidate label set for the classifier.
all_subdomains = [sub for sublist in domain_structure.values() for sub in sublist]

# Subdomain classification helper
def classify_subdomain(text):
    """Classify ``text`` against all subdomains and return the first reported label.

    The zero-shot pipeline is expected to list its labels best match first, so
    the first entry is taken as the prediction.
    """
    prediction = classifier(text, candidate_labels=all_subdomains)
    ranked_labels = prediction['labels']
    return ranked_labels[0]


Device set to use cuda:0


✅ Zero-shot classifier loaded!


In [None]:
def generate_csv(recommendations):
    """
    Save recommended cases to ``legal_case_recommendations.csv`` and return that path.

    Args:
        recommendations: A list of record dicts (as returned by
            ``find_related_papers``), a DataFrame, or ``None`` (treated as an
            empty list).

    Returns:
        The file name of the written CSV (relative to the working directory).

    The CSV always has the columns title, clean_text, link, domain and
    subdomain, plus similarity_score when the input provides it. Columns that
    are absent from the input are written empty instead of raising
    ``KeyError``, so an empty result list or partially filled records still
    produce a valid file with the full header.
    """
    if recommendations is None:
        recommendations = []

    if isinstance(recommendations, list):
        df_out = pd.DataFrame(recommendations)
    else:
        df_out = recommendations

    # Check if similarity_score exists in recommendations
    columns_to_save = ['title', 'clean_text', 'link', 'domain', 'subdomain']
    if 'similarity_score' in df_out.columns:
        columns_to_save.append('similarity_score')

    # reindex (rather than df[...]) tolerates missing columns and returns a new frame,
    # so a DataFrame passed in by the caller is left untouched.
    df_out = df_out.reindex(columns=columns_to_save)

    output_path = "legal_case_recommendations.csv"
    df_out.to_csv(output_path, index=False)
    return output_path

In [None]:
def generate_csv_wrapper(user_input, domain, subdomain, task_choice):
    """Fetch the top 5 recommendations for ``user_input`` and write them to CSV.

    Empty, blank or missing ``domain`` / ``subdomain`` values are treated as
    "All" (no filtering). ``task_choice`` is accepted but not used. Returns
    the path reported by ``generate_csv``.
    """
    def _or_all(choice):
        # Anything falsy or whitespace-only means "no filter".
        return choice if choice and choice.strip() != "" else "All"

    recommendations = recommender_tool_instance._run(
        user_input=user_input,
        top_k=5,
        domain_filter=_or_all(domain),
        subdomain_filter=_or_all(subdomain),
    )
    return generate_csv(recommendations)


In [None]:
# Fictional sample case used to smoke-test the structuring and recommender tools.
user_input_text = """
In the Supreme Court of Newlandia, the petitioner, Mr. James Cooper, challenged the constitutionality of the Emergency Powers Act of 2022,
which authorized the executive branch to detain individuals indefinitely during national crises without judicial review.
The Act was passed in response to a series of cyberattacks that crippled national infrastructure. Mr. Cooper, a civil liberties
advocate, was detained for publishing articles critical of the government's handling of the crisis. He contended that his detention
violated fundamental rights guaranteed under the Newlandian Constitution, including the right to free speech and protection from arbitrary arrest.
The government argued that such measures were necessary to safeguard national security. The central issue before the court was whether
the Act's provisions, particularly the suspension of habeas corpus, were consistent with constitutional limits on executive power.
"""

print(" USER INPUT TEXT")
print(user_input_text)
print("\n==============================\n")

# 1. Structured extraction: call the tool's _run directly (no agent/crew involved)
structured_output = legal_structurer_tool_instance._run(user_input_text)
print(" STRUCTURED LEGAL OUTPUT")
print(structured_output)
print("\n==============================\n")

# 2. Recommendations: call the recommender tool directly with its default
#    arguments (top_k=5, no domain/subdomain filter)
recommendations = recommender_tool_instance._run(user_input_text)

print(" RECOMMENDED CASES")
for rec in recommendations:
    print(f"- Title: {rec.get('title', 'N/A')}")

    # Domain & Subdomain
    print(f"  Domain: {rec.get('domain', 'N/A')}")
    print(f"  Subdomain: {rec.get('subdomain', 'N/A')}")

    # Date: prefer a 'date' key, otherwise fall back to 'published'
    if 'date' in rec:
        print(f"  Date: {rec.get('date', 'N/A')}")
    elif 'published' in rec:
        print(f"  Published: {rec.get('published', 'N/A')}")

    # Link (printed only when the record has one)
    if 'link' in rec:
        print(f"  Link: {rec.get('link', '#')}")

    # Similarity Score (printed only when the record has one)
    if 'similarity_score' in rec:
        print(f"  Similarity Score: {rec.get('similarity_score', 0):.2f}")

    # The record's clean_text is shown as the summary
    print(f"  Summary: {rec.get('clean_text', 'No summary available')}")
    print("--------------------------------")

 USER INPUT TEXT

In the Supreme Court of Newlandia, the petitioner, Mr. James Cooper, challenged the constitutionality of the Emergency Powers Act of 2022,
which authorized the executive branch to detain individuals indefinitely during national crises without judicial review.
The Act was passed in response to a series of cyberattacks that crippled national infrastructure. Mr. Cooper, a civil liberties
advocate, was detained for publishing articles critical of the government's handling of the crisis. He contended that his detention
violated fundamental rights guaranteed under the Newlandian Constitution, including the right to free speech and protection from arbitrary arrest.
The government argued that such measures were necessary to safeguard national security. The central issue before the court was whether
the Act's provisions, particularly the suspension of habeas corpus, were consistent with constitutional limits on executive power.



 STRUCTURED LEGAL OUTPUT
The petitioner, a civ

  cos_sim = float(np.dot(user_norm, doc_norm.T))


 RECOMMENDED CASES
- Title: Roe v. Wade, 931 F.3d (2008)
  Domain: Constitutional & Administrative Law
  Subdomain: Emergency Powers
  Published: 2021-08-30
  Link: https://www.courtlistener.com/opinion/8073784/
  Similarity Score: 0.51
  Summary: This judgment concerns emergency powers ruling. this legal dispute explores aspects of emergency powers within the context of constitutional & administrative law, focusing on recent developments and challenges.
--------------------------------
- Title: Roe v. Wade, 737 U.S. (2006)
  Domain: Constitutional & Administrative Law
  Subdomain: Emergency Powers
  Published: 2016-08-16
  Link: https://www.courtlistener.com/opinion/4837518/
  Similarity Score: 0.51
  Summary: The opinion explores legal issues surrounding emergency powers ruling. in a significant legal interpretation, the case examines evolving standards related to emergency powers under the umbrella of constitutional & administrative law.
--------------------------------
- Title: Dav

In [None]:
def format_apa_legal(title, published, link):
    """Build an APA-style HTML citation for a legal case.

    Args:
        title: Case title; rendered in italics.
        published: Publication date. Either a string whose first
            ``-``-separated segment is the year (e.g. ``"2021-08-30"``) or a
            date-like object exposing an integer ``year`` attribute.
        link: URL used both as the anchor target and as the anchor text.

    Returns:
        An HTML fragment of the form
        ``<i>title</i> (year).<br>Retrieved from <a href='link' ...>link</a>``.
        The year is ``"n.d."`` when it cannot be determined (``None``, empty
        string, or an unsupported type).
    """
    # Local import so the function does not depend on another cell's imports.
    import html

    year = "n.d."
    if isinstance(published, str):
        leading_segment = published.split("-")[0].strip()
        if leading_segment:
            year = leading_segment
    else:
        # Date-like objects carry the year directly; anything else (None,
        # NaN, a missing-date sentinel with a non-int year) stays "n.d.".
        year_attr = getattr(published, "year", None)
        if isinstance(year_attr, int):
            year = str(year_attr)

    # Escape everything interpolated into markup. The href is wrapped in
    # single quotes, so the link must have quotes escaped as well.
    safe_title = html.escape(str(title), quote=False)
    safe_year = html.escape(year, quote=False)
    safe_link = html.escape(str(link), quote=True)
    return f"<i>{safe_title}</i> ({safe_year}).<br>Retrieved from <a href='{safe_link}' target='_blank'>{safe_link}</a>"



In [None]:
import logging
import sys
# absl is optional. When it is installed, swap its handler's ``close`` for a
# version that never raises.
try:
    import absl.logging
    from unittest.mock import MagicMock
except ImportError:
    pass
else:
    def safe_close(self):
        """Close the handler's stream if it has a ``close`` method; ignore any error."""
        try:
            stream = self.stream
            if hasattr(stream, 'close'):
                stream.close()
        except Exception:
            pass

    absl.logging.PythonHandler.close = safe_close

# Loggers to silence completely: disabled, detached from the root logger,
# and stripped of any handlers already attached.
NOISY_LOGGERS = (
    'uvicorn',
    'uvicorn.access',
    'uvicorn.error',
    'uvicorn.asgi',
    'httpx',
    'httpcore',
    'asyncio',
    'websockets',
)

for name in NOISY_LOGGERS:
    logger = logging.getLogger(name)
    logger.disabled, logger.propagate = True, False
    # Iterate over a snapshot because removeHandler mutates logger.handlers.
    for handler in tuple(logger.handlers):
        logger.removeHandler(handler)

# force=True discards existing root handlers before installing the stderr one.
logging.basicConfig(stream=sys.stderr, level=logging.ERROR, force=True)


In [None]:


def update_subdomains(domain):
    """Return a Gradio update for the subdomain dropdown.

    For a domain that is a key of ``domain_structure`` the dropdown offers
    "All" followed by that domain's subdomains, with "All" selected. For any
    other value the dropdown is emptied and its selection cleared.
    """
    if domain not in domain_structure:
        return gr.update(choices=[], value=None)

    subdomain_choices = ["All"] + domain_structure[domain]
    return gr.update(choices=subdomain_choices, value="All")

def run_agent_ui(user_input, domain, subdomain, task):
    """Gradio callback: run the selected agent task on the given case text.

    Args:
        user_input: Legal case text from the textbox. ``None``, empty, or
            whitespace-only input is rejected with a warning.
        domain: Selected domain; ``None`` or blank is treated as "All".
        subdomain: Selected subdomain; ``None`` or blank is treated as "All".
        task: One of "Summarize", "Recommend Cases" or
            "Summarize + Recommend Cases".

    Returns:
        A ``(summary_text, recommendations_html)`` tuple. Failures raised by
        the agents are caught and reported in both slots rather than raised;
        the HTML slot carries an HTML-escaped copy of the message.
    """
    # Local import so the callback does not depend on another cell's imports.
    import html

    # Guard against None as well as blank text: calling .strip() on None
    # would raise before the error handling below is reached.
    if not user_input or not user_input.strip():
        return "⚠️ Please enter a legal case text.", "<p style='color:red;'>No cases found.</p>"

    # Blank or missing filters mean "no filtering".
    domain_filter = domain if domain and domain.strip() != "" else "All"
    subdomain_filter = subdomain if subdomain and subdomain.strip() != "" else "All"

    try:
        # Summarizer agent only.
        if task == "Summarize":
            summary = run_structured_summary_agent(user_input)
            return summary, "📄 Only summarizer will run. Recommender not requested."

        # Recommender agent only.
        elif task == "Recommend Cases":
            summary = "📚 Only recommender will run. Summary not requested."
            recommendations = run_recommender_agent(
                user_input=user_input,
                domain_filter=domain_filter,
                subdomain_filter=subdomain_filter,
                top_k=5
            )
            return summary, _recs_to_html(recommendations)

        # Both agents through the combined crew.
        elif task == "Summarize + Recommend Cases":
            summary_structured, recommendations = run_combined_crew(
                user_input=user_input,
                domain_filter=domain_filter,
                subdomain_filter=subdomain_filter,
                top_k=5
            )
            return summary_structured, _recs_to_html(recommendations)

        # No task selected, or an unrecognised one.
        return "Unknown task.", "<p>No output.</p>"

    except Exception as e:
        msg = f"❌ Error processing request: {e}"
        # The second slot is rendered as HTML, so exception text containing
        # markup characters must be escaped there; the textbox gets it raw.
        return msg, html.escape(msg, quote=False)

# Build the Gradio interface: a two-column layout with inputs on the left and
# results on the right. Components are created in layout order inside the
# context managers, so the statement order below is significant.
with gr.Blocks(title="🧠 Legal Summarizer + Recommender Agent") as demo:
    gr.Markdown("## 🧠 Legal Summarizer + Recommender Agent")

    with gr.Row():
        # Left column: case text, optional filters, task selector, run button.
        with gr.Column():
            user_input = gr.Textbox(label="Input Legal Case Text", lines=8, placeholder="Enter full legal case text, issue, facts, or ruling here..")
            # Domain choices are "All" plus every key of domain_structure; nothing is preselected.
            domain = gr.Dropdown(choices=["All"] + list(domain_structure.keys()), label="Domain (Optional)", value=None)
            # Starts empty; populated by update_subdomains once a domain is chosen.
            subdomain = gr.Dropdown(choices=[], label="Subdomain (Optional)", value=None)

            # Refresh the subdomain dropdown whenever the domain selection changes.
            domain.change(fn=update_subdomains, inputs=domain, outputs=subdomain)

            # No default task: if the user submits text without picking one,
            # run_agent_ui falls through to its "Unknown task." result.
            task_choice = gr.Dropdown(
              choices=["Summarize", "Recommend Cases", "Summarize + Recommend Cases"],
              label="Select Task",
              value=None
            )

            submit_btn = gr.Button("▶️ Start Agent", variant="primary")

        # Right column: summary text, recommendations rendered as HTML, CSV button.
        with gr.Column():
            summary_output = gr.Textbox(label="Structured Summary of Legal Case", lines=6)
            recommendations_output = gr.HTML(label="Recommended Legal Cases")
            download_btn = gr.Button("📥 Download Recommended Legal Cases as CSV")


    # Run the agent: run_agent_ui returns (summary, recommendations HTML),
    # mapped in that order onto the two output components.
    submit_btn.click(fn=run_agent_ui,
                     inputs=[user_input, domain, subdomain, task_choice],
                     outputs=[summary_output, recommendations_output])

    # CSV export: generate_csv_wrapper (defined elsewhere) receives the same
    # four inputs as the run button. The File component is created inline
    # here rather than in the layout above.
    # NOTE(review): presumably it therefore renders below the two-column row — confirm.
    download_btn.click(
      fn=generate_csv_wrapper,
      inputs=[user_input, domain, subdomain, task_choice],
      outputs=gr.File(label="Download Recommendations CSV")
    )


def run_gradio_safe():
    """Launch the ``demo`` Gradio app with a public share link.

    Any exception raised while configuring or launching is caught and printed
    instead of propagating, so the calling cell itself does not fail.
    """
    print("🚀 Starting Gradio safely for Colab...")
    launch_options = {"share": True, "show_error": True}
    try:
        import os
        # Environment setting Gradio consults for the server bind address.
        os.environ.update(GRADIO_SERVER_NAME="0.0.0.0")
        # Detach every handler currently attached to the root logger.
        root_logger = logging.getLogger()
        del root_logger.handlers[:]
        demo.launch(**launch_options)
    except Exception as launch_error:
        print(f"❌ Gradio launch failed: {launch_error}")


run_gradio_safe()

🚀 Starting Gradio safely for Colab...
Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://cbdfa761c323409517.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


**End of code. Everything below is extra or duplicated code — ignore it.**



