In [12]:
# Install required libraries (including tenacity for retries)
!pip install google-generativeai faiss-cpu pandas matplotlib tenacity "chromadb==0.6.3"



In [13]:
# Import libraries
import os
import time
import google.generativeai as genai
from google import genai as genai2
from google.genai import types
import pandas as pd
import faiss
import numpy as np
from tenacity import retry, wait_exponential
from IPython.display import display
import ipywidgets as widgets
import logging
from IPython.display import Markdown


In [14]:
# Load the Gemini API key from Colab user secrets (google.colab.userdata)
from google.colab import userdata

api_key = userdata.get('GOOGLE_API_KEY')
if not api_key:
    # Fail here with a clear message instead of at the first model call.
    raise ValueError("GOOGLE_API_KEY is empty; add it to the notebook's secrets before running.")
genai.configure(api_key=api_key)


In [15]:
# Ask the API which models are available to the configured key
models = genai.list_models()

# Emit one "Model name: ..." line per model
for model in models:
    print("Model name:", model.name)

Model name: models/chat-bison-001
Model name: models/text-bison-001
Model name: models/embedding-gecko-001
Model name: models/gemini-1.0-pro-vision-latest
Model name: models/gemini-pro-vision
Model name: models/gemini-1.5-pro-latest
Model name: models/gemini-1.5-pro-001
Model name: models/gemini-1.5-pro-002
Model name: models/gemini-1.5-pro
Model name: models/gemini-1.5-flash-latest
Model name: models/gemini-1.5-flash-001
Model name: models/gemini-1.5-flash-001-tuning
Model name: models/gemini-1.5-flash
Model name: models/gemini-1.5-flash-002
Model name: models/gemini-1.5-flash-8b
Model name: models/gemini-1.5-flash-8b-001
Model name: models/gemini-1.5-flash-8b-latest
Model name: models/gemini-1.5-flash-8b-exp-0827
Model name: models/gemini-1.5-flash-8b-exp-0924
Model name: models/gemini-2.5-pro-exp-03-25
Model name: models/gemini-2.5-pro-preview-03-25
Model name: models/gemini-2.0-flash-exp
Model name: models/gemini-2.0-flash
Model name: models/gemini-2.0-flash-001
Model name: models/

In [16]:
# Model handles used by the cells below (one per task), plus a google-genai client.
core_model = genai.GenerativeModel('gemini-2.0-flash-thinking-exp')       # For reasoning/RAG
# NOTE(review): text-embedding-004 is an embedding model; the embedding calls in
# this notebook go through `client.models.embed_content`, and `embedding_model`
# is not referenced in the visible cells. Wrapping it in GenerativeModel looks
# unintended — confirm before relying on this handle.
embedding_model = genai.GenerativeModel('text-embedding-004')     # For vector search
vision_model = genai.GenerativeModel('gemini-2.0-flash')         # For images
flash_model = genai.GenerativeModel('gemini-1.5-flash-8b')        # Lightweight tasks
lcw_model = genai.GenerativeModel('gemini-1.5-pro-latest') # Long-context model (original note: 2M-token window — TODO confirm)
audio_model = genai.GenerativeModel('gemini-1.5-pro-latest')  # Used for the audio testimony cell
client = genai2.Client(api_key=api_key)  # google-genai client; used for embed_content calls

# Load real dataset (replace with your own)
# Example: Crime case data from https://www.kaggle.com/datasets
#df = pd.read_csv("/kaggle/input/crime-data/cases.csv")  # Replace with your dataset

In [17]:
# Process a large case file in a single prompt (relies on the model's long context window).
# The encoding is explicit so the read does not depend on the runtime's locale
# default; this matches how quick_summary() opens the same file.
with open("/content/drive/MyDrive/Gen AI Project/case.txt", "r", encoding="utf-8") as f:
    long_case = f.read()

response = core_model.generate_content(f"Summarize key suspects: {long_case}")
print(response.text)

The key suspects in this case are:

1.  **Asif Sayeed:** He is the individual who wholly owns **Management Principles, Inc. (MPI)** and is the central figure in devising the scheme to bypass the Healthcare Consortium of Illinois' referral process.

2.  **Management Principles, Inc. (MPI):** This is Sayeed's healthcare management company. MPI signed the Management Services Agreement with the Healthcare Consortium, through which they gained access to patient data. MPI then used this data to solicit patients and forward them to Vital and Physician Care. MPI made payments to the Consortium under the guise of "management services" which were actually kickbacks for patient referrals (in the form of data access).

3.  **Vital Home & Healthcare:** This is one of the smaller healthcare companies managed by MPI. Vital provided home-based medical services to Medicare recipients, billed Medicare for services to patients obtained through the scheme, and split the fees with MPI.

4.  **Physician Car

In [18]:
# Few-shot examples (clues -> anomaly) that detect_anomalies() places at the top
# of its prompt so the model answers in the same 'Anomaly: "..."' format.
few_shot_examples = """
Example 1:
Clues: ["Alibi: Home at 8 PM", "Car seen leaving at 7:45 PM"]
Anomaly: "Alibi conflict: Car movement contradicts stated location."

Example 2:
Clues: ["No forced entry", "Phone destroyed"]
Anomaly: "Phone destruction suggests evidence tampering."

Example 3:
Clues: ["Victim’s phone pinged Tower A at 8 PM", "Suspect’s phone pinged Tower B at 8:05 PM"]
Anomaly: "Geolocation mismatch suggests suspect was elsewhere."
"""

from tenacity import retry, stop_after_attempt, wait_exponential


@retry(
    stop=stop_after_attempt(3),
    wait=wait_exponential(multiplier=1, min=4, max=10),
    reraise=True,
)
def _generate_anomaly_report(prompt):
    """Send the prompt to core_model and return the response text.

    Retried with exponential backoff for up to 3 attempts; the last exception
    is re-raised so the caller decides how to report it.
    """
    return core_model.generate_content(prompt).text


def detect_anomalies(clues):
    """Ask the model to identify anomalies in a list of clues.

    Args:
        clues: The clues to analyse; interpolated into the prompt as-is
            (a list is rendered with its Python repr).

    Returns:
        The model's text answer, or the string "Error analyzing anomalies"
        if every attempt failed (the failure is logged).
    """
    try:
        prompt = f"""
        {few_shot_examples}

        New Clues:
        {clues}

        Identify anomalies:
        """
        # The retry wraps only the API call. Previously the decorator sat on a
        # function that caught every exception itself, so no retry ever ran.
        return _generate_anomaly_report(prompt)
    except Exception as e:
        logging.error(f"Anomaly detection failed: {e}")
        return "Error analyzing anomalies"

In [19]:
# Demo inputs for detect_anomalies(): each entry is (list of clue strings, label).
# The label is only printed as a heading; it is not compared with the model's answer.
test_cases = [
    (
        ["Suspect reported $20k annual income", "Purchased luxury car worth $80k in cash"],
        "Financial anomaly"
    ),
    (
        ["Suspect attended meeting in New York at 3 PM", "Witness saw suspect in Los Angeles at 3:30 PM"],
        "Geospatial impossibility"
    ),
    (
        ["Crime scene report: No weapons found", "Bloody knife discovered in suspect's trunk"],
        "Evidence contradiction"
    ),
    (
        ["Suspect claims to fear heights", "Security footage shows suspect on rooftop at 2 AM"],
        "Behavioral anomaly"
    )
]

# Run every case and print the model's answer under its label.
for clues, case_type in test_cases:
    print(f"\nTesting {case_type}:")
    print(detect_anomalies(clues))


Testing Financial anomaly:
Anomaly: "Financial anomaly: Cash purchase of luxury car inconsistent with reported income."

Testing Geospatial impossibility:
Anomaly: "Geographic impossibility: Suspect cannot be in New York and Los Angeles within 30 minutes."

Testing Evidence contradiction:
Anomaly: "Weapon anomaly: Knife in suspect's trunk contrasts 'no weapon at scene'."

Testing Behavioral anomaly:
Anomaly: "Height fear contradiction: Rooftop presence contradicts claimed fear of heights."


In [20]:

# Step 5: Analyze image (supports URLs or local files)
def _load_image_part(image_source):
    """Read image bytes from a URL or local path and wrap them as an inline blob dict."""
    import mimetypes
    import urllib.request
    from pathlib import Path

    if image_source.startswith(("http://", "https://")):
        # Bounded timeout so an unresponsive host cannot hang the cell.
        with urllib.request.urlopen(image_source, timeout=30) as resp:
            data = resp.read()
            mime_type = resp.headers.get_content_type()
    else:
        data = Path(image_source).read_bytes()
        mime_type = None

    # Fall back to the file extension when the server gave no image/* type
    # (or for local files); JPEG is a last-resort guess.
    if not (mime_type or "").startswith("image/"):
        mime_type = mimetypes.guess_type(image_source)[0] or "image/jpeg"
    return {"mime_type": mime_type, "data": data}


def analyze_image(image_source):
    """Describe an image for a criminal investigation using vision_model.

    Args:
        image_source: An http(s) URL or a local file path of the image.

    Returns:
        The model's description, or "Error: <message>" if loading the image or
        calling the model fails.
    """
    try:
        # Send the actual image bytes. Passing the path/URL string itself would
        # be treated as plain text, so the model would never see the picture.
        image_part = _load_image_part(image_source)
        response = vision_model.generate_content(
            [image_part, "Describe this image for a criminal investigation."]
        )
        return response.text
    except Exception as e:
        return f"Error: {str(e)}"

In [21]:
# Test with a public URL
#image_url = "https://example.com/crime_scene.jpg"
#print(analyze_image(image_url))

# Test with a local file (this path is under /content/drive/MyDrive, so that
# location must be available in the runtime and the file must exist there)
image_path = "/content/drive/MyDrive/Gen AI Project/gettyimages-588357220-170667a.jpg"
print(analyze_image(image_path))

Okay, I will analyze the image to provide a detailed description for a criminal investigation, focusing on potentially relevant details. Please keep in mind I am an AI, and this is just an interpretation. A real investigation would require human expertise.

**General Description:**

The image appears to depict an interior scene, possibly a residence. It shows a room that is in a state of significant disarray. The lighting is somewhat dim, possibly from natural light filtering through a window, but this is not clear. The overall impression is one of neglect and potential chaos.

**Specific Details:**

*   **Dominant Feature: Mess and Disarray:** The most striking aspect is the extreme untidiness. Objects are scattered throughout the room. There is clutter on the floor, on furniture, and potentially on other surfaces not fully visible.
*   **Furniture/Objects:**
    *   There appears to be a bed or a couch (partially visible). The bedding or cushions may be dishevelled.
    *   There are

In [22]:
# Upload the audio file so it can be passed to the model alongside a prompt.
# NOTE(review): the original comment claimed MP3/WAV/FLAC <=25MB support — confirm
# against the current API limits.
audio_file = genai.upload_file(path="/content/drive/MyDrive/Gen AI Project/criminal_test.mp3")

# Question to ask the model about the recording.
# NOTE: the name `prompt` is reassigned later by the RAG prompt cell.
prompt = "What do you think of this suspect's testimony"

# Send the prompt together with the uploaded audio to the model
response = audio_model.generate_content([prompt, audio_file])

# Print the model's analysis
print(response.text)

This testimony raises several red flags suggesting potential guilt or, at the very least, that the suspect is withholding information:

* **Overly defensive and repetitive:** The suspect repeatedly insists they did nothing wrong, even before being directly accused.  Phrases like "I swear, officer" and "I didn't do anything wrong" are used multiple times, which can be a sign of nervousness and an attempt to overcompensate.
* **Unsolicited information and changing story:** The suspect offers details without being prompted, such as walking past the gas station and not going inside.  Later, they change their story, saying they might have seen someone running out but couldn't get a good look.  The evolving narrative suggests they are trying to manage the information they reveal.
* **Specific denials:**  Focusing on specifics like "I didn't touch the cash register" or "I've never even stolen a candy bar" feels overly precise and can indicate an attempt to deflect attention from other potenti

In [23]:
# Sample case narratives used as the document corpus for the RAG system
CASE1="On April 10, 2025, downtown Metropolis witnessed a daring burglary at a high-end jewelry store late one night. At around 11:30 PM, local police responded to frantic calls about the incident. Surveillance footage revealed a masked individual using specialized tools to disarm the alarm system. Investigators have gathered crucial evidence, such as fingerprints and forensic samples. The case remains active as detectives continue piecing together the events through witness interviews and further footage review."
CASE2="In the industrial district on March 15, 2025, an abandoned warehouse became the scene of an unsettling break-in. Late in the evening, concerned residents reported suspicious activity in the area, which prompted a swift investigation. Security cameras later captured an unidentified person near the warehouse, and early findings indicated that the intruder had tampered with the building's security systems. Authorities recovered digital logs, partial footprints, and burglary tools from the site. The investigation is still ongoing, with forensic experts closely examining the evidence."
CASE3="On February 20, 2025, a serene suburban neighborhood was shocked by a bold bank robbery. Around midday, an armed suspect entered a local bank, swiftly overpowering the security measures in place. Witnesses described the event as both chaotic and frightening as the suspect executed the crime with apparent precision. Banking security cameras captured the entire sequence, showing the suspect's rapid actions and subsequent escape. Detectives have collected vital evidence, including the surveillance footage, detailed witness statements, and a discarded mask. The incident has raised community concern, and the investigation continues with multiple leads under review."

documents=[CASE1, CASE2, CASE3]

In [24]:
from logging import config  # NOTE(review): appears unused in the visible cells
from chromadb import Documents, EmbeddingFunction, Embeddings
# Aliased so this import does not rebind the name `retry`, which other cells
# use for tenacity's decorator.
from google.api_core import retry as api_retry
from google.genai import types


def is_retriable(e):
    """Return True for API errors worth retrying (HTTP 429 quota / 503 unavailable)."""
    # `client` is created from the SDK imported as `genai2`, so its errors are
    # `genai2.errors.APIError`. The predicate previously looked the class up on
    # `genai`, which in this notebook is a different package
    # (google.generativeai).
    return isinstance(e, genai2.errors.APIError) and e.code in {429, 503}


class GeminiEmbeddingFunction(EmbeddingFunction):
    """Chroma embedding function backed by text-embedding-004 via `client`."""

    # True -> embed as documents ("retrieval_document");
    # False -> embed as queries ("retrieval_query").
    document_mode = True

    @api_retry.Retry(predicate=is_retriable)
    def __call__(self, input: Documents) -> Embeddings:
        """Embed `input` and return one vector per item, in order."""
        embedding_task = "retrieval_document" if self.document_mode else "retrieval_query"

        response = client.models.embed_content(
            model="models/text-embedding-004",
            contents=input,
            config=types.EmbedContentConfig(
                task_type=embedding_task,
            ),
        )
        return [e.values for e in response.embeddings]

In [25]:
import chromadb

DB_NAME = "casesdb"

# Documents are being indexed here, so embed in document mode.
embed_fn = GeminiEmbeddingFunction()
embed_fn.document_mode = True

chroma_client = chromadb.Client()
db = chroma_client.get_or_create_collection(name=DB_NAME, embedding_function=embed_fn)

# upsert (instead of add) keeps this cell idempotent: re-running it against an
# existing collection refreshes the entries with these ids rather than trying
# to add ids that are already present.
db.upsert(documents=documents, ids=[str(i) for i in range(len(documents))])

In [26]:
# The embedding function chooses its task type from this flag; turn document
# mode off so the search text is embedded as a query.
embed_fn.document_mode = False

# Question to look up in the Chroma collection.
query = "what specific evidence was gathered during the jewelry store burglary?"

# One query text goes in, so exactly one list of passages comes back.
result = db.query(query_texts=[query], n_results=1)
(all_passages,) = result["documents"]

Markdown(all_passages[0])

On April 10, 2025, downtown Metropolis witnessed a daring burglary at a high-end jewelry store late one night. At around 11:30 PM, local police responded to frantic calls about the incident. Surveillance footage revealed a masked individual using specialized tools to disarm the alarm system. Investigators have gathered crucial evidence, such as fingerprints and forensic samples. The case remains active as detectives continue piecing together the events through witness interviews and further footage review.

In [27]:
# Collapse newlines so the question occupies a single line of the prompt.
query_oneline = query.replace("\n", " ")

# This prompt is where you can specify any guidance on tone, or what topics the model should stick to, or avoid.
# (Fixed the typo "converstional" in the instruction text sent to the model.)
prompt = f"""You are a helpful and informative bot that answers questions using text from the reference passage included below.
Be sure to respond in a complete sentence, being comprehensive, including all relevant background information.
However, you are talking to a non-technical audience, so be sure to break down complicated concepts and
strike a friendly and conversational tone. If the passage is irrelevant to the answer, you may ignore it.

QUESTION: {query_oneline}
"""

# Add the retrieved documents to the prompt, one PASSAGE line each.
for passage in all_passages:
    passage_oneline = passage.replace("\n", " ")
    prompt += f"PASSAGE: {passage_oneline}\n"

print(prompt)

You are a helpful and informative bot that answers questions using text from the reference passage included below.
Be sure to respond in a complete sentence, being comprehensive, including all relevant background information.
However, you are talking to a non-technical audience, so be sure to break down complicated concepts and
strike a friendly and converstional tone. If the passage is irrelevant to the answer, you may ignore it.

QUESTION: what specific evidence was gathered during the jewelry store burglary?
PASSAGE: On April 10, 2025, downtown Metropolis witnessed a daring burglary at a high-end jewelry store late one night. At around 11:30 PM, local police responded to frantic calls about the incident. Surveillance footage revealed a masked individual using specialized tools to disarm the alarm system. Investigators have gathered crucial evidence, such as fingerprints and forensic samples. The case remains active as detectives continue piecing together the events through witness i

In [28]:
# Send the assembled RAG prompt to the reasoning model.
answer = core_model.generate_content(contents=prompt)

# Render the reply as Markdown in the cell output.
Markdown(answer.text)

During the jewelry store burglary, investigators gathered specific evidence like fingerprints and forensic samples to help them solve the case.

In [29]:
from tenacity import retry, stop_after_attempt, wait_exponential


@retry(
    stop=stop_after_attempt(3),
    wait=wait_exponential(multiplier=1, min=4, max=10),
    reraise=True,
)
def _summarize_text(text):
    """Ask flash_model for a one-sentence summary of `text`.

    Retried with exponential backoff for up to 3 attempts; the last exception
    is re-raised so the caller can turn it into an error message.
    """
    response = flash_model.generate_content(
        f"Summarize this in 1 sentence: {text}"
    )
    return response.text


def quick_summary(file_path):
    """Summarize a UTF-8 text file in one sentence.

    Args:
        file_path: Path of the text file to summarize.

    Returns:
        The summary text, or an "Error: ..." string if the file is missing or
        the model call keeps failing.
    """
    try:
        # Read text from the file
        with open(file_path, "r", encoding="utf-8") as file:
            text = file.read()

        # Only the model call is retried. Previously the decorator wrapped a
        # function that caught every exception itself, so no retry ever ran.
        return _summarize_text(text)
    except FileNotFoundError:
        return "Error: The file was not found. Check the path."
    except Exception as e:
        return f"Error: {str(e)}"

# Example usage
print(quick_summary("/content/drive/MyDrive/Gen AI Project/case.txt"))  # Replace with your file path

The Seventh Circuit Court of Appeals affirmed a $6 million judgment against healthcare companies for violating the Anti-Kickback Statute and False Claims Act, but remanded the case for the district court to determine which Medicare claims were directly attributable to the defendants' illegal kickback scheme.



In [30]:
# Sample case data (text clues)
data = {
    "clue_id": [1, 2, 3, 4],
    "text": [
        "Victim last seen near park at 8 PM [[1]]",
        "Witness heard argument in alleyway [[2]]",
        "Suspect's car spotted 2 miles from scene [[3]]",
        "Security footage shows figure in red jacket [[4]]"
    ]
}
df = pd.DataFrame(data)

def generate_embedding(text, task_type="retrieval_document"):
    """Embed one text with text-embedding-004 for semantic search.

    Args:
        text: The string to embed.
        task_type: Task hint passed to the API. Defaults to
            "retrieval_document" (indexing); use "retrieval_query" when
            embedding a search query.

    Returns:
        A list with one embedding vector per input text. Exactly one text is
        sent, so this is a one-element list of float lists.
    """
    response = client.models.embed_content(
            model="models/text-embedding-004",
            contents=[text],
            config=types.EmbedContentConfig(
                task_type=task_type,
            ),
        )
    return [e.values for e in response.embeddings]

# Generate embeddings for all clues
df["embedding"] = df["text"].apply(generate_embedding)

# Build FAISS vector index.
# Each cell of df["embedding"] is a one-element list of vectors, so stacking
# them gives a (num_clues, embedding_dim) matrix; FAISS works on float32.
embeddings = np.vstack(df["embedding"].tolist()).astype("float32")
dimension = embeddings.shape[1]
faiss_index = faiss.IndexFlatL2(dimension)
faiss_index.add(embeddings)

def rag_query(query, top_k=2):
    """Answer a query from the most similar clues, asking the model for JSON.

    Args:
        query: The question to answer.
        top_k: Maximum number of clues to retrieve.

    Returns:
        The model's raw text response.
    """
    # Embed the question as a query (the clues were embedded as documents).
    query_embedding = np.asarray(
        generate_embedding(query, task_type="retrieval_query"), dtype="float32"
    ).reshape(1, -1)

    # Never ask for more neighbours than are indexed: FAISS pads missing
    # results with -1, which iloc would silently treat as "last row".
    k = min(top_k, faiss_index.ntotal)
    if k > 0:
        _, indices = faiss_index.search(query_embedding, k)
        hit_rows = [int(i) for i in indices[0] if i >= 0]
    else:
        hit_rows = []
    relevant_clues = df.iloc[hit_rows]["text"].tolist()

    # Generate structured response
    prompt = f"""
    Use these clues to answer in JSON format:
    {relevant_clues}

    Query: {query}
    JSON keys: hypothesis, confidence, related_clues
    """
    response = core_model.generate_content(prompt)
    return response.text

# Example RAG output
print(rag_query("Who was near the park?"))
# The response is text containing JSON with the keys hypothesis, confidence and
# related_clues; the exact values vary between runs.

```json
{
  "hypothesis": "Victim",
  "confidence": "high",
  "related_clues": [1]
}
```
