In [9]:
# Install required package for direct API communication with Gemini via LangChain
!pip install --upgrade langchain-google-genai





In [10]:
# import google.colab.userdata

# # Load the Gemini API key securely from Colab secrets.
# api_key = google.colab.userdata.get('API_KEY')

# print("Gemini API key has been loaded from Colab secrets.")
# ...existing code...
from dotenv import load_dotenv
import os

load_dotenv()  # loads project .env

api_key = os.getenv("API_KEY")
if not api_key:
    raise RuntimeError("API_KEY missing. Add API_KEY to your .env file.")

# strip accidental surrounding quotes
if api_key.startswith('"') and api_key.endswith('"'):
    api_key = api_key[1:-1]
if api_key.startswith("'") and api_key.endswith("'"):
    api_key = api_key[1:-1]

# print a masked prefix to confirm loaded value without exposing full key
print("API key loaded, prefix:", api_key[:8] + "…")
# ...existing code...
print(api_key)

API key loaded, prefix: AIzaSyCx…
AIzaSyCxKS_4nxUKREl2uER3A-FQXvEkHbBHZcY


In [11]:
def analyze_code(code_snippet: str):
  """
  Analyzes a given code snippet using the Gemini API and returns structured analysis results.

  Args:
    code_snippet (str): The code to be analyzed.

  Returns:
    dict: A dictionary containing the analysis results.
  """
  pass

In [None]:
import google.generativeai as genai
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser

genai.configure(api_key=api_key)

def analyze_code(code_snippet: str):
  """
  Analyzes a given code snippet using the Gemini API via LangChain and returns structured analysis results.

  Args:
    code_snippet (str): The code to be analyzed.

  Returns:
    str: A JSON string containing the analysis results.
  """
  llm = ChatGoogleGenerativeAI(model="gemini-2.5-pro", google_api_key=api_key)

  prompt_template = ChatPromptTemplate.from_messages([
      ("system", "You are a helpful assistant that analyzes code for potential issues and returns the analysis in a structured JSON format."),
      ("human", "Analyze the following code snippet for potential issues. \nProvide the analysis in a structured JSON format, including 'title', 'type', 'severity' (e.g., 'Low', 'Medium', 'High', 'Critical'), 'lineNumber', 'description', and 'suggestedFix' for each issue found.\n\nCode:\n```python\n{code_snippet}\n```\n\nExample JSON format for issues:\n{{\"issues\": [{{\"title\": \"Issue Title\", \"type\": \"Bug\", \"severity\": \"High\", \"lineNumber\": 10, \"description\": \"Detailed description of the issue.\", \"suggestedFix\": \"Recommended fix for the issue.\"}}]}}")
  ])

  output_parser = StrOutputParser()

  chain = prompt_template | llm | output_parser

  response = chain.invoke({"code_snippet": code_snippet})

  return response

sample_code = """def divide(a, b):
    return a / b

result = divide(10, 0)"""

analysis_result = analyze_code(sample_code)
print(analysis_result)

Retrying langchain_google_genai.chat_models._chat_with_retry.<locals>._chat_with_retry in 2.0 seconds as it raised PermissionDenied: 403 Your API key was reported as leaked. Please use another API key..
Retrying langchain_google_genai.chat_models._chat_with_retry.<locals>._chat_with_retry in 4.0 seconds as it raised PermissionDenied: 403 Your API key was reported as leaked. Please use another API key..
Retrying langchain_google_genai.chat_models._chat_with_retry.<locals>._chat_with_retry in 8.0 seconds as it raised PermissionDenied: 403 Your API key was reported as leaked. Please use another API key..
Retrying langchain_google_genai.chat_models._chat_with_retry.<locals>._chat_with_retry in 16.0 seconds as it raised PermissionDenied: 403 Your API key was reported as leaked. Please use another API key..


In [None]:

sample_code = """import sqlite3

def process_data(data_list):
    result = []
    for i in range(len(data_list)):
        if data_list[i] == None:   # Issue: bad None check
            print("Found None at index", i)
        result.append(data_list[i].strip())  # Potential AttributeError if None
    return result

def insecure_query(user_input):
    conn = sqlite3.connect("users.db")
    cursor = conn.cursor()
    # SQL injection vulnerability
    query = f"SELECT * FROM users WHERE name = '{user_input}'"
    cursor.execute(query)
    rows = cursor.fetchall()
    conn.close()
    return rows

def divide_numbers(a, b):
    # No error handling for division by zero
    return a / b

unused_variable = 42  # Declared but never used

def main():
    data = ["Alice", None, "Bob", "Charlie"]
    processed = process_data(data)
    print("Processed:", processed)

    # Dangerous: user input directly passed
    name = input("Enter username: ")
    print(insecure_query(name))

    print(divide_numbers(10, 0))  # Runtime error

if __name__ == "__main__":
    main()
"""

analysis_result = analyze_code(sample_code)
print(analysis_result)

```json
{
  "issues": [
    {
      "title": "SQL Injection Vulnerability",
      "type": "Security",
      "severity": "Critical",
      "lineNumber": 14,
      "description": "The database query is constructed using an f-string with user-provided input. This makes the application vulnerable to SQL injection attacks, where a malicious user could manipulate the query to access, modify, or delete data without authorization.",
      "suggestedFix": "Use parameterized queries to safely pass user input to the database. This separates the query logic from the data. Change the query execution to: `cursor.execute(\"SELECT * FROM users WHERE name = ?\", (user_input,))`."
    },
    {
      "title": "Potential AttributeError on None",
      "type": "Bug",
      "severity": "High",
      "lineNumber": 8,
      "description": "The code attempts to call the `.strip()` method on an element from `data_list`. If an element is `None` (which is checked for on line 6), this line will still execute and r

In [None]:
sample_code_complex = """import sqlite3

def process_data(data_list):
    result = []
    for i in range(len(data_list)):
        if data_list[i] == None:   # Issue: bad None check
            print("Found None at index", i)
        result.append(data_list[i].strip())  # Potential AttributeError if None
    return result

def insecure_query(user_input):
    conn = sqlite3.connect("users.db")
    cursor = conn.query()
    # SQL injection vulnerability
    query = f"SELECT * FROM users WHERE name = '{user_input}'"
    cursor.execute(query)
    rows = cursor.fetchall()
    conn.close()
    return rows

def divide_numbers(a, b):
    # No error handling for division by zero
    return a / b

unused_variable = 42  # Declared but never used

def main():
    data = ["Alice", None, "Bob", "Charlie"]
    processed = process_data(data)
    print("Processed:", processed)

    # Dangerous: user input directly passed
    name = input("Enter username: ")
    print(insecure_query(name))

    print(divide_numbers(10, 0))  # Runtime error

if __name__ == "__main__":
    main()
"""

# First, get the raw analysis result using the analyze_code function
raw_analysis_output = analyze_code(sample_code_complex)
print("\n--- Raw Analysis Output from Gemini ---\n")
print(raw_analysis_output)



--- Raw Analysis Output from Gemini ---

```json
{
  "issues": [
    {
      "title": "SQL Injection Vulnerability",
      "type": "Vulnerability",
      "severity": "Critical",
      "lineNumber": 14,
      "description": "The database query is constructed using an f-string with user-provided input. This makes the application vulnerable to SQL injection attacks, where a malicious user could alter the query's logic to access, modify, or delete data.",
      "suggestedFix": "Use parameterized queries to safely pass user input to the database. Replace the f-string with a query that uses a placeholder (e.g., '?') and pass the user input as a separate argument to `cursor.execute()`. For example: `cursor.execute(\"SELECT * FROM users WHERE name = ?\", (user_input,))`."
    },
    {
      "title": "Potential AttributeError on None",
      "type": "Bug",
      "severity": "High",
      "lineNumber": 8,
      "description": "The code attempts to call the `.strip()` method on an element from `

In [None]:
import json
import google.generativeai as genai
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser

def get_code_metrics(code_snippet: str) -> dict:
  """
  Calculates summary metrics and issue distribution by directly querying the Gemini API.

  Args:
    code_snippet (str): The code to be analyzed for metrics.

  Returns:
    dict: A dictionary containing 'summary_metrics' and 'issue_distribution' from Gemini.
  """
  genai.configure(api_key=api_key)
  llm = ChatGoogleGenerativeAI(model="gemini-2.5-pro", google_api_key=api_key)

  prompt_template = ChatPromptTemplate.from_messages([
      ("system", "You are a helpful assistant that analyzes code and provides summary metrics and issue distribution in a structured JSON format."),
      ("human", """Analyze the following code snippet and provide the analysis in a structured JSON format. I need two main sections: 'summary_metrics' and 'issue_distribution'.\n\nFor 'summary_metrics', include:\n- 'code_quality_score' (an integer from 0-100 where higher is better)\n- 'security_rating' (an integer from 0-100 where higher is better)\n- 'bug_density' (count of bugs/runtime errors)\n- 'critical_issue_count' (count of critical severity issues)\n\nFor 'issue_distribution', include:\n- 'security_vulnerabilities' (count of security/vulnerability issues)\n- 'code_smells' (count of code smell issues)\n- 'best_practices' (count of best practice violations, if any)\n- 'performance_issues' (count of performance-related issues, if any)\n
Ensure the output is a single JSON object. Here's an example of the desired JSON format:\n```json\n{{\n  "summary_metrics": {{\n    "code_quality_score": 85,\n    "security_rating": 90,\n    "bug_density": 1,\n    "critical_issue_count": 0\n  }},\n  "issue_distribution": {{\n    "security_vulnerabilities": 0,\n    "code_smells": 2,\n    "best_practices": 1,\n    "performance_issues": 0\n  }}\n}}\n```\n
Code:\n```python\n{code_snippet}\n```""")
  ])

  output_parser = StrOutputParser()

  chain = prompt_template | llm | output_parser

  response = chain.invoke({"code_snippet": code_snippet})

  # Remove markdown code block if present in the response
  if response.startswith('```json') and response.endswith('```'):
    response = response.replace('```json\n', '', 1)
    response = response.replace('\n```', '', 1)

  try:
    parsed_response = json.loads(response)
    return parsed_response
  except json.JSONDecodeError:
    print(f"Error: Invalid JSON string received from model: {response}")
    return {
        "summary_metrics": {},
        "issue_distribution": {}
    }

In [None]:
sample_code_complex = """import sqlite3

def process_data(data_list):
    result = []
    for i in range(len(data_list)):
        if data_list[i] == None:   # Issue: bad None check
            print("Found None at index", i)
        result.append(data_list[i].strip())  # Potential AttributeError if None
    return result

def insecure_query(user_input):
    conn = sqlite3.connect("users.db")
    cursor = conn.cursor()
    # SQL injection vulnerability
    query = f"SELECT * FROM users WHERE name = '{user_input}'"
    cursor.execute(query)
    rows = cursor.fetchall()
    conn.close()
    return rows

def divide_numbers(a, b):
    # No error handling for division by zero
    return a / b

unused_variable = 42  # Declared but never used

def main():
    data = ["Alice", None, "Bob", "Charlie"]
    processed = process_data(data)
    print("Processed:", processed)

    # Dangerous: user input directly passed
    name = input("Enter username: ")
    print(insecure_query(name))

    print(divide_numbers(10, 0))  # Runtime error

if __name__ == "__main__":
    main()
"""

code_metrics_and_distribution = get_code_metrics(sample_code_complex)

print("\n--- Code Metrics and Issue Distribution (from direct Gemini API call) ---\n")
print(json.dumps(code_metrics_and_distribution, indent=2))


--- Code Metrics and Issue Distribution (from direct Gemini API call) ---

{
  "summary_metrics": {
    "code_quality_score": 15,
    "security_rating": 20,
    "bug_density": 2,
    "critical_issue_count": 1
  },
  "issue_distribution": {
    "security_vulnerabilities": 1,
    "code_smells": 2,
    "best_practices": 1,
    "performance_issues": 0
  }
}


In [None]:
def inject_bugs(code_snippet: str, bug_type: str, severity_level: int, num_bugs: int) -> dict:
  """
  Injects specified types and number of bugs into a given code snippet.

  Args:
    code_snippet (str): The original code snippet where bugs will be injected.
    bug_type (str): The type of bug to inject (e.g., 'SQL Injection', 'Division by Zero').
    severity_level (int): The severity level of the bugs (e.g., 1 for low, 5 for critical).
    num_bugs (int): The number of bugs to inject.

  Returns:
    dict: A dictionary containing the modified code with injected bugs and details
          about the injected bugs (e.g., their locations, types, and severities).
  """
  pass

In [None]:
import json
import google.generativeai as genai
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser

def inject_bugs(code_snippet: str, bug_type: str, severity_level: int, num_bugs: int) -> dict:
  """
  Injects specified types and number of bugs into a given code snippet using the Gemini API.

  Args:
    code_snippet (str): The original code snippet where bugs will be injected.
    bug_type (str): The type of bug to inject (e.g., 'SQL Injection', 'Division by Zero').
    severity_level (int): The severity level of the bugs (e.g., 1 for low, 5 for critical).
    num_bugs (int): The number of bugs to inject.

  Returns:
    dict: A dictionary containing the modified code with injected bugs and details
          about the injected bugs (e.g., their locations, types, and severities).
  """
  genai.configure(api_key=api_key)
  llm = ChatGoogleGenerativeAI(model="gemini-2.5-pro", google_api_key=api_key)

  prompt_template = ChatPromptTemplate.from_messages([
      ("system", "You are a helpful assistant that injects bugs into code based on given parameters and returns the modified code and bug details in JSON format."),
      ("human", """Inject {num_bugs} bugs of type '{bug_type}' with severity level {severity_level} into the following Python code snippet.\nProvide the output in a structured JSON format with two keys: 'buggy_code' (containing the full modified code) and 'bugs_injected' (an array of objects, where each object describes an injected bug with 'type', 'line_number', and 'description').\n\nCode:\n```python\n{code_snippet}\n```\n\nExample JSON format:\n{{\n  "buggy_code": "def example_function():\n    # Some example code without further template variables\n    return 0",\n  "bugs_injected": [\n    {{\n      "type": "{bug_type}", "line_number": 2, "description": "Description of the injected bug."\n    }}\n  ]\n}}\n""")
  ])

  output_parser = StrOutputParser()

  chain = prompt_template | llm | output_parser

  response = chain.invoke({
      "code_snippet": code_snippet,
      "num_bugs": num_bugs,
      "bug_type": bug_type,
      "severity_level": severity_level
  })

  # Remove markdown code block if present in the response
  if response.startswith('```json') and response.endswith('```'):
    response = response.replace('```json\n', '', 1)
    response = response.replace('\n```', '', 1)

  try:
    parsed_response = json.loads(response)
    return parsed_response
  except json.JSONDecodeError:
    print(f"Error: Invalid JSON string received from model: {response}")
    return {
        "buggy_code": code_snippet, # Return original code on error
        "bugs_injected": []
    }

In [None]:
sample_code_for_bug_injection = """def calculate_area(length, width):
    return length * width

def greet(name):
    print(f"Hello, {name}!")

if __name__ == "__main__":
    area = calculate_area(5, 10)
    greet("World")
"""

bug_injection_result = inject_bugs(
    code_snippet=sample_code_for_bug_injection,
    bug_type="Logic Error",
    severity_level=3,
    num_bugs=2
)

print("\n--- Bug Injection Result ---\n")
print(json.dumps(bug_injection_result, indent=2))


--- Bug Injection Result ---

{
  "buggy_code": "def calculate_area(length, width):\n    return length + width\n\ndef greet(name):\n    print(f\"Hello, {name}!\")\n\nif __name__ == \"__main__\":\n    area = calculate_area(5, 10)\n    greet(area)",
  "bugs_injected": [
    {
      "type": "Logic Error",
      "line_number": 2,
      "description": "The `calculate_area` function incorrectly uses addition (+) instead of multiplication (*), causing it to calculate the sum of the dimensions rather than the area."
    },
    {
      "type": "Logic Error",
      "line_number": 9,
      "description": "The `greet` function is called with the integer variable 'area' instead of the intended string 'World', causing the program to greet a number instead of the expected name."
    }
  ]
}


In [None]:
sample_code_for_bug_injection = """import sqlite3

def process_data(data_list):
    result = []
    for i in range(len(data_list)):
        if data_list[i] == None:   # Issue: bad None check
            print("Found None at index", i)
        result.append(data_list[i].strip())  # Potential AttributeError if None
    return result

def insecure_query(user_input):
    conn = sqlite3.connect("users.db")
    cursor = conn.cursor()
    # SQL injection vulnerability
    query = f"SELECT * FROM users WHERE name = '{user_input}'"
    cursor.execute(query)
    rows = cursor.fetchall()
    conn.close()
    return rows

def divide_numbers(a, b):
    # No error handling for division by zero
    return a / b

unused_variable = 42  # Declared but never used

def main():
    data = ["Alice", None, "Bob", "Charlie"]
    processed = process_data(data)
    print("Processed:", processed)

    # Dangerous: user input directly passed
    name = input("Enter username: ")
    print(insecure_query(name))

    print(divide_numbers(10, 0))  # Runtime error

if __name__ == "__main__":
    main()
"""

bug_injection_result = inject_bugs(
    code_snippet=sample_code_for_bug_injection,
    bug_type="Logic Error",
    severity_level=3,
    num_bugs=2
)

print("\n--- Bug Injection Result ---\n")
print(json.dumps(bug_injection_result, indent=2))


--- Bug Injection Result ---

{
  "buggy_code": "import sqlite3\n\ndef process_data(data_list):\n    result = []\n    for i in range(len(data_list) - 1): # Injected Bug 1\n        if data_list[i] == None:   # Issue: bad None check\n            print(\"Found None at index\", i)\n        result.append(data_list[i].strip())  # Potential AttributeError if None\n    return result\n\ndef insecure_query(user_input):\n    conn = sqlite3.connect(\"users.db\")\n    cursor = conn.cursor()\n    # SQL injection vulnerability\n    query = f\"SELECT * FROM users WHERE name = '{user_input}'\"\n    cursor.execute(query)\n    rows = cursor.fetchall()\n    conn.close()\n    return rows\n\ndef divide_numbers(a, b):\n    # No error handling for division by zero\n    return a // b # Injected Bug 2\n\nunused_variable = 42  # Declared but never used\n\ndef main():\n    # Modified data to avoid original crash and demonstrate the injected bug\n    data = [\"Alice  \", \"  Bob\", \"Charlie  \", \"David\"]\n    