In [24]:
import os
import requests
from dotenv import load_dotenv
from IPython.display import Markdown, display
import google.generativeai as genai

In [25]:
# Load environment variables from a local .env file and configure the Gemini client.
load_dotenv(override=True)
api_key = os.getenv('GOOGLE_API_KEY')
# Use a personal access token (PAT) for authentication. This allows access to private repositories and avoids low request limits.
# You can generate a token at: https://github.com/settings/tokens
github_token = os.getenv('GITHUB_TOKEN')

# Report missing credentials here, next to where they are read, instead of
# letting them show up later as an opaque HTTP/auth error in another cell.
if not api_key:
    print("Warning: GOOGLE_API_KEY is not set; Gemini requests may fail until it is configured.")
if not github_token:
    print("Warning: GITHUB_TOKEN is not set; private repositories will not be accessible.")

genai.configure(api_key=api_key)

In [26]:
def _parse_pr_url(pr_url: str) -> tuple:
    """Split a GitHub pull-request URL into ``(owner, repo, pr_number)`` strings.

    Accepts URLs of the form ``https://<host>/<owner>/<repo>/pull/<number>``,
    optionally followed by extra path segments (``/files``), a query string
    or a fragment.

    Raises:
        ValueError: if the URL does not have that layout.
    """
    # Query strings and fragments would otherwise end up glued to the PR number.
    trimmed = pr_url.strip().split("#", 1)[0].split("?", 1)[0]
    parts = trimmed.rstrip("/").split("/")
    # Expected layout: ['https:', '', host, owner, repo, 'pull', number, ...]
    looks_valid = (
        len(parts) >= 7
        and parts[3] != ""
        and parts[4] != ""
        and parts[5] in ("pull", "pulls")
        and parts[6].isdigit()
    )
    if not looks_valid:
        raise ValueError(
            f"Not a GitHub pull request URL: {pr_url!r} "
            "(expected https://github.com/<owner>/<repo>/pull/<number>)"
        )
    return parts[3], parts[4], parts[6]


def extract_diff_from_pr(pr_url: str) -> str:
    """Download the unified diff of a GitHub pull request.

    Args:
        pr_url: Web URL of the pull request, e.g.
            ``https://github.com/<owner>/<repo>/pull/<number>``.

    Returns:
        The diff text returned by GitHub.

    Raises:
        ValueError: if ``pr_url`` is not a pull-request URL.
        requests.HTTPError: if GitHub answers with an error status.
    """
    owner, repo, pr_number = _parse_pr_url(pr_url)

    # The diff media type and token authentication belong to the REST API,
    # so the request goes to api.github.com rather than the web ".diff" page.
    api_url = f"https://api.github.com/repos/{owner}/{repo}/pulls/{pr_number}"
    headers = {"Accept": "application/vnd.github.v3.diff"}
    # Only authenticate when a token is configured; otherwise the header would
    # carry the literal text "token None".
    if github_token:
        headers["Authorization"] = f"token {github_token}"

    # A timeout keeps the notebook cell from hanging on a stalled connection.
    response = requests.get(api_url, headers=headers, timeout=30)
    response.raise_for_status()

    return response.text

In [27]:
# System instruction sent to the model: reviewer persona, focus areas and the
# required two-section output format.
system_prompt = """You are a Senior Java Architect specializing in clean code and maintainability. Your primary directive is to perform a constructive, detailed code review. Focus areas MUST include:

Java Best Practices (e.g., proper use of final, static, encapsulation).

Design Principles (e.g., SOLID, separation of concerns).

Resource Management (e.g., closing Scanner objects).

Naming Conventions (standard Java and CamelCase adherence).

Security and Performance (e.g., avoiding unnecessary I/O in calculation methods).

Present your final review under two main sections: 'Major Issues and Refactoring Suggestions' and 'Minor Style and Naming Issues'. Respond only in English and use GitHub-flavored Markdown for clarity, including code blocks for examples."""

In [28]:
# Instruction header placed in front of the diff in every user message.
# Empty entries produce the blank lines separating the paragraphs.
user_prompt_prefix = "\n".join(
    [
        "Review the following Java code (specifically designed to be low quality).",
        "",
        "Please analyze the code against the standards set in your system prompt. For each issue found, state the problem clearly and provide the corrected code snippet or architectural suggestion.",
        "",
        "Your final output must adhere to the two-section structure:",
        "",
        "1. Major Issues and Refactoring Suggestions",
        "2. Minor Style and Naming Issues",
    ]
)


In [29]:
def user_prompt_for(code_diffs: str) -> str:
    """Build the user message: the instruction prefix, a blank line, then the diff.

    Both parts are stripped of surrounding whitespace before being joined.
    """
    header = user_prompt_prefix.strip()
    body = code_diffs.strip()
    return "\n\n".join((header, body))


In [30]:
def code_review_for(code_diffs: str) -> dict:
    """Bundle the prompts for a review request.

    Returns a dict with the fixed system prompt under ``"system"`` and the
    user message built from ``code_diffs`` under ``"user"``.
    """
    return dict(system=system_prompt, user=user_prompt_for(code_diffs))

In [31]:
# Model names tried first, in priority order. The first entry doubles as the
# default when the model list cannot be retrieved.
_PREFERRED_GEMINI_MODELS = (
    'gemini-2.5-flash',
    'gemini-2.5-pro',
    'gemini-2.0-flash-exp',
    'gemini-1.5-flash',
    'gemini-1.5-pro',
)


def _choose_model(available_models):
    """Pick one name from ``available_models``; return None when the list is empty."""
    if not available_models:
        return None

    # 1) A known-good name from the preference list.
    for preferred in _PREFERRED_GEMINI_MODELS:
        if preferred in available_models:
            print(f"\n✓ Selected preferred model: {preferred}")
            return preferred

    # 2) Otherwise the first name that is not marked preview/experimental.
    for candidate in available_models:
        lowered = candidate.lower()
        if 'preview' not in lowered and 'exp' not in lowered:
            print(f"\n✓ Selected stable model: {candidate}")
            return candidate

    # 3) Last resort: whatever was listed first.
    selected = available_models[0]
    print(f"\n✓ Using first available model: {selected}")
    return selected


def get_gemini_model():
    """Return the name of a model that supports ``generateContent``.

    Lists the models visible to the configured API key, keeps those whose
    supported generation methods include ``generateContent`` and picks one
    (preferred names first, then any non-preview/experimental name, then the
    first listed). If listing fails or yields nothing, the first preferred
    name is returned unverified and a warning is printed.

    Returns:
        The bare model name (without the ``models/`` prefix).
    """
    available_models = []
    try:
        print("Checking available Gemini models...")
        for model in genai.list_models():
            if 'generateContent' in model.supported_generation_methods:
                model_name = model.name.split('/')[-1]  # Get just the model name part
                available_models.append(model_name)
                print(f"✓ Found: {model_name}")
    except Exception as e:
        # Best effort: a failed listing must not stop the notebook. A partial
        # list is discarded so the choice does not depend on where it failed.
        print(f"Could not list models: {e}")
        available_models = []

    chosen = _choose_model(available_models)
    if chosen is not None:
        return chosen

    # Constructing a GenerativeModel performs no API call, so it cannot confirm
    # that a name is usable; return the default and say that it is unverified.
    default_model = _PREFERRED_GEMINI_MODELS[0]
    print(f"\n⚠️  Warning: Could not verify any model. Using '{default_model}' as default.")
    return default_model

# Store the model name for use in other cells
# Resolve the model once so that later cells reuse the same name.
GEMINI_MODEL = get_gemini_model()
print(f"\n📌 Using model: {GEMINI_MODEL}")
print("💡 Tip: If you hit quota limits, wait a few minutes or try a different model.")

Checking available Gemini models...
✓ Found: gemini-2.5-pro-vtea-da-csi
✓ Found: gemini-2.5-pro-preview-03-25
✓ Found: gemini-2.5-flash
✓ Found: gemini-2.5-pro-preview-05-06
✓ Found: gemini-2.5-pro-preview-06-05
✓ Found: gemini-2.5-pro
✓ Found: gemini-2.0-flash-exp
✓ Found: gemini-2.0-flash
✓ Found: gemini-2.0-flash-001
✓ Found: gemini-2.0-flash-exp-image-generation
✓ Found: gemini-2.0-flash-lite-001
✓ Found: gemini-2.0-flash-lite
✓ Found: gemini-2.0-flash-lite-preview-02-05
✓ Found: gemini-2.0-flash-lite-preview
✓ Found: gemini-2.0-pro-exp
✓ Found: gemini-2.0-pro-exp-02-05
✓ Found: gemini-exp-1206
✓ Found: gemini-2.0-flash-thinking-exp-01-21
✓ Found: gemini-2.0-flash-thinking-exp
✓ Found: gemini-2.0-flash-thinking-exp-1219
✓ Found: gemini-2.5-flash-preview-tts
✓ Found: gemini-2.5-pro-preview-tts
✓ Found: learnlm-2.0-flash-experimental
✓ Found: gemma-3-1b-it
✓ Found: gemma-3-4b-it
✓ Found: gemma-3-12b-it
✓ Found: gemma-3-27b-it
✓

In [32]:
def start_review(pr: str) -> str:
    """Fetch a pull request's diff and ask the Gemini model to review it.

    Args:
        pr: URL of the pull request, passed on to ``extract_diff_from_pr``.

    Returns:
        The model's review text. If the generation call fails, a short
        ``"❌ Error: ..."`` string (message truncated to 200 characters) is
        returned instead of raising, so the caller can still render something.

    Note:
        Errors raised while downloading the diff or constructing the model
        happen outside the ``try`` block and propagate to the caller.
    """
    prompt_parts = code_review_for(extract_diff_from_pr(pr))
    model = genai.GenerativeModel(
        GEMINI_MODEL,
        system_instruction=prompt_parts["system"]
    )

    try:
        response = model.generate_content(prompt_parts["user"])
        return response.text
    except Exception as e:
        # Deliberate best effort: report the failure as displayable text.
        error_msg = str(e)
        return f"❌ Error: {error_msg[:200]}"

In [33]:
def display_code_review(pr_link):
    """Generate a review for ``pr_link`` and render it as Markdown in the notebook."""
    rendered = Markdown(start_review(pr_link))
    display(rendered)

In [34]:
# Review the example pull request and render the result below.
display_code_review("https://github.com/jvkvasanth/Spark/pull/1")

❌ Error: 403 Your API key was reported as leaked. Please use another API key.