In [None]:
from langchain_openai import ChatOpenAI
from langchain.prompts import ChatPromptTemplate
from langchain.output_parsers import PydanticOutputParser
from pydantic import BaseModel, Field
from typing import List, Optional, Dict, Any
import logging
from enum import Enum
from github import Github
from github.GithubException import GithubException
import os

# Set up logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Configuration is read from the environment so that no secret has to be typed
# into (and saved with) the notebook. An OPENAI_API_KEY that is already exported
# is left untouched instead of being overwritten with an empty string.
if not os.environ.get("OPENAI_API_KEY"):
    logger.warning("OPENAI_API_KEY is not set; requests to the OpenAI API will fail")

# Each value falls back to "" (the previous placeholder) when the variable is
# unset; a literal can still be assigned here for a quick local experiment.
GITHUB_TOKEN = os.environ.get("GITHUB_TOKEN", "")  # GitHub personal access token
REPO_NAME = os.environ.get("REPO_NAME", "")        # repository to review, e.g. "owner/name"
NEW_BRANCH = os.environ.get("NEW_BRANCH", "")      # head branch compared against the base branch

In [2]:
class IssueType(str, Enum):
    # Categories used for CodeIssue.type. Because the enum also derives from
    # ``str``, every member is a string equal to its value
    # (e.g. IssueType.BUG == "bug").
    # NOTE(review): documented with comments rather than a docstring because a
    # docstring may be copied into the JSON schema that is sent to the model --
    # confirm before converting these comments.
    BUG = "bug"
    CODE_SMELL = "code_smell"
    STYLE = "style"
    MISSING_TEST = "missing_test"
    MISSING_DOC = "missing_documentation"
    PERFORMANCE = "performance"
    SECURITY = "security"

class CodeSnippet(BaseModel):
    """Represents a code suggestion or fix"""
    # Used as the type of CodeIssue.suggested_fix and
    # ImprovementSuggestion.code_snippet.
    # A Field whose first argument is ``...`` is required; a Field whose first
    # argument is ``None`` is optional and defaults to None.
    description: str = Field(..., description="Description of what this code snippet addresses")
    # Only meaningful when the snippet replaces existing code.
    before: Optional[str] = Field(None, description="Original code if this is a modification")
    after: str = Field(..., description="The suggested code")
    # Location hints; both may be absent.
    file_path: Optional[str] = Field(None, description="The file path this snippet applies to")
    start_line: Optional[int] = Field(None, description="Starting line number in the original file")
    
class CodeIssue(BaseModel):
    """Represents a single issue found in the reviewed changes"""
    # The three Optional fields carry an explicit ``None`` default. Without it
    # Pydantic v2 treats ``Optional[X]`` as "required, but may be null", so a
    # model response that simply omits the key fails validation. The explicit
    # default also matches how CodeSnippet and ImprovementSuggestion declare
    # their optional fields.
    type: IssueType = Field(..., description="Category of the issue")
    description: str = Field(..., description="Explanation of the issue and why it matters")
    line_number: Optional[int] = Field(None, description="Line number in the changed file where the issue occurs")
    file_path: Optional[str] = Field(None, description="Path of the file that contains the issue")
    suggested_fix: Optional[CodeSnippet] = Field(None, description="Code snippet that resolves the issue")

class ImprovementSuggestion(BaseModel):
    """Represents a general improvement suggestion"""
    # Instances are collected in CodeReviewResult.improvements.
    title: str = Field(..., description="Title of the improvement")
    description: str = Field(..., description="Detailed description of the improvement")
    # Optional example; defaults to None when the suggestion has no code attached.
    code_snippet: Optional[CodeSnippet] = Field(None, description="Code example for the improvement")
    # Plain string: the description asks for high/medium/low, but nothing in
    # this class restricts the value to those three words.
    priority: str = Field(..., description="Priority level (high/medium/low)")

class CodeReviewResult(BaseModel):
    # Top-level structure the review chain is parsed into (it is the
    # ``pydantic_object`` handed to PydanticOutputParser in CodeReviewAssistant).
    # Used as the pull request title by CodeReviewAssistant.create_pull_request.
    summary: str
    # Rendered under "Changes Overview" in the generated review comment.
    changes_description: str
    # Rendered under "Issues Found"; also the source of inline review comments.
    issues: List[CodeIssue]
    # Defaults to a fresh empty list when no improvements are supplied.
    improvements: List[ImprovementSuggestion] = Field(
        default_factory=list,
        description="General improvement suggestions"
    )
    # Rendered under "Pull Request Description" in the generated review comment.
    pr_description: str

In [3]:
class CodeReviewAssistant:
    def __init__(
        self,
        repo_name: str,
        head_branch: str,
        base_branch: str = "main",
        github_token: Optional[str] = None,
        model_name: str = "gpt-4-turbo",
    ):
        """
        Set up the LLM and GitHub clients, then immediately run the review.

        Construction is not cheap: it calls the GitHub API to compare the two
        branches (stored in ``self.diff``) and then invokes the review chain on
        that diff (stored in ``self.review_result``). Progress messages and the
        parsed review are printed.

        Args:
            repo_name (str): Git repo name
            head_branch (str): Head branch name
            base_branch (str): Base branch name
            github_token (Optional[str]): GitHub personal access token. When it
                is missing or empty an anonymous GitHub client is used instead.
            model_name (str): Name of the OpenAI model to use

        Raises:
            GithubException: If the repository or the branch comparison cannot
                be fetched.
            Exception: If the LLM call or the parsing of its output fails.
        """
        self.llm = ChatOpenAI(temperature=0, model_name=model_name)
        output_parser = PydanticOutputParser(pydantic_object=CodeReviewResult)

        # Previously ``self.github`` became None without a token and the very
        # next line crashed with AttributeError, although the token is declared
        # optional. Fall back to an unauthenticated client so the diff can
        # still be fetched; operations that need write access (such as opening
        # a pull request) are expected to be rejected by GitHub in that mode.
        if github_token:
            self.github = Github(github_token)
        else:
            logger.warning(
                "No GitHub token supplied; using anonymous GitHub access"
            )
            self.github = Github()
        self.repo = self.github.get_repo(repo_name)
        self.head_branch = head_branch
        self.base_branch = base_branch
        self.repo_name = repo_name
        self.diff = self.get_git_diff()
        
        print("Diff between branches generated")

        # ``{diff}`` is filled in on every invocation; ``{format_instructions}``
        # is bound once below from the output parser.
        review_template = """
        You are a senior software engineer reviewing a pull request. Please analyze the following git diff thoroughly:

        {diff}

        Analyze the changes and provide a detailed review including:

        1. A short summary of the changes (one lined or phrase)
        2. A detailed description of what changed, including the rationale behind major changes
        3. Critical Analysis:
           - Identify potential bugs, security issues, or code smells
           - Performance considerations
           - Architecture and design patterns
           - Code style and readability issues
           - Missing tests or documentation
        4. Specific code improvement suggestions:
           - Provide concrete code snippets for fixes
           - Include both small fixes and larger refactoring suggestions
           - For each suggestion, show the current code (if applicable) and the improved version
        5. A professional pull request description that summarizes all key points

        For each issue or suggestion:
        - Include the file path and line numbers where applicable
        - Provide detailed explanations of why the change is recommended
        - Include complete, working code snippets that can be directly applied
        - Consider edge cases and potential impact on other parts of the codebase

        Be thorough and specific in your suggestions. Include actual code snippets that demonstrate best practices, 
        proper error handling, and improved patterns.

        {format_instructions}
        """

        review_prompt = ChatPromptTemplate.from_template(
            template=review_template,
            partial_variables={
                "format_instructions": output_parser.get_format_instructions()
            }
        )

        # prompt -> model -> structured CodeReviewResult
        self.review_chain = review_prompt | self.llm | output_parser
        self.review_result = self.review_code(self.diff)

        print("Review results generated")
        print(self.review_result)

    def get_git_diff(self, ) -> str:
        """
        Generates the git diff
        """
        comparison = self.repo.compare(self.base_branch, self.head_branch)
        git_diff = ""
        for file in comparison.files:
            git_diff+=f"File: {file.filename}\n"
            git_diff+=f"Status: {file.status}\n"
            git_diff+=f"Additions: {file.additions}\n"
            git_diff+=f"Deletions: {file.deletions}\n"
            git_diff+=f"Changes: {file.changes}\n"    
            # Print the actual diff/patch
            if file.patch:  # Some files might not have a patch (like binary files)
                git_diff+="\nDiff:\n"
                git_diff += f"{file.patch}\n"
        return git_diff
 
    def review_code(self, diff: str) -> CodeReviewResult:
        """
        Review the provided git diff and return structured feedback.
        
        Args:
            diff (str): The git diff to review
            
        Returns:
            CodeReviewResult: Structured review feedback
            
        Raises:
            Exception: If the LLM call fails
        """
        try:
            result = self.review_chain.invoke({"diff": diff})
            return result
        except Exception as e:
            logger.error(f"Error during code review: {str(e)}")
            raise

    def generate_review_comment(self) -> str:
        """Generate a formatted review comment with enhanced code snippets"""
        comment = f"""
            # Code Review Summary
            {self.review_result.summary}

            ## Changes Overview
            {self.review_result.changes_description}

            ## Issues Found
            """
        if self.review_result.issues:
            for issue in self.review_result.issues:
                comment += f"""
                    ### {issue.type.value.title()}
                    - Description: {issue.description}
                    - Location: {issue.file_path}:{issue.line_number if issue.line_number else 'N/A'}
                    """
                if issue.suggested_fix:
                    comment += f"""
                        #### Suggested Fix:
                        **Description**: {issue.suggested_fix.description}

                        """
                    if issue.suggested_fix.before:
                        comment += f"""
                            Current Code:
                            ```python
                            {issue.suggested_fix.before}
                            ```
                            """
                    comment += f"""
                        Improved Code:
                        ```python
                        {issue.suggested_fix.after}
                        ```
                        """

        comment += "\n## Improvement Suggestions\n"
        for improvement in self.review_result.improvements:
            comment += f"""
                ### {improvement.title}
                - Priority: {improvement.priority}
                - Description: {improvement.description}
                """
            if improvement.code_snippet:
                comment += f"""
                    #### Implementation Example:
                    """
                if improvement.code_snippet.before:
                    comment += f"""
                        Current Pattern:
                        ```python
                        {improvement.code_snippet.before}
                        ```
                        """
                comment += f"""
                    Suggested Implementation:
                    ```python
                    {improvement.code_snippet.after}
                    ```
                    """

        comment += f"""
            ## Pull Request Description
            {self.review_result.pr_description}
            """
        return comment

    def create_pull_request(self, draft: bool = False) -> Dict[str, Any]:
        """
        Create a pull request on GitHub with the review results.
        
        Args:
            draft (bool): Whether to create as a draft PR
            
        Returns:
            Dict[str, Any]: Pull request information
            
        Raises:
            ValueError: If GitHub integration is not configured
            GithubException: If PR creation fails
        """

        try:            
            # Create the PR description
            pr_body = self.generate_review_comment()
            
            # Create the pull request
            pr = self.repo.create_pull(
                title=self.review_result.summary,
                body=pr_body,
                base=self.base_branch,
                head=self.head_branch,
                draft=draft
            )
            
            # Get the last commit of the PR
            commits = list(pr.get_commits())
            last_commit = commits[-1] if commits else None
            
            # Create a review with comments
            if self.review_result.issues and last_commit:
                comments = []
                for issue in self.review_result.issues:
                    if issue.line_number and issue.file_path:
                        comments.append({
                            'path': issue.file_path,
                            'position': issue.line_number,
                            'body': f"**{issue.type.value.title()}**: {issue.description}\n"
                                f"Suggested fix: {issue.suggested_fix.after if issue.suggested_fix else 'None'}"
                        })
                
                if comments:
                    pr.create_review(
                        commit=last_commit,
                        comments=comments,
                        event='COMMENT'  # Can be 'APPROVE', 'REQUEST_CHANGES', or 'COMMENT'
                    )
            
            return {
                "pr_number": pr.number,
                "pr_url": pr.html_url,
                "status": "created",
                "review_comments_added": len(self.review_result.issues)
            }
            
        except GithubException as e:
            logger.error(f"Failed to create pull request: {str(e)}")
            raise

    def get_pull_request(self, pr_number: int) -> Dict[str, Any]:
        """
        Get information about an existing pull request.
        
        Args:
            pr_number (int): Pull request number
            
        Returns:
            Dict[str, Any]: Pull request information
        """

        try:
            pr = self.repo.get_pull(pr_number)
            
            return {
                "number": pr.number,
                "title": pr.title,
                "state": pr.state,
                "url": pr.html_url,
                "created_at": pr.created_at,
                "updated_at": pr.updated_at,
                "comments": pr.comments
            }
        except GithubException as e:
            logger.error(f"Failed to get pull request: {str(e)}")
            raise

In [5]:
# Build the assistant; constructing it already fetches the branch diff and runs the review
assistant = CodeReviewAssistant(
    repo_name=REPO_NAME,
    head_branch=NEW_BRANCH,
    base_branch="main",
    github_token=GITHUB_TOKEN,
)

# Open the pull request as a draft and report where it lives
pr_info = assistant.create_pull_request(draft=True)
created_url = pr_info['pr_url']
print(f"Created PR: {created_url}")

# Read the pull request back to show its current state
pr_details = assistant.get_pull_request(pr_number=pr_info['pr_number'])
current_state = pr_details['state']
print(f"PR Status: {current_state}")

Diff between branches generated


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


Review results generated
summary='Refactoring and enhancement of data processing and analysis functions with added class structure for better data management.' changes_description="The pull request introduces several changes to the data processing script. The function 'process_data' now multiplies items by 1.5 instead of 2, and variable names have been changed for clarity. A new function 'calculate_metrics' replaces 'analyze_results', calculating total, average, and maximum without using built-in functions for max. A new class 'DataAnalyzer' has been added for managing data collections, and example usage in the main block demonstrates the new structure and functionality." issues=[CodeIssue(type=<IssueType.BUG: 'bug'>, description="The 'calculate_metrics' function will raise a ZeroDivisionError if an empty list is passed.", line_number=20, file_path='datat_proc.py', suggested_fix=CodeSnippet(description='Add a check for an empty list before dividing by the length.', before='avg = total 