In [None]:
from langchain_openai import ChatOpenAI
from langchain.prompts import ChatPromptTemplate
from langchain.output_parsers import PydanticOutputParser
from pydantic import BaseModel
from typing import List, Optional, Dict, Any
import logging
from enum import Enum
from github import Github
from github.GithubException import GithubException
import os

# Configure root logging at INFO level and create the module-level logger that
# the CodeReviewAssistant error handlers below write to.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Categories a review finding can be tagged with; used as the type of
# ``CodeIssue.type``. The ``str`` mixin makes every member a plain string too.
# NOTE(review): documented with ``#`` comments rather than a docstring because
# a class docstring may be copied into the JSON schema that is sent to the
# model as format instructions -- confirm before converting to a docstring.
class IssueType(str, Enum):
    BUG = "bug"
    CODE_SMELL = "code_smell"
    STYLE = "style"
    MISSING_TEST = "missing_test"
    MISSING_DOC = "missing_documentation"

# One finding reported by the reviewer model.
# NOTE(review): documented with ``#`` comments rather than a docstring because
# a class docstring may be copied into the JSON schema that is sent to the
# model as format instructions -- confirm before converting to a docstring.
class CodeIssue(BaseModel):
    # Category of the finding.
    type: IssueType
    # Human-readable explanation of the problem.
    description: str
    # Explicit ``None`` defaults: under pydantic v2 an ``Optional[...]`` field
    # without a default is still *required*, so a model reply that simply
    # omits these keys would fail validation instead of parsing as "absent".
    line_number: Optional[int] = None
    suggested_fix: Optional[str] = None

# Structured result the review chain parses the model reply into.
# NOTE(review): documented with ``#`` comments rather than a docstring because
# a class docstring may be copied into the JSON schema that is sent to the
# model as format instructions -- confirm before converting to a docstring.
class CodeReviewResult(BaseModel):
    # Short summary of the change; also used as the pull request title.
    summary: str
    # Longer description of what changed.
    changes_description: str
    # Findings reported for the diff (may be empty).
    issues: List[CodeIssue]
    # Ready-to-use pull request description text.
    pr_description: str

# Credentials and target repository for the review run.
#
# Secrets are taken from the environment when already exported, so a real key
# is never clobbered by the empty placeholder and does not have to be pasted
# into the notebook. Replace the "" fallbacks only for local experiments.
os.environ.setdefault('OPENAI_API_KEY', "")
GITHUB_TOKEN = os.environ.get('GITHUB_TOKEN', "")
# Repository to review, in the form accepted by ``Github.get_repo``.
REPO_NAME = ""
# Head branch whose changes are compared against the base branch.
NEW_BRANCH = ""

In [2]:
class CodeReviewAssistant:
    """
    Review the diff between two branches with an LLM and publish the result
    as a GitHub pull request.

    Constructing an instance is not cheap: it fetches the branch comparison
    from GitHub and immediately runs the LLM review. The outcome is kept on
    ``self.diff`` and ``self.review_result`` for the other methods to use.
    """

    def __init__(
        self,
        repo_name: str,
        head_branch: str,
        base_branch: str = "main",
        github_token: Optional[str] = None,
        model_name: str = "gpt-4-turbo",
    ):
        """
        Initialize the code review assistant with OpenAI and optional GitHub credentials.

        Args:
            repo_name (str): Git repo name
            head_branch (str): Head branch name
            base_branch (str): Base branch name
            github_token (Optional[str]): GitHub personal access token. When
                omitted (or empty) an anonymous client is used, which is
                enough to read public repositories but not to open a PR.
            model_name (str): Name of the OpenAI model to use

        Raises:
            GithubException: If the repository or the comparison cannot be fetched
            Exception: If the LLM call fails
        """
        self.llm = ChatOpenAI(temperature=0, model_name=model_name)
        output_parser = PydanticOutputParser(pydantic_object=CodeReviewResult)
        # Fall back to an anonymous client instead of ``None`` so the
        # ``get_repo`` call below cannot fail with an AttributeError.
        self.github = Github(github_token) if github_token else Github()
        # Remembered so write operations can fail early with a clear message.
        self._authenticated = bool(github_token)
        self.repo = self.github.get_repo(repo_name)
        self.head_branch = head_branch
        self.base_branch = base_branch
        self.repo_name = repo_name
        self.diff = self.get_git_diff()

        print("Diff between branches generated")

        review_template = """
        You are a senior software engineer reviewing a pull request. Please analyze the following git diff:

        {diff}

        Analyze the changes and provide:
        1. A brief summary of the changes
        2. A detailed description of what changed
        3. Any potential issues, bugs, code smells, or missing tests/documentation
        4. A pull request description that can be used directly

        {format_instructions}
        """

        review_prompt = ChatPromptTemplate.from_template(
            template=review_template,
            partial_variables={
                "format_instructions": output_parser.get_format_instructions()
            }
        )

        # prompt -> model -> parser; invoking it yields a CodeReviewResult.
        self.review_chain = review_prompt | self.llm | output_parser
        self.review_result = self.review_code(self.diff)

        print("Review results generated")
        print(self.review_result)

    @staticmethod
    def _issue_label(issue) -> str:
        """Return a human-readable heading for an issue, e.g. ``Code Smell``."""
        return issue.type.value.replace("_", " ").title()

    def get_git_diff(self) -> str:
        """
        Build a textual diff between ``base_branch`` and ``head_branch``.

        Each changed file contributes one section with its name, status and
        change counters on separate lines, followed by the patch when one is
        available. Sections are separated by a blank line.

        Returns:
            str: The combined diff text; empty when no files changed
        """
        comparison = self.repo.compare(self.base_branch, self.head_branch)
        sections = []
        for changed in comparison.files:
            lines = [
                f"File: {changed.filename}",
                f"Status: {changed.status}",
                f"Additions: {changed.additions}",
                f"Deletions: {changed.deletions}",
                f"Changes: {changed.changes}",
            ]
            # Some files might not have a patch (like binary files)
            if changed.patch:
                lines.append("Diff:")
                lines.append(changed.patch)
            sections.append("\n".join(lines))
        return "\n\n".join(sections)

    def review_code(self, diff: str) -> "CodeReviewResult":
        """
        Review the provided git diff and return structured feedback.

        Args:
            diff (str): The git diff to review

        Returns:
            CodeReviewResult: Structured review feedback

        Raises:
            Exception: If the LLM call fails
        """
        try:
            return self.review_chain.invoke({"diff": diff})
        except Exception as e:
            # Log for visibility, then let the caller decide how to react.
            logger.error("Error during code review: %s", e)
            raise

    def generate_review_comment(self) -> str:
        """
        Generate a formatted review comment from the review result.

        The text is built line by line with no leading indentation, because
        Markdown renders lines indented by four or more spaces as code blocks
        rather than as headings and list items.

        Returns:
            str: Formatted review comment
        """
        result = self.review_result
        parts = [
            "# Code Review Summary",
            result.summary,
            "",
            "## Changes Overview",
            result.changes_description,
            "",
            "## Issues Found",
        ]
        if result.issues:
            for issue in result.issues:
                parts.extend([
                    "",
                    f"### {self._issue_label(issue)}",
                    f"- Description: {issue.description}",
                    f"- Line: {issue.line_number or 'N/A'}",
                ])
                if issue.suggested_fix:
                    parts.append(
                        f"- Suggested Fix:\n```python\n{issue.suggested_fix}\n```"
                    )
        else:
            parts.extend(["", "No issues found."])

        parts.extend(["", "## Pull Request Description", result.pr_description])
        return "\n".join(parts) + "\n"

    def _add_inline_comments(self, pr) -> int:
        """
        Best-effort: attach one inline review comment per issue that has a line number.

        The review result carries no file path, so a comment can only be
        placed when the pull request changes exactly one file. Failures here
        never abort the caller: the pull request already exists and its body
        already lists every issue.

        Returns:
            int: Number of inline comments actually created
        """
        located = [issue for issue in self.review_result.issues if issue.line_number]
        if not located:
            return 0

        try:
            changed_paths = [changed.filename for changed in pr.get_files()]
            if len(changed_paths) != 1:
                logger.info(
                    "Skipping inline comments: cannot map issues to a file "
                    "when the pull request changes %d files",
                    len(changed_paths),
                )
                return 0
            # NOTE(review): the comment API is expected to take a Commit
            # object rather than a bare SHA -- confirm for the installed
            # PyGithub version.
            head_commit = self.repo.get_commit(pr.head.sha)
        except GithubException as e:
            logger.warning("Could not prepare inline comments: %s", e)
            return 0

        path = changed_paths[0]
        added = 0
        for issue in located:
            body = (
                f"**{self._issue_label(issue)}**: {issue.description}\n"
                f"Suggested fix: {issue.suggested_fix if issue.suggested_fix else 'None'}"
            )
            try:
                pr.create_review_comment(body, head_commit, path, line=issue.line_number)
            except GithubException as e:
                # E.g. the reported line is not part of the diff.
                logger.warning(
                    "Could not add inline comment at %s:%s: %s",
                    path, issue.line_number, e,
                )
            else:
                added += 1
        return added

    def create_pull_request(self, draft: bool = False) -> Dict[str, Any]:
        """
        Create a pull request on GitHub with the review results.

        Args:
            draft (bool): Whether to create as a draft PR

        Returns:
            Dict[str, Any]: Pull request information with the keys
                ``pr_number``, ``pr_url``, ``status`` and
                ``review_comments_added`` (inline comments actually posted)

        Raises:
            ValueError: If GitHub integration is not configured (no token given)
            GithubException: If PR creation fails
        """
        if not self._authenticated:
            raise ValueError(
                "GitHub integration is not configured: a github_token is "
                "required to create a pull request"
            )

        # Create the PR description
        pr_body = self.generate_review_comment()

        try:
            # Create the pull request
            pr = self.repo.create_pull(
                title=self.review_result.summary,
                body=pr_body,
                base=self.base_branch,
                head=self.head_branch,
                draft=draft
            )
        except GithubException as e:
            logger.error("Failed to create pull request: %s", e)
            raise

        # Add review comments for each issue (best-effort, never raises
        # GithubException once the pull request exists).
        comments_added = self._add_inline_comments(pr)

        return {
            "pr_number": pr.number,
            "pr_url": pr.html_url,
            "status": "created",
            "review_comments_added": comments_added
        }

    def get_pull_request(self, pr_number: int) -> Dict[str, Any]:
        """
        Get information about an existing pull request.

        Args:
            pr_number (int): Pull request number

        Returns:
            Dict[str, Any]: Pull request information

        Raises:
            GithubException: If the pull request cannot be fetched
        """
        try:
            pr = self.repo.get_pull(pr_number)
        except GithubException as e:
            logger.error("Failed to get pull request: %s", e)
            raise

        return {
            "number": pr.number,
            "title": pr.title,
            "state": pr.state,
            "url": pr.html_url,
            "created_at": pr.created_at,
            "updated_at": pr.updated_at,
            "comments": pr.comments
        }

In [3]:
# Build the assistant; constructing it fetches the branch diff and runs the
# LLM review right away.
assistant = CodeReviewAssistant(
    repo_name=REPO_NAME,
    head_branch=NEW_BRANCH,
    base_branch="main",
    github_token=GITHUB_TOKEN,
)

# Open a draft pull request that carries the generated review.
pr_info = assistant.create_pull_request(draft=True)
print(f"Created PR: {pr_info['pr_url']}")

# Read the pull request back and report its current state.
pr_details = assistant.get_pull_request(pr_number=pr_info['pr_number'])
print(f"PR Status: {pr_details['state']}")

Diff between branches generated


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


Review results generated
summary='The pull request introduces several changes to the data processing and analysis functions, including a new class for data analysis, modifications to the data processing logic, and a restructuring of the metrics calculation.' changes_description="The changes in the pull request include: 1. The data processing function 'process_data' now multiplies items by 1.5 instead of 2 and uses a new variable name 'res' for results. 2. The 'analyze_results' function has been replaced with 'calculate_metrics', which now also calculates the total and uses a manual method to find the maximum value instead of using the built-in 'max' function. 3. A new class 'DataAnalyzer' has been introduced to manage a list of data and extend it with new numbers. 4. The main execution block now includes examples of using both the 'process_data' function and the 'DataAnalyzer' class." issues=[CodeIssue(type=<IssueType.BUG: 'bug'>, description="The 'calculate_metrics' function will rais