In [1]:
from openai import OpenAI
import requests
from dotenv import load_dotenv
import os

# Load environment variables from the "local.env" file so that they can be
# read with os.getenv() further down.
load_dotenv("local.env")

def review_code_with_gpt4(code_diff, openai_api_key):
    """
    Send a code diff to GPT-4 and return the model's review feedback.

    :param code_diff: The code differences as a string.
    :param openai_api_key: Your OpenAI API key.
    :return: The feedback from GPT-4 as a string. If anything goes wrong the
        error is printed and the fixed message
        "Failed to get feedback from GPT-4." is returned instead of raising.
    """
    prompt = (
        f"Review the following github PR code changes:\n\n{code_diff}\n\n"
        "Provide detailed constructive feedback to improve the code given. "
        "When suggesting changes, please show the line before and after change."
    )
    try:
        # The client is created inside the try block so that a failure while
        # constructing it (e.g. no usable API key) is reported the same way
        # as a failed request instead of escaping as an exception.
        client = OpenAI(api_key=openai_api_key)
        response = client.chat.completions.create(
            model="gpt-4",
            messages=[{"role": "user", "content": prompt}],
        )
        content = response.choices[0].message.content
    except Exception as e:
        print(f"An error occurred: {e}")
        return "Failed to get feedback from GPT-4."
    # The message content may be None; always hand callers a string, as the
    # docstring promises, so they can safely join or print it.
    return content if content is not None else ""
    
def fetch_pr_diff(owner, repo, pr_number, github_token=None, timeout=30):
    """
    Fetch the diff of a GitHub pull request.

    :param owner: Owner (user or organisation) of the repository.
    :param repo: Name of the repository.
    :param pr_number: Number of the pull request.
    :param github_token: Optional GitHub token; when given it is sent in the
        Authorization header.
    :param timeout: Seconds to wait for GitHub before giving up.
    :return: The diff of the PR as text, or None when the request fails or
        GitHub answers with a status code other than 200.
    """
    url = f"https://api.github.com/repos/{owner}/{repo}/pulls/{pr_number}"
    # This Accept header asks GitHub for the raw diff rather than JSON.
    headers = {"Accept": "application/vnd.github.v3.diff"}
    if github_token:
        headers["Authorization"] = f"token {github_token}"

    try:
        # Without a timeout a stalled connection would block forever.
        response = requests.get(url, headers=headers, timeout=timeout)
    except requests.exceptions.RequestException as e:
        # Report transport errors the same way as a bad status: print + None.
        print("Failed to fetch PR details. Request error:", e)
        return None

    if response.status_code != 200:
        print("Failed to fetch PR details. Status Code:", response.status_code)
        return None
    return response.text  # Returns the diff of the PR

def filter_diff_based_on_extensions(diff_lines, exclude_extensions=("ipynb",)):
    """
    Filters out diffs from files with specified extensions.

    The diff is scanned line by line. Every line starting with "diff --git"
    opens a new per-file section; the section is dropped when that header line
    ends with one of the excluded extensions.

    :param diff_lines: The diff, either as one string (split on newlines) or
        as an iterable of lines. None or an empty value yields an empty list.
    :param exclude_extensions: File extensions to exclude, without the
        leading dot.
    :return: A list of filtered lines, excluding file diffs with specified
        extensions.
    """
    if not diff_lines:
        # Nothing to filter (e.g. the diff could not be fetched).
        return []

    if isinstance(diff_lines, str):
        lines = diff_lines.split("\n")
    else:
        lines = diff_lines

    # str.endswith accepts a tuple, so build the suffixes once up front.
    excluded_suffixes = tuple(f".{ext}" for ext in exclude_extensions)

    filtered_diff = []
    skip_chunk = False
    for line in lines:
        if line.startswith("diff --git"):
            # Check if this diff chunk is for a file with an excluded extension
            skip_chunk = line.endswith(excluded_suffixes)
        if not skip_chunk:
            filtered_diff.append(line)

    return filtered_diff

def _iter_diff_chunks(lines, max_chars):
    """Yield newline-joined groups of lines, each at most max_chars characters long."""
    pending = []
    pending_len = 0
    for line in lines:
        # A single line longer than the budget is cut into max_chars-sized
        # pieces; an empty line still counts as one (empty) piece.
        pieces = [
            line[start:start + max_chars]
            for start in range(0, len(line), max_chars)
        ] or [line]
        for piece in pieces:
            # +1 accounts for the newline that joins this piece to the chunk.
            cost = len(piece) + (1 if pending else 0)
            if pending and pending_len + cost > max_chars:
                yield "\n".join(pending)
                pending, pending_len = [], 0
                cost = len(piece)
            pending.append(piece)
            pending_len += cost
    if pending:
        yield "\n".join(pending)


def split_diff_and_review(code_diff, openai_api_key, max_chars=24000):
    """
    Splits the diff into chunks based on a character count approximation to
    stay within token limits, reviews each chunk and combines the feedback.

    :param code_diff: The diff as an iterable of lines, or as a single string
        (which is split on newlines). None or an empty value gives "".
    :param openai_api_key: OpenAI API key passed on to review_code_with_gpt4.
    :param max_chars: Maximum number of characters per reviewed chunk.
    :return: The feedback for all chunks joined with single spaces; an empty
        string when there was nothing to review.
    :raises ValueError: If max_chars is not positive.
    """
    if max_chars <= 0:
        raise ValueError("max_chars must be a positive integer")
    if not code_diff:
        return ""

    # Iterating a plain string would walk it character by character.
    lines = code_diff.split("\n") if isinstance(code_diff, str) else code_diff

    feedbacks = []
    for chunk in _iter_diff_chunks(lines, max_chars):
        if not chunk.strip():
            # Do not spend a model call on a chunk without any content.
            continue
        feedback = review_code_with_gpt4(chunk, openai_api_key)
        if feedback:
            feedbacks.append(feedback)

    return " ".join(feedbacks)

# Example usage: fetch one pull request, drop excluded files, review the rest.
owner = "wkda"
repo = "ds-damage-vip-services"
pr_number = 137
# Credentials come from the environment (see the load_dotenv call above).
github_token = os.getenv("GITHUB_API_KEY")
openai_api_key = os.getenv("OPEN_AI_API_KEY")

print(f"Fetching PR #{pr_number} diff from {owner}/{repo}...")
pr_diff = fetch_pr_diff(owner, repo, pr_number, github_token)
if pr_diff is None:
    # fetch_pr_diff returns None on failure; stop with a clear message rather
    # than letting the next step fail on a None value.
    raise RuntimeError(
        f"Could not fetch the diff for PR #{pr_number} from {owner}/{repo}."
    )
print("Processing diff code")
pr_diff_filtered = filter_diff_based_on_extensions(pr_diff)
print("Reviewing code diff")
feedback = split_diff_and_review(pr_diff_filtered, openai_api_key)



Fetching PR #137 diff from wkda/ds-damage-vip-services...
Processing diff code
Reviewing code diff
