# BugRx: Automated Github issue fixes using AI

In [60]:
# Collect dependencies. To be moved to requirements file.
#!pip install gitpython
#!pip install requests

In [None]:
import git
import os
import shutil

In [54]:
# Copy all content from the Github Repository to a local folder

def clone_and_copy(repo_url, destination_folder, target_path=None):
    """Clones a GitHub repository and copies specified files/folders to a local destination.

    Args:
        repo_url (str): The HTTPS URL of the GitHub repository.
        destination_folder (str): The local folder where the code will be copied.
        target_path (str, optional): Specific file or folder within the repo to copy.
                                     If None, the entire repo is copied. Defaults to None.
    """

    try:
        # Create the destination folder if it doesn't exist
        os.makedirs(destination_folder, exist_ok=True)

        # Clone the repository
        repo = git.Repo.clone_from(repo_url, destination_folder)

        # If a target path is specified, copy only that path
        if target_path:
            source_path = os.path.join(destination_folder, target_path)
            if os.path.exists(source_path):
                shutil.copytree(source_path, destination_folder, dirs_exist_ok=True)
            else:
                print(f"Error: Target path '{target_path}' not found in the repository.")
        else:
            # Otherwise, copy the entire repository
            print("Copying entire Github directory")
            shutil.copytree(destination_folder, destination_folder, ignore=shutil.ignore_patterns('.git'))

        print(f"Code from '{repo_url}' copied successfully to '{destination_folder}'")

    except Exception as e:
        print(f"An error occurred: {e}")


if __name__ == "__main__":
    repo_url = "https://github.com/jasmeetsb/Google_Generative_AI_Samples"  # Replace with your actual repo URL

    repo_url = "https://github.com/cuda-mode/lectures"
    
    destination_folder = "./my_code_copy" 
    target_path = "sample_data/"  # Optional; set to None to copy everything

    clone_and_copy(repo_url, destination_folder, target_path)


Error: Target path 'sample_data/' not found in the repository.
Code from 'https://github.com/cuda-mode/lectures' copied successfully to './my_code_copy'


## Copy contents of all files into a single text file which will then be passed to the LLM

In [55]:


def pipe_files_to_single_output(folder_path, output_file):
    """Pipes the content of Python (.py) and text (.txt) files in a folder into a single text file.

    Args:
        folder_path (str): Path to the folder containing the files.
        output_file (str): Name of the output file to create.
    """

    with open(output_file, 'w') as outfile:
        for root, _, filenames in os.walk(folder_path):
            for filename in filenames:
                if filename.endswith('.py') or filename.endswith('.txt') or filename.endswith('.ipynb'):
                    filepath = os.path.join(root, filename)
                    with open(filepath, 'r') as infile:
                        print('Adding file: ',filepath)
                        outfile.write('File_Name: ')
                        outfile.write(infile.name)
                        outfile.write('\n File_Content: \n')
                        outfile.write(infile.read())
                        outfile.write('\n--- File Separator ---\n')

if __name__ == "__main__":
    folder_path = "./my_code_copy/"  # Replace with the path to your folder
    output_file = "combined_output.txt"
    pipe_files_to_single_output(folder_path, output_file)

Adding file:  ./my_code_copy/utils.py
Adding file:  ./my_code_copy/lecture2/rgb_to_grayscale/rgb_to_grayscale.py
Adding file:  ./my_code_copy/lecture2/mean_filter/mean_filter.py
Adding file:  ./my_code_copy/lecture5/matmul_l5.ipynb
Adding file:  ./my_code_copy/lecture4/cuda-mode-session-4.ipynb
Adding file:  ./my_code_copy/lecture3/pmpp.ipynb
Adding file:  ./my_code_copy/lecture1/load_inline.py
Adding file:  ./my_code_copy/lecture1/pytorch_square.py
Adding file:  ./my_code_copy/lecture1/numba_square.py
Adding file:  ./my_code_copy/lecture1/test.py
Adding file:  ./my_code_copy/lecture1/triton_square.py
Adding file:  ./my_code_copy/lecture1/pt_profiler.py
Adding file:  ./my_code_copy/lecture1/nsys_square.py
Adding file:  ./my_code_copy/lecture1/main.py
Adding file:  ./my_code_copy/lecture1/hello_load_inline.py


## Calculate number of characters in combined text file which includes all text and code retrieved from the Github Repository

In [56]:
#Calculate number of characters in combined text file which includes all text and code retrieved from the Github Repository
def count_characters(file_path):
    """Calculates the number of characters in a text file.

    Args:
        file_path (str): The path to the text file.

    Returns:
        int: The total number of characters in the file.
    """

    total_characters = 0
    with open(file_path, 'r') as file:
        for line in file:
            total_characters += len(line)  

    return total_characters

file_path = "./combined_output.txt"  # Replace with the path to your file
character_count = count_characters(file_path)
token_count = character_count/4
print(f"The file '{file_path}' has {character_count} characters.")
print(f"The file '{file_path}' has {token_count} tokens.")


The file './combined_output.txt' has 715983 characters.
The file './combined_output.txt' has 178995.75 tokens.



## Retrieve Git Repo's issue log


In [82]:
import git

def get_issue_log(repo_path):
    """Retrieves issues mentioned in commit messages of a Git repository.

    Args:
        repo_path (str): Path to the local Git repository.

    Returns:
        list: A list of dictionaries, each representing an issue with 'id' and 'message'.
    """

    repo = git.Repo(repo_path)
    issue_log = []

    for commit in repo.iter_commits():
        if "issue" in commit.message.lower():  # Adjust keywords as needed
            parts = commit.message.split('#')  # Assuming you use "#123" format
            if len(parts) > 1:
                issue_id = parts[1].split()[0]  # Get first element after #
                issue_log.append({
                    'id': issue_id,
                    'message': commit.message
                })

    return issue_log


repo_path = "./repo_clone"  # Replace with your repository path
issues = get_issue_log(repo_path)

if issues:
    print("Issues Found:")
    for issue in issues:
        print(f"- Issue ID: {issue['id']}")
        print(f"- Message: {issue['message']}")
else:
    print("No issues found in commit messages.")


No issues found in commit messages.


In [70]:
import git
import requests

def get_issue_log_from_github(repo_url):
    """Retrieves issues mentioned in commit messages of a GitHub repository.

    Args:
        repo_url (str): URL of the GitHub repository (e.g., 'https://github.com/user/repo')

    Returns:
        list: A list of dictionaries, each representing an issue with 'id', 'title', 
              'url'  and 'message'.
    """

    owner, repo_name = repo_url.split("/")[-2:]  # Extract owner and repo name

    # GitHub API endpoint for commits
    api_url = f"https://api.github.com/repos/{owner}/{repo_name}/commits"

    api_url = repo_url

    issue_log = []
    has_more_pages = True
    page_num = 1

    while has_more_pages:
        response = requests.get(api_url, params={'page': page_num})

        if response.status_code == 200:
            print(response.json)
            data = response.json
            #data = response.json()
            for commit in data:
                if "issue" in commit['commit']['message'].lower():
                    # Extract issue details (you might need to adjust based on the format)
                    parts = commit['commit']['message'].split('#')
                    if len(parts) > 1:
                        issue_id = parts[1].split()[0]
                        issue_title, issue_url = _get_issue_details(owner, repo_name, issue_id)
                        issue_log.append({
                            'id': issue_id,
                            'title': issue_title,
                            'url': issue_url,
                            'message': commit['commit']['message']
                        })
            has_more_pages = 'next' in response.links  # Check for pagination
            page_num += 1
        else:
            print(f"Error fetching commits: {response.status_code}")
            break

    return issue_log

def _get_issue_details(owner, repo_name, issue_id):
    """Fetches the title and URL of a GitHub issue."""
    api_url = f"https://api.github.com/repos/{owner}/{repo_name}/issues/{issue_id}"
    response = requests.get(api_url)
    if response.status_code == 200:
        print(response)
        data = response.json()
        return data['title'], data['html_url']
    else:
        return "Issue Not Found", ""  # Handle not found

#repo_url = "https://github.com/jasmeetsb/Google_Generative_AI_Samples/" 
issues = get_issue_log_from_github(repo_url)

# ... (Your issue printing logic) 


<bound method Response.json of <Response [200]>>


TypeError: 'method' object is not iterable

In [80]:
import git
import requests
import os

def get_issue_log_from_github(repo_url, github_token=None):
    """Retrieves issues mentioned in commit messages of a GitHub repository.

    Args:
        repo_url (str): URL of the GitHub repository (e.g., 'https://github.com/user/repo')
        github_token (str, optional): Personal access token to increase API rate limits.

    Returns:
        list: A list of dictionaries, each representing an issue with detailed information.
    """

    owner, repo_name = repo_url.split("/")[-2:]
    api_url = f"https://api.github.com/repos/{owner}/{repo_name}/commits"



    headers = {"Authorization": f"token {github_token}"} if github_token else None
    issue_log = []
    page_num = 1

    while True:
        response = requests.get(api_url, headers=headers, params={'page': page_num})

        if response.status_code == 200:
            data = response.json()

            for commit in data:
                issue_refs = _extract_issue_refs(commit['commit']['message'])
                if issue_refs:
                    for issue_id in issue_refs:
                        issue_details = _get_issue_details(owner, repo_name, issue_id, github_token)
                        if issue_details:
                            issue_log.append({
                                'commit_sha': commit['sha'],
                                'commit_message': commit['commit']['message'],
                                **issue_details  # Unpack title, url, state, etc.
                            })
            # Check for pagination
            if 'next' in response.links:
                page_num += 1
            else:
                break

        else:
            print(f"Error fetching commits: {response.status_code}")
            break

    return issue_log

def _extract_issue_refs(message):
    """Extracts potential issue references (e.g., #123) from a commit message."""
    # Customize your issue reference pattern as needed
    import re
    return re.findall(r'#(\d+)', message)

def _get_issue_details(owner, repo_name, issue_id, github_token=None):
    """Fetches details of a GitHub issue."""
    api_url = f"https://api.github.com/repos/{owner}/{repo_name}/issues/{issue_id}"
    headers = {"Authorization": f"token {github_token}"} if github_token else None
    response = requests.get(api_url, headers=headers)

    if response.status_code == 200:
        data = response.json()
        return {
            'id': issue_id,
            'title': data['title'],
            'url': data['html_url'],
            'state': data['state'] 
        }
    else:
        return None  # Return None for non-existing issues

if __name__ == "__main__":
    repo_url = "https://github.com/python/cpython"  # Example repository

    #repo_url = "https://github.com/cuda-mode/lectures"
    github_token = os.getenv('GITHUB_TOKEN')  # Optional: load token from environment variable

    issues = get_issue_log_from_github(repo_url, github_token)
    if issues:
        print("Issues Found:")
        for issue in issues:
            print(f"- Issue ID: {issue['id']} ({issue['state']})")
            print(f"- Title: {issue['title']}")
            print(f"- URL: {issue['url']}")
            print(f"- Commit: {issue['commit_sha'][:7]} - {issue['commit_message']}\n") 
    else:
        print("No issues found in commit messages.")


Error fetching commits: 403
No issues found in commit messages.


In [94]:
#
from git import Repo

local_dir = 'repo_clone'
repo_url = "https://github.com/gitpython-developers/QuickStartTutorialFiles.git"
repo_url =  "https://github.com/langchain-ai/langchain-benchmarks.git"

repo = Repo.clone_from(repo_url, local_dir)

In [97]:
repo_path = "repo_clone/"  # Replace with your repository path
issues = get_issue_log(repo_path)

if issues:
    print("Issues Found:")
    for issue in issues:
        print(f"- Issue ID: {issue['id']}")
        print(f"- Message: {issue['message']}")
else:
    print("No issues found in commit messages.")

No issues found in commit messages.
