In [9]:
import json
import os
import re
import time
from collections import defaultdict, Counter
from datetime import datetime, timedelta, timezone

from dotenv import load_dotenv
from github import Github
from github.GithubException import GithubException
from groq import Groq

# Read credentials from the environment (a local .env file is loaded first).
load_dotenv()
github_token = os.getenv('GITHUB_TOKEN')
groq_api_key = os.getenv('GROQ_API_KEY')

# Initialize GitHub client.
# per_page=100 asks for the largest page the GitHub REST API serves, so paginated
# listings (commits, pull requests) need fewer round trips than the default page size.
g = Github(github_token, per_page=100)

# Initialize Groq client
groq_client = Groq(api_key=groq_api_key)

In [10]:
# Look up the CATAPA-WEB repository through the GitHub client `g`.
# NOTE(review): none of the cells visible here read this module-level `repo`
# (get_user_activity binds its own local `repo`), so this looks like an access
# check only -- confirm before relying on or removing it.
repo = g.get_repo("GDP-ADMIN/CATAPA-WEB")

In [28]:

def get_commit_details(commit):
    """Summarize a commit object as a plain dictionary.

    Reads ``sha``, ``html_url``, ``files`` and ``stats`` from ``commit`` and the
    message and author date from its nested ``commit.commit`` object.

    Args:
        commit: Object exposing the attributes listed above.

    Returns:
        dict with the keys 'sha', 'message', 'url', 'date', 'files_changed'
        (the ``filename`` of every entry in ``commit.files``), 'additions'
        and 'deletions'.
    """
    details = {'sha': commit.sha}
    details['message'] = commit.commit.message
    details['url'] = commit.html_url
    details['date'] = commit.commit.author.date

    # Keep only the path of each changed file, not the full file objects.
    changed_paths = []
    for changed_file in commit.files:
        changed_paths.append(changed_file.filename)
    details['files_changed'] = changed_paths

    details['additions'] = commit.stats.additions
    details['deletions'] = commit.stats.deletions
    return details

def _as_naive_utc(moment):
    """Return ``moment`` as a naive UTC datetime so naive and aware values can be compared."""
    if moment is not None and moment.tzinfo is not None:
        return moment.astimezone(timezone.utc).replace(tzinfo=None)
    return moment


def _within(moment, window_start, window_end):
    """Return True when ``moment`` is set and lies inside the inclusive window."""
    return moment is not None and window_start <= moment <= window_end


def get_user_activity(username, start_date, end_date, repo_list):
    """Collect a user's commits and pull requests per repository for a time window.

    Args:
        username: GitHub login used to filter commits (as author) and pull
            requests (as creator).
        start_date: Start of the window (datetime). Naive values are treated as UTC.
        end_date: End of the window (datetime). Naive values are treated as UTC.
        repo_list: Iterable of full repository names ("owner/name").

    Returns:
        A nested defaultdict ``{repo_name: {'commits': [...], 'pull_requests': [...]}}``.
        Commit entries come from ``get_commit_details``; pull-request entries are
        dicts with 'number', 'title', 'description', 'state', 'url', 'created_at',
        'updated_at' and 'commits'.

    GithubException errors are printed and the affected repository (or section)
    is skipped; they are not re-raised.
    """
    activity = defaultdict(lambda: defaultdict(list))

    # Compare on a single representation: API clients may hand back timezone-aware
    # datetimes, and comparing those with naive window bounds raises TypeError.
    window_start = _as_naive_utc(start_date)
    window_end = _as_naive_utc(end_date)

    for repo_name in repo_list:
        print(f"Checking repository: {repo_name}")
        try:
            repo = g.get_repo(repo_name)

            # Fetch detailed commit information
            try:
                commits = repo.get_commits(author=username, since=start_date, until=end_date)
                for commit in commits:
                    activity[repo_name]['commits'].append(get_commit_details(commit))
            except GithubException as e:
                if e.status == 409 and "Git Repository is empty" in str(e):
                    print(f"Repository {repo_name} is empty. Skipping commits...")
                else:
                    print(f"Error accessing commits for repo {repo_name}: {str(e)}")

            # Fetch pull requests, most recently updated first. A PR created inside
            # the window was also updated inside or after it, so ordering by update
            # time lets the loop stop early without missing PRs that were created
            # before the window but updated during it.
            try:
                prs = repo.get_pulls(state='all', sort='updated', direction='desc')
                pr_count = 0
                for pr in prs:
                    print(f"Checking PR #{pr.number} in {repo_name}")
                    created_at = _as_naive_utc(pr.created_at)
                    updated_at = _as_naive_utc(pr.updated_at)

                    # Everything from here on was last touched before the window.
                    if updated_at is not None and updated_at < window_start:
                        print(f"Reached PRs last updated before {start_date}, stopping PR fetch for {repo_name}")
                        break

                    if pr.user.login != username:
                        continue

                    # Check if PR was active during the time frame
                    if _within(created_at, window_start, window_end) or \
                       _within(updated_at, window_start, window_end):
                        pr_commits = pr.get_commits()
                        pr_details = {
                            'number': pr.number,
                            'title': pr.title,
                            'description': pr.body,  # Include PR description
                            'state': pr.state,
                            'url': pr.html_url,
                            'created_at': pr.created_at,
                            'updated_at': pr.updated_at,
                            'commits': [get_commit_details(commit) for commit in pr_commits]
                        }
                        activity[repo_name]['pull_requests'].append(pr_details)
                        pr_count += 1
                        print(f"Added PR #{pr.number} to report")
                print(f"Total PRs added for {repo_name}: {pr_count}")
            except GithubException as e:
                print(f"Error accessing pull requests for repo {repo_name}: {str(e)}")

        except GithubException as e:
            print(f"Error accessing repository {repo_name}: {str(e)}")

        # Sleep to avoid hitting rate limits
        time.sleep(2)

    return activity

In [31]:
def load_config(path='config.json'):
    """Load the notebook's JSON configuration file.

    Args:
        path: Location of the JSON file. Defaults to 'config.json', resolved
            against the current working directory.

    Returns:
        The parsed JSON document, as produced by ``json.load``.

    Raises:
        FileNotFoundError: If ``path`` does not exist.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    # Explicit UTF-8 keeps non-ASCII values readable regardless of the
    # platform's locale default encoding.
    with open(path, 'r', encoding='utf-8') as f:
        return json.load(f)

# Read the target user and repository list from the configuration file.
config = load_config()
username = config['github_username']
repo_list = config['repositories']

# GitHub reports timestamps in UTC, so the 14-day reporting window is built from the
# current UTC time instead of local wall-clock time. The values are kept naive
# (tzinfo stripped) so they have the same form the window had before.
end_date = datetime.now(timezone.utc).replace(tzinfo=None)
start_date = end_date - timedelta(days=14)

print(f"Fetching GitHub activity for {username} from {start_date} to {end_date}")
activity_data = get_user_activity(username, start_date, end_date, repo_list)

Fetching GitHub activity for jamesjf7 from 2024-08-30 15:24:11.885699 to 2024-09-13 15:24:11.885699
Checking repository: GDP-ADMIN/CATAPA-WEB
Checking PR #10387 in GDP-ADMIN/CATAPA-WEB
Checking PR #10386 in GDP-ADMIN/CATAPA-WEB
Checking PR #10385 in GDP-ADMIN/CATAPA-WEB
Checking PR #10384 in GDP-ADMIN/CATAPA-WEB
Checking PR #10383 in GDP-ADMIN/CATAPA-WEB
Checking PR #10382 in GDP-ADMIN/CATAPA-WEB
Checking PR #10381 in GDP-ADMIN/CATAPA-WEB
Checking PR #10380 in GDP-ADMIN/CATAPA-WEB
Checking PR #10379 in GDP-ADMIN/CATAPA-WEB
Checking PR #10378 in GDP-ADMIN/CATAPA-WEB
Added PR #10378 to report
Checking PR #10377 in GDP-ADMIN/CATAPA-WEB
Checking PR #10376 in GDP-ADMIN/CATAPA-WEB
Checking PR #10375 in GDP-ADMIN/CATAPA-WEB
Checking PR #10374 in GDP-ADMIN/CATAPA-WEB
Checking PR #10373 in GDP-ADMIN/CATAPA-WEB
Checking PR #10372 in GDP-ADMIN/CATAPA-WEB
Checking PR #10371 in GDP-ADMIN/CATAPA-WEB
Checking PR #10370 in GDP-ADMIN/CATAPA-WEB
Checking PR #10369 in GDP-ADMIN/CATAPA-WEB
Checking PR #10

In [35]:
# print as pandas dataframe
import pandas as pd

def get_pr_details(pr, repository=None):
    """Build one table row (a flat dict) from a pull-request summary dict.

    Args:
        pr: Mapping with the keys 'number', 'title', 'description', 'state',
            'url', 'created_at', 'updated_at' and 'commits' (a sized collection).
        repository: Repository name to record in the row. When omitted, the
            module-level ``repo_name`` is used, which is only valid while the
            caller's loop over repositories has it bound.

    Returns:
        dict with the keys 'Repository', 'PR Number', 'Title', 'Description',
        'State', 'URL', 'Created At', 'Updated At' and 'Commits' (the number
        of commits in ``pr['commits']``).
    """
    return {
        # Prefer the explicit argument; fall back to the global for existing callers.
        'Repository': repo_name if repository is None else repository,
        'PR Number': pr['number'],
        'Title': pr['title'],
        'Description': pr['description'],
        'State': pr['state'],
        'URL': pr['url'],
        'Created At': pr['created_at'],
        'Updated At': pr['updated_at'],
        'Commits': len(pr['commits'])
    }

def get_commit_details(commit, repository=None):
    """Build one table row (a flat dict) from a commit summary dict.

    This definition rebinds the name ``get_commit_details`` that
    ``get_user_activity`` calls with commit *objects*. So that re-running the
    fetch after this cell still works, any non-dict input is summarized from
    its attributes in the raw form that caller stores.
    NOTE(review): giving this row builder its own name would be cleaner; the
    name is kept here so existing call sites continue to work.

    Args:
        commit: Either a summary dict with the keys 'sha', 'message', 'url',
            'date', 'files_changed', 'additions' and 'deletions', or a commit
            object exposing ``sha``, ``commit``, ``html_url``, ``files`` and
            ``stats``.
        repository: Repository name to record in the row. When omitted, the
            module-level ``repo_name`` is used, which is only valid while the
            caller's loop over repositories has it bound.

    Returns:
        For a dict: a row with the keys 'Repository', 'SHA', 'Message', 'URL',
        'Date', 'Files Changed' (a count), 'Additions' and 'Deletions'.
        For a commit object: the raw summary dict ('sha', 'message', 'url',
        'date', 'files_changed', 'additions', 'deletions').
    """
    if not isinstance(commit, dict):
        # Commit object from the API client: produce the raw summary.
        return {
            'sha': commit.sha,
            'message': commit.commit.message,
            'url': commit.html_url,
            'date': commit.commit.author.date,
            'files_changed': [f.filename for f in commit.files],
            'additions': commit.stats.additions,
            'deletions': commit.stats.deletions
        }

    return {
        # Prefer the explicit argument; fall back to the global for existing callers.
        'Repository': repo_name if repository is None else repository,
        'SHA': commit['sha'],
        'Message': commit['message'],
        'URL': commit['url'],
        'Date': commit['date'],
        'Files Changed': len(commit['files_changed']),
        'Additions': commit['additions'],
        'Deletions': commit['deletions']
    }

# Flatten the per-repository activity into two flat lists of row dicts:
# one row per pull request and one row per commit.
pr_data = []
commit_data = []
# The loop variable must stay named `repo_name`: get_pr_details and
# get_commit_details are called without a repository argument and read this
# module-level name to fill in each row's 'Repository' value.
for repo_name, activity in activity_data.items():
    for pr in activity['pull_requests']:
        pr_data.append(get_pr_details(pr))
    for commit in activity['commits']:
        commit_data.append(get_commit_details(commit))

ModuleNotFoundError: No module named 'pandas'