<a href="https://colab.research.google.com/github/benny-png/Automating-GitHub-Repository-Collaborator-Insight-/blob/main/GitHub_Repository_Collaborator_Insight.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install requests pandas openai

Collecting openai
  Downloading openai-1.37.1-py3-none-any.whl.metadata (22 kB)
Collecting httpx<1,>=0.23.0 (from openai)
  Downloading httpx-0.27.0-py3-none-any.whl.metadata (7.2 kB)
Collecting httpcore==1.* (from httpx<1,>=0.23.0->openai)
  Downloading httpcore-1.0.5-py3-none-any.whl.metadata (20 kB)
Collecting h11<0.15,>=0.13 (from httpcore==1.*->httpx<1,>=0.23.0->openai)
  Downloading h11-0.14.0-py3-none-any.whl.metadata (8.2 kB)
Downloading openai-1.37.1-py3-none-any.whl (337 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m337.0/337.0 kB[0m [31m2.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading httpx-0.27.0-py3-none-any.whl (75 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m75.6/75.6 kB[0m [31m4.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading httpcore-1.0.5-py3-none-any.whl (77 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m77.9/77.9 kB[0m [31m3.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading h11-0.14.0-py3-none-an

In [None]:
import requests
from datetime import datetime, timedelta
import os
from openai import OpenAI
import smtplib
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
import pandas as pd
from google.colab import drive
import json
import smtplib
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
from email.mime.base import MIMEBase
from email import encoders
# Mount Google Drive so files under /content/drive/MyDrive are reachable
drive.mount('/content/drive')

# Load credentials from a JSON file in your Google Drive.
# The file must provide the four keys read below: openai_api_key,
# github_token, email_address and email_password.
with open('/content/drive/MyDrive/github_analyzer_credentials.json', 'r') as f:
    credentials = json.load(f)

# Set the API keys.
# The OpenAI key is exported through the environment; the remaining values are
# kept as module-level constants used by the functions defined below.
os.environ["OPENAI_API_KEY"] = credentials['openai_api_key']
GITHUB_TOKEN = credentials['github_token']
EMAIL_ADDRESS = credentials['email_address']
EMAIL_PASSWORD = credentials['email_password']

# Initialize the OpenAI client.
# NOTE(review): no key is passed explicitly, so the client is expected to pick
# up OPENAI_API_KEY from the environment set above -- confirm against the SDK.
client = OpenAI()

def safe_request(url, headers, timeout=30):
    """Fetch ``url`` with a GET request and return the decoded JSON body.

    Args:
        url: Absolute URL to request.
        headers: Mapping of HTTP headers sent with the request.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block the notebook forever.

    Returns:
        The parsed JSON payload, or ``None`` when the request failed, the
        server answered with an HTTP error status, or the body was not
        valid JSON. Failures are reported on stdout rather than raised.
    """
    print(f"Fetching data from: {url}")
    try:
        response = requests.get(url, headers=headers, timeout=timeout)
        response.raise_for_status()
        payload = response.json()
    except (requests.RequestException, ValueError) as e:
        # ValueError covers JSON decoding failures on requests versions where
        # the decode error is not a RequestException subclass.
        print(f"Error fetching data from {url}: {str(e)}")
        return None
    # Only report success once the body has actually been decoded.
    print(f"Successfully fetched data from: {url}")
    return payload

def fetch_repo_data(username, repo):
    """Collect a snapshot of a GitHub repository for reporting.

    Queries the repository, commits, branches, pulls, issues, contributors,
    contents and readme endpoints through ``safe_request`` and condenses the
    answers into one dictionary.

    Args:
        username: Owner (user or organisation) of the repository.
        repo: Repository name.

    Returns:
        ``None`` when the repository endpoint could not be read. Otherwise a
        dict with the keys ``name``, ``owner``, ``description``,
        ``recent_commits``, ``open_issues``, ``closed_issues``,
        ``pull_requests``, ``contributors``, ``main_branch``, ``dev_branch``,
        ``feature_branches``, ``updated_at``, ``file_changes``,
        ``total_files`` and ``readme_content``.
    """
    from datetime import timezone  # only needed here; the file imports datetime/timedelta

    print(f"Fetching data for repository: {username}/{repo}")
    headers = {
        'Authorization': f'token {GITHUB_TOKEN}'
    }

    base_url = f"https://api.github.com/repos/{username}/{repo}"

    # List endpoints default to 30 items per page; ask for the maximum of 100.
    # Results beyond the first page are still not fetched.
    endpoints = {
        'repo': '',
        'commits': '/commits?per_page=100',
        'branches': '/branches?per_page=100',
        'pulls': '/pulls?per_page=100',
        'issues': '/issues?state=all&per_page=100',
        'contributors': '/contributors?per_page=100',
        'contents': '/contents',
        'readme': '/readme'
    }

    responses = {k: safe_request(base_url + v, headers) for k, v in endpoints.items()}

    repo_info = responses['repo']
    if not repo_info:
        print("Error: Repository not found or inaccessible")
        return None

    print("Analyzing recent commits and file changes")
    # GitHub timestamps are UTC in the form YYYY-MM-DDTHH:MM:SSZ. Building the
    # cutoff in exactly that form keeps the string comparison below valid.
    thirty_days_ago = (datetime.now(timezone.utc) - timedelta(days=30)).strftime('%Y-%m-%dT%H:%M:%SZ')
    recent_commits = [
        commit for commit in responses['commits'] or []
        # The author entry may be missing/null; treat such commits as not recent.
        if ((commit.get('commit') or {}).get('author') or {}).get('date', '') > thirty_days_ago
    ]

    current_files = set(file['path'] for file in responses['contents'] or [])
    file_changes = analyze_file_changes(username, repo, headers, current_files)

    print("Analyzing branch structure")
    branches = responses['branches'] or []
    main_branch = next((branch for branch in branches if branch['name'] == 'main'), None)
    dev_branch = next((branch for branch in branches if branch['name'] == 'dev'), None)
    feature_branches = [branch for branch in branches if branch['name'] not in ['main', 'dev']]

    print("Fetching README content")
    readme_content = ''
    if responses['readme']:
        readme_url = responses['readme'].get('download_url')
        if readme_url:
            try:
                readme_response = requests.get(readme_url, timeout=30)
            except requests.RequestException as e:
                # A missing README must not abort the whole analysis.
                print(f"Error fetching data from {readme_url}: {str(e)}")
            else:
                if readme_response.status_code == 200:
                    readme_content = readme_response.text

    # The issues endpoint also lists pull requests; those entries carry a
    # 'pull_request' key and are excluded so the issue counts are not inflated.
    issues = [item for item in responses['issues'] or [] if 'pull_request' not in item]

    data = {
        'name': repo,
        'owner': username,
        # The key is present with a null value when no description is set,
        # so a .get() default alone would never apply.
        'description': repo_info.get('description') or 'No description available',
        'recent_commits': recent_commits,
        'open_issues': [issue for issue in issues if issue['state'] == 'open'],
        'closed_issues': [issue for issue in issues if issue['state'] == 'closed'],
        'pull_requests': responses['pulls'] or [],
        'contributors': responses['contributors'] or [],
        'main_branch': main_branch,
        'dev_branch': dev_branch,
        'feature_branches': feature_branches,
        'updated_at': repo_info['updated_at'],
        'file_changes': file_changes,
        'total_files': len(current_files),
        'readme_content': readme_content
    }
    print("Repository data fetched successfully")
    return data

def analyze_file_changes(username, repo, headers, current_files):
    """Summarise which files were added, removed or modified in the last 30 days.

    Lists the commits since the cutoff, fetches each commit's detail record
    through ``safe_request`` and buckets the touched file names by status.

    Args:
        username: Owner of the repository.
        repo: Repository name.
        headers: HTTP headers (including authorisation) for the API calls.
        current_files: Currently unused; kept so existing callers keep working.

    Returns:
        A dict with the keys ``added``, ``removed`` and ``modified``, each a
        sorted list of file names. A file that was both added and removed in
        the window appears in neither ``added`` nor ``removed``. File statuses
        other than these three are ignored.
    """
    from datetime import timezone  # only needed here; the file imports datetime/timedelta

    print("Analyzing file changes")
    # The API expects an ISO 8601 UTC timestamp (YYYY-MM-DDTHH:MM:SSZ); a naive
    # local isoformat() with microseconds is not in that form.
    thirty_days_ago = (datetime.now(timezone.utc) - timedelta(days=30)).strftime('%Y-%m-%dT%H:%M:%SZ')
    # per_page=100 raises the page size from the default of 30; commits beyond
    # the first page are still not fetched.
    commits_url = f"https://api.github.com/repos/{username}/{repo}/commits?since={thirty_days_ago}&per_page=100"
    commits = safe_request(commits_url, headers) or []

    files_by_status = {'added': set(), 'removed': set(), 'modified': set()}

    for commit in commits:
        # The list endpoint omits per-file details, so each commit is fetched individually.
        commit_data = safe_request(commit['url'], headers)
        if not commit_data or 'files' not in commit_data:
            continue
        for file in commit_data['files']:
            bucket = files_by_status.get(file['status'])
            if bucket is not None:
                bucket.add(file['filename'])

    added_files = files_by_status['added']
    removed_files = files_by_status['removed']

    # Cancel out files that were both added and removed within the window.
    actually_added = added_files - removed_files
    actually_removed = removed_files - added_files

    print("File changes analysis completed")
    # Sorted so repeated runs over the same data give the same output order.
    return {
        'added': sorted(actually_added),
        'removed': sorted(actually_removed),
        'modified': sorted(files_by_status['modified'])
    }

def generate_detailed_report(repo_data, report_length='medium'):
    """Ask the chat model for a narrative report about a repository snapshot.

    Args:
        repo_data: Snapshot dict as produced by ``fetch_repo_data``; a falsy
            value short-circuits to an error message.
        report_length: ``'long'``, ``'medium'`` or anything else; it is
            interpolated into the prompt and selects the completion token
            limit (2000, 1000, otherwise 500).

    Returns:
        The stripped text of the first completion choice, or an error string
        when ``repo_data`` is falsy.
    """
    print(f"Generating {report_length} report")
    if not repo_data:
        return "Error: Unable to fetch repository data. The repository may not exist or is inaccessible."

    prompt = f"""
    Generate a {report_length} detailed report to update me on recent info for the GitHub repository (where we contribute so catch me up to ensure maximum collaboration) {repo_data['name']} owned by {repo_data['owner']} where we collaborate to keep me on track without having to go read or before I go and check.
    Focus on recent changes, updates, branch management, and project structure changes. Address the following points:

    1. Project Overview:
       - Description: {repo_data['description']}
       - README Content: {repo_data['readme_content'][:500]}... (truncated for brevity)

    2. Recent Activity:
       - Summarize the {len(repo_data['recent_commits'])} commits made in the last 30 days.
       - Highlight any significant changes or features introduced.

    3. Project Structure Changes:
       - Added Files: {len(repo_data['file_changes']['added'])}
       - Removed Files: {len(repo_data['file_changes']['removed'])}
       - Modified Files: {len(repo_data['file_changes']['modified'])}
       - Total Files: {repo_data['total_files']}
       - Analyze the nature of these changes and their impact on the project structure.

    4. Issues and Pull Requests:
       - Open Issues: {len(repo_data['open_issues'])}
       - Closed Issues: {len(repo_data['closed_issues'])}
       - Open Pull Requests: {len(repo_data['pull_requests'])}
       - Analyze the nature of open issues and pull requests.

    5. Branch Management:
       - Main Branch: {repo_data['main_branch']['name'] if repo_data['main_branch'] else 'Not found'}
       - Dev Branch: {repo_data['dev_branch']['name'] if repo_data['dev_branch'] else 'Not found'}
       - Feature Branches: {', '.join([b['name'] for b in repo_data['feature_branches']]) if repo_data['feature_branches'] else 'None found'}
       - Assess adherence to the branching strategy (main, dev, and feature branches).

    6. Contributors:
       - Total Contributors: {len(repo_data['contributors'])}
       - Identify the most active contributors based on recent commits.

    7. Project Health:
       - Evaluate the overall health of the project based on commit frequency, issue resolution, pull request management, and file changes.
       - Suggest areas for improvement in project management and development practices.

    Provide a {report_length} analysis that goes beyond the basic description and offers actionable insights for the development team.
    Consider the implications of the file changes on the project's development and maintenance.
    Use the README content to understand the project's purpose and provide context-aware insights. NOTE: no need to follow this template present a best as you think.
    """

    # Longer report types get a larger completion budget.
    if report_length == 'long':
        token_budget = 2000
    elif report_length == 'medium':
        token_budget = 1000
    else:
        token_budget = 500

    conversation = [
        {"role": "system", "content": "You are an expert GitHub repository analyzer providing detailed insights and recommendations."},
        {"role": "user", "content": prompt},
    ]

    print("Sending request to OpenAI for report generation")
    completion = client.chat.completions.create(
        model="gpt-4",
        messages=conversation,
        max_tokens=token_budget,
        n=1,
        temperature=0.7,
    )

    print("Report generated successfully")
    report_text = completion.choices[0].message.content
    return report_text.strip()

def send_email(subject, body, to_email, csv_filename):
    """Send a plain-text email through Gmail SMTP, optionally with an attachment.

    Args:
        subject: Subject line of the message.
        body: Plain-text message body.
        to_email: Recipient address.
        csv_filename: Path of a file to attach, or ``None``/empty to send the
            message without an attachment (the error-notification path in
            this file passes ``None``).

    Returns:
        None. SMTP failures are reported on stdout instead of being raised;
        an unreadable attachment path still raises from ``open``.
    """
    print(f"Sending email to: {to_email}")
    msg = MIMEMultipart()
    msg['From'] = EMAIL_ADDRESS
    msg['To'] = to_email
    msg['Subject'] = subject

    msg.attach(MIMEText(body, 'plain'))

    # Attach the file only when a path was supplied; open(None) would raise.
    if csv_filename:
        with open(csv_filename, "rb") as attachment:
            part = MIMEBase("application", "octet-stream")
            part.set_payload(attachment.read())

        encoders.encode_base64(part)
        # Let the email package build and quote the filename parameter rather
        # than formatting the header value by hand.
        part.add_header(
            "Content-Disposition",
            "attachment",
            filename=os.path.basename(csv_filename),
        )
        msg.attach(part)

    try:
        # The context manager closes the connection even when starttls, login
        # or sendmail raises.
        with smtplib.SMTP('smtp.gmail.com', 587) as server:
            server.starttls()
            server.login(EMAIL_ADDRESS, EMAIL_PASSWORD)
            server.sendmail(EMAIL_ADDRESS, to_email, msg.as_string())
        print(f"Email sent successfully to {to_email}")
    except Exception as e:
        # Delivery is deliberately best-effort: report the problem and carry on.
        print(f"Error sending email: {str(e)}")

def analyze_and_send_report(username, repo, report_length='medium', to_email=None):
    """Run the full pipeline: fetch, generate the report, save a CSV, email it.

    Args:
        username: Owner of the repository.
        repo: Repository name.
        report_length: Passed through to ``generate_detailed_report``.
        to_email: Recipient address; when falsy no email is sent.

    Returns:
        The generated report text, or an error message when the repository
        data could not be fetched.
    """
    print(f"Starting analysis for repository: {username}/{repo}")
    snapshot = fetch_repo_data(username, repo)

    # Failure path first: notify the recipient (if any) and hand back the message.
    if not snapshot:
        failure_text = "Error: Unable to fetch repository data. The repository may not exist or is inaccessible."
        if to_email:
            send_email("GitHub Repository Analysis Error", failure_text, to_email, None)
        return failure_text

    report_text = generate_detailed_report(snapshot, report_length)

    # Save CSV report (save_csv_report is defined outside the code shown here).
    csv_path = f'/content/drive/MyDrive/{username}_{repo}_report.csv'
    save_csv_report(snapshot, csv_path)

    if to_email:
        send_email(
            f"GitHub Repository Report: {username}/{repo}",
            report_text,
            to_email,
            csv_path,
        )

    return report_text

# Example usage
print("Starting repository analysis")
# List the report recipients below. For the sender account, either allow
# third-party app access in its Google settings or enable 2-factor
# authentication and generate an app password.
recipient_emails = ["bennyodd3@gmail.com"] # Enter email recipients here

if recipient_emails:
    # Each recipient triggers a complete analysis run (fetch, report, email);
    # `report` ends up holding the result of the last run only.
    for email in recipient_emails:
        report = analyze_and_send_report("benny-png", "API-Endpoint-Development-OCR", "medium", email)
else:
    # Handle the case where no emails are provided: analyze without sending mail
    report = analyze_and_send_report("benny-png", "API-Endpoint-Development-OCR", "medium", None)

print("\nDetailed Report:")
print(report)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Starting repository analysis
Starting analysis for repository: benny-png/API-Endpoint-Development-OCR
Fetching data for repository: benny-png/API-Endpoint-Development-OCR
Fetching data from: https://api.github.com/repos/benny-png/API-Endpoint-Development-OCR
Successfully fetched data from: https://api.github.com/repos/benny-png/API-Endpoint-Development-OCR
Fetching data from: https://api.github.com/repos/benny-png/API-Endpoint-Development-OCR/commits
Successfully fetched data from: https://api.github.com/repos/benny-png/API-Endpoint-Development-OCR/commits
Fetching data from: https://api.github.com/repos/benny-png/API-Endpoint-Development-OCR/branches
Successfully fetched data from: https://api.github.com/repos/benny-png/API-Endpoint-Development-OCR/branches
Fetching data from: https://api.github.com/repos/benny-png/API-Endpoint-Development-OCR/pulls
Successf