# GitHub README Summarizer
This notebook reads a list of GitHub repositories, retrieves each repository's README.md, and summarizes its contents using the system and user prompts introduced in the Week 1, Day 1 course material.

In [None]:
from openai import OpenAI

# Initialize the OpenAI client once at module level; summarize_readme() uses
# this shared instance for every request.
# NOTE(review): no credentials are passed here, so the client presumably reads
# them from the environment (e.g. OPENAI_API_KEY) — confirm before running.
client = OpenAI()

In [None]:
import requests
from urllib.parse import urlparse
from IPython.display import Markdown, display

# Extract GitHub owner and repo name from URL
def extract_repo_parts(url: str):
    """Return the ``(owner, repo)`` pair named by a GitHub repository URL.

    Accepts full URLs (``https://github.com/owner/repo``), scheme-less URLs
    (``github.com/owner/repo``), bare ``owner/repo`` paths, and clone URLs
    ending in ``.git``. Extra path segments after the repository name
    (``/tree/main`` and the like) are ignored.

    Args:
        url: The repository URL; surrounding whitespace is ignored.

    Returns:
        A ``(owner, repo)`` tuple of strings.

    Raises:
        ValueError: If the URL does not contain both an owner and a repo name.
    """
    parsed = urlparse(url.strip())
    # Drop empty segments so doubled or trailing slashes cannot produce a
    # blank owner or repo name.
    parts = [segment for segment in parsed.path.split('/') if segment]
    # Without a scheme, urlparse leaves the host at the front of the path
    # instead of in netloc; skip it so it is not mistaken for the owner.
    if not parsed.netloc and parts and parts[0].lower() in ('github.com', 'www.github.com'):
        parts = parts[1:]
    if len(parts) < 2:
        raise ValueError(f'Invalid GitHub repo URL: {url}')
    owner, repo = parts[0], parts[1]
    # Clone URLs end in ".git", which is not part of the repository name.
    if repo.endswith('.git'):
        repo = repo[:-len('.git')]
    if not repo:
        raise ValueError(f'Invalid GitHub repo URL: {url}')
    return owner, repo

# Fetch README.md from main or master branch of GitHub repo
def fetch_readme(owner: str, repo: str, timeout: float = 10.0) -> str:
    """Download the text of ``README.md`` from a GitHub repository.

    Tries the ``main`` branch first, then ``master``, on
    raw.githubusercontent.com and returns the first response with HTTP
    status 200.

    Args:
        owner: Repository owner, as it appears in the GitHub URL.
        repo: Repository name.
        timeout: Seconds to wait on each HTTP request before giving up.

    Returns:
        The README contents as text.

    Raises:
        FileNotFoundError: If neither branch serves a ``README.md``.
        requests.RequestException: If a request fails or times out.
    """
    branches = ['main', 'master']
    for branch in branches:
        raw_url = f"https://raw.githubusercontent.com/{owner}/{repo}/{branch}/README.md"
        # requests waits indefinitely unless a timeout is given; bound the
        # wait so one stalled connection cannot hang the whole run.
        r = requests.get(raw_url, timeout=timeout)
        if r.status_code == 200:
            return r.text
    raise FileNotFoundError(f'README.md not found on main or master branch for {owner}/{repo}')

# Display summary nicely in Markdown inside Jupyter
def display_summary(text: str):
    """Show *text* as rendered Markdown using IPython's display machinery."""
    rendered = Markdown(text)
    display(rendered)



In [None]:
# System prompt: sets the model's persona and tone, and asks for raw Markdown
# output (no surrounding code fence). Sent as the "system" message by
# summarize_readme().
system_prompt = """
You are a philosophical new age software engineer that analyzes the contents of GitHub repositories,
and provides a critical thinking, open-minded and humorous summary.
Respond in markdown. Do not wrap the markdown in a code block - respond just with the markdown.
"""

# User prompt prefix: the instruction placed before the README text;
# summarize_readme() appends the README contents after it.
user_prompt_prefix = """
Here are the contents of a GitHub repository README.
Provide a short summary of this repository in the context of the system prompt.
"""


In [None]:
# Summarize a README using the system and user prompts with OpenAI
def summarize_readme(readme_text: str, repo_name: str):
    """Ask the model for a summary of one README and display it.

    The README text is appended to ``user_prompt_prefix`` and sent together
    with ``system_prompt`` through the OpenAI Responses API. The reply is
    shown as Markdown under a heading carrying ``repo_name``.

    Args:
        readme_text: Raw contents of the repository's README.
        repo_name: Label used for the heading above the summary.

    Returns:
        None. The summary is displayed, not returned.
    """
    # The user message is the fixed instruction, a blank line, then the README.
    full_user_message = "\n\n".join((user_prompt_prefix, readme_text))

    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": full_user_message},
    ]

    # Single request to the Responses API.
    reply = client.responses.create(model="gpt-4.1-mini", input=conversation)

    # output_text is the reply's text; show it under a per-repo heading.
    summary = reply.output_text
    display_summary(f"### {repo_name}\n{summary}")

In [None]:
# Read repo URLs from repos.txt: one URL per line. Blank lines and lines whose
# first non-whitespace character is "#" are skipped.
# The encoding is explicit so the file is decoded the same way on every platform.
with open("repos.txt", "r", encoding="utf-8") as f:
    # Strip before testing for "#" so indented comment lines are skipped too.
    stripped_lines = (line.strip() for line in f)
    repo_urls = [url for url in stripped_lines if url and not url.startswith("#")]

# Fetch and summarize each README
for repo_url in repo_urls:
    try:
        owner, repo = extract_repo_parts(repo_url)
        readme_text = fetch_readme(owner, repo)
        repo_name = f"{owner}/{repo}"
        summarize_readme(readme_text, repo_name)
    except Exception as e:
        # Deliberately broad: one bad URL, missing README, or API failure is
        # reported inline and must not stop the remaining repositories.
        display_summary(f"**Error fetching {repo_url}: {e}**")
