In [1]:
import os
import json
from dotenv import load_dotenv
from IPython.display import Markdown, display, update_display
from scraper import fetch_website_links, fetch_website_contents
from openai import OpenAI

In [2]:
# Setup: load the .env file, sanity-check the OpenAI key, create the client

load_dotenv(override=True)
api_key = os.getenv('OPENAI_API_KEY')

# Purely a heuristic on the key's presence, prefix and length; nothing is sent to the API here.
if not (api_key and api_key.startswith('sk-proj-') and len(api_key) > 10):
    print("There might be a problem with your API key? Please visit the troubleshooting notebook!")
else:
    print("API key looks good so far")

# The client is created with no explicit arguments, regardless of the check above.
openai = OpenAI()

API key looks good so far


In [3]:
# System message for the repository-selection request (used by select_relevant_links).
# It tells the model to reply with a single JSON object mapping repository name -> full
# repository URL, and to exclude profile/organization/tab links.
link_system_prompt = """
You are provided with a list of links found on a GitHub profile page.
You should identify which links are public repositories and extract their repository names and full URLs.
You must respond ONLY in valid JSON format where the response is a JSON object where each key is the repository name and each value is the full repository URL.

The response must be a valid JSON object in this exact format (no additional text, no code blocks):
{
    "repo-name-1": "https://github.com/username/repo-name-1",
    "repo-name-2": "https://github.com/username/repo-name-2",
    "repo-name-3": "https://github.com/username/repo-name-3"
}

Important rules:
- Only include links that point to actual repositories (github.com/username/repo-name format)
- Extract only the repository name (the last part of the URL path) as the key
- Use the full URL as the value
- Do not include profile links, organization links, tabs, or any other non-repository links
- Respond with a valid JSON object only, no explanations or markdown
"""

In [4]:
def get_link_user_prompt(url):
    """Build the user message listing every link scraped from ``url``.

    Args:
        url: Address of the GitHub profile page to scrape.

    Returns:
        The instruction text followed by the links returned by
        ``fetch_website_links(url)``, one per line.
    """
    instructions = f"""
Here is the list of links found on the GitHub profile page: {url}

Please identify all public repositories from this list and respond in JSON format where each repository is represented as:
"repo name" : "full repository URL"

Only include links to public repositories (URLs that point to github.com/username/repo-name).
Do not include profile links, organization links, or any other non-repository links.

Links found on the page (some might be relative links):
"""
    scraped_links = fetch_website_links(url)
    return instructions + "\n".join(scraped_links)

In [None]:
def select_relevant_links(url):
    """Select relevant GitHub repositories from a profile page.

    Builds the user prompt from the links scraped at ``url``, sends it together
    with ``link_system_prompt`` to the chat-completions endpoint in JSON mode,
    and parses the reply.

    Args:
        url: Address of the GitHub profile (or its repositories tab).

    Returns:
        The parsed JSON reply; the system prompt asks for an object mapping
        repository name to full repository URL.

    Raises:
        json.JSONDecodeError: If the model's reply is not valid JSON.
    """
    print(f"Selecting repositories from {url}")
    response = openai.chat.completions.create(
        # Fixed model id: it was "gpt-4.o-mini" (stray dot), which is not an
        # OpenAI model name and makes the request fail.
        model="gpt-4o-mini",
        messages=[
            {"role": "system", "content": link_system_prompt},
            {"role": "user", "content": get_link_user_prompt(url)}
        ],
        # JSON mode: constrains the reply to a single JSON object.
        response_format={"type": "json_object"}
    )
    result = response.choices[0].message.content
    repos = json.loads(result)
    print(f"Found {len(repos)} repositories")
    return repos

In [6]:
# Try the selector on one profile's repositories tab and print the parsed mapping.
# Note: this issues a live chat-completions request.
print(select_relevant_links("https://github.com/ahmetcnrgl?tab=repositories"))

Selecting repositories from https://github.com/ahmetcnrgl?tab=repositories
Found 14 repositories
{'Olist-Analytics-Dashboard': 'https://github.com/ahmetcnrgl/Olist-Analytics-Dashboard', 'data-science-exercies': 'https://github.com/ahmetcnrgl/data-science-exercies', 'ahmetcnrgl': 'https://github.com/ahmetcnrgl/ahmetcnrgl', 'fullstack-assignment': 'https://github.com/ahmetcnrgl/fullstack-assignment', 'voice-assistant-w-python': 'https://github.com/ahmetcnrgl/voice-assistant-w-python', 'my-file-explorer': 'https://github.com/ahmetcnrgl/my-file-explorer', 'afet-bitirme-projesi': 'https://github.com/ahmetcnrgl/afet-bitirme-projesi', 'Istanbul-traffic-density-data-analysis': 'https://github.com/ahmetcnrgl/Istanbul-traffic-density-data-analysis', 'BattleShipGame': 'https://github.com/ahmetcnrgl/BattleShipGame', 'Student-Depression-Data-Analysis': 'https://github.com/ahmetcnrgl/Student-Depression-Data-Analysis', 'Milestone': 'https://github.com/ahmetcnrgl/Milestone', 'to-do-application': 'http

In [7]:
# System message asking the model to summarize repositories as a Markdown list.
# NOTE(review): no visible cell passes this prompt to the API yet — confirm whether it is still needed.
github_link_system_prompt="""You are a helpful assistant that summarizes GitHub repositories.
Analyze the provided data and create a clean Markdown list of the repositories."""

In [None]:
def get_github_link_user_prompt(repo_author,url):
    """Build a user message describing ``repo_author``'s public repositories.

    Args:
        repo_author: GitHub account name to mention in the prompt.
        url: Profile URL passed to ``select_relevant_links`` to obtain the repositories.

    Returns:
        The introductory sentence, a blank line, then the string form of the
        mapping returned by ``select_relevant_links(url)``.
    """
    intro = f"""Here are the projects from {repo_author} GitHub account. These are all public repos from {repo_author}"""
    repos = select_relevant_links(url)
    # Separate the sentence from the data with a blank line; previously the
    # mapping was glued directly onto the author name at the end of the sentence.
    return f"{intro}\n\n{repos}"


In [9]:
# Build the prompt for one profile; as the cell's last expression, the returned string is shown as the result.
# Note: this calls select_relevant_links, so it issues a live chat-completions request.
get_github_link_user_prompt("ahmetcnrgl","https://github.com/ahmetcnrgl?tab=repositories")

Selecting repositories from https://github.com/ahmetcnrgl?tab=repositories
Found 14 repositories


"Here are the projects from f{repo_author} GitHub account. These are all public repos from f{repo_author}{'Olist-Analytics-Dashboard': 'https://github.com/ahmetcnrgl/Olist-Analytics-Dashboard', 'data-science-exercies': 'https://github.com/ahmetcnrgl/data-science-exercies', 'ahmetcnrgl': 'https://github.com/ahmetcnrgl/ahmetcnrgl', 'fullstack-assignment': 'https://github.com/ahmetcnrgl/fullstack-assignment', 'voice-assistant-w-python': 'https://github.com/ahmetcnrgl/voice-assistant-w-python', 'my-file-explorer': 'https://github.com/ahmetcnrgl/my-file-explorer', 'afet-bitirme-projesi': 'https://github.com/ahmetcnrgl/afet-bitirme-projesi', 'Istanbul-traffic-density-data-analysis': 'https://github.com/ahmetcnrgl/Istanbul-traffic-density-data-analysis', 'BattleShipGame': 'https://github.com/ahmetcnrgl/BattleShipGame', 'Student-Depression-Data-Analysis': 'https://github.com/ahmetcnrgl/Student-Depression-Data-Analysis', 'Milestone': 'https://github.com/ahmetcnrgl/Milestone', 'to-do-application

In [15]:
def create_link_selector(repo_author, url):
    """Display the repositories found at ``url`` as a Markdown bullet list.

    Args:
        repo_author: Account name shown in bold in the heading line.
        url: Profile URL passed to ``select_relevant_links``.

    Returns:
        None. The Markdown is rendered with ``display`` rather than returned.
    """
    bullet_lines = [
        f"- [{repo_name}]({repo_url})\n"
        for repo_name, repo_url in select_relevant_links(url).items()
    ]
    heading = f"Here is a list of public repositories from **{repo_author}**:\n\n"
    display(Markdown(heading + "".join(bullet_lines)))

In [16]:
# Render the repository list for one profile as Markdown (triggers a live chat-completions request).
create_link_selector("ahmetcnrgl","https://github.com/ahmetcnrgl?tab=repositories")

Selecting repositories from https://github.com/ahmetcnrgl?tab=repositories
Found 15 repositories


Here is a list of public repositories from **ahmetcnrgl**:

- [github-repo-extractor](https://github.com/ahmetcnrgl/github-repo-extractor)
- [Olist-Analytics-Dashboard](https://github.com/ahmetcnrgl/Olist-Analytics-Dashboard)
- [data-science-exercies](https://github.com/ahmetcnrgl/data-science-exercies)
- [ahmetcnrgl](https://github.com/ahmetcnrgl/ahmetcnrgl)
- [fullstack-assignment](https://github.com/ahmetcnrgl/fullstack-assignment)
- [voice-assistant-w-python](https://github.com/ahmetcnrgl/voice-assistant-w-python)
- [my-file-explorer](https://github.com/ahmetcnrgl/my-file-explorer)
- [afet-bitirme-projesi](https://github.com/ahmetcnrgl/afet-bitirme-projesi)
- [Istanbul-traffic-density-data-analysis](https://github.com/ahmetcnrgl/Istanbul-traffic-density-data-analysis)
- [BattleShipGame](https://github.com/ahmetcnrgl/BattleShipGame)
- [Student-Depression-Data-Analysis](https://github.com/ahmetcnrgl/Student-Depression-Data-Analysis)
- [Milestone](https://github.com/ahmetcnrgl/Milestone)
- [to-do-application](https://github.com/ahmetcnrgl/to-do-application)
- [Populer-Parola-Veri-Setinde-Islemler](https://github.com/ahmetcnrgl/Populer-Parola-Veri-Setinde-Islemler)
- [Metal-Muzik-Data-Mining](https://github.com/ahmetcnrgl/Metal-Muzik-Data-Mining)
