In [None]:
import os
import time

import pandas as pd
import requests

# Optional GitHub personal access token, read from the GITHUB_TOKEN environment
# variable so the secret is never stored in the notebook. When it is unset the
# requests below are sent without an Authorization header.
# NOTE(review): a token used to be hardcoded on this line; treat it as leaked
# and revoke it in the GitHub account settings.
GITHUB_TOKEN = os.environ.get("GITHUB_TOKEN", "")
HEADERS = {"Authorization": f"token {GITHUB_TOKEN}"} if GITHUB_TOKEN else {}

# Clean and format company names
def clean_company_name(company):
    """Normalise the free-text company field of a GitHub profile.

    Surrounding whitespace and any leading ``@`` characters are removed and
    the remainder is upper-cased.

    Args:
        company: Raw company string, or None / empty when the field is unset.

    Returns:
        The cleaned, upper-cased name, or None when nothing is left after
        cleaning.
    """
    if not company:
        return None
    # Strip a second time: removing the '@' can expose whitespace that the
    # first strip could not reach (e.g. "@ acme").
    cleaned = company.strip().lstrip('@').strip().upper()
    # Whitespace-only or '@'-only input carries no company name.
    return cleaned or None

# Fetch detailed user information
def fetch_user_details(username, timeout=10):
    """Fetch the profile of a single GitHub user.

    Args:
        username: GitHub login to look up.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The decoded JSON response, or None when the request fails or the
        server answers with a status other than 200.
    """
    url = f"https://api.github.com/users/{username}"
    try:
        # Without a timeout a stalled connection would block the run forever.
        response = requests.get(url, headers=HEADERS, timeout=timeout)
    except requests.RequestException as exc:
        # Network failures follow the same "report and return None" contract
        # as HTTP errors, so one bad request does not abort the collection.
        print(f"Error fetching user details for {username}: {exc}")
        return None

    if response.status_code != 200:
        # Error bodies are not guaranteed to be JSON, so report the raw text
        # instead of decoding it.
        print(f"Error fetching user details for {username}: {response.text}")
        return None
    return response.json()

# Fetch users in Austin with 100+ followers, dynamically handle pagination
def fetch_users_in_austin(timeout=10):
    """Search GitHub for users located in Austin with more than 100 followers.

    Pages through the user search endpoint 100 results at a time until the
    results run out or a request fails.

    Args:
        timeout: Seconds to wait for each response before giving up.

    Returns:
        A list of the user items collected so far (possibly partial if a
        request failed part-way through).
    """
    # NOTE(review): GitHub documents a cap of 1,000 results per search query;
    # confirm that is enough for this search.
    url = "https://api.github.com/search/users"
    query = "location:Austin followers:>100"
    per_page = 100
    users = []
    page = 1

    while True:
        params = {"q": query, "per_page": per_page, "page": page}
        try:
            # Without a timeout a stalled connection would block forever.
            response = requests.get(url, headers=HEADERS, params=params, timeout=timeout)
        except requests.RequestException as exc:
            print(f"Error: {exc}")
            break

        if response.status_code != 200:
            # Error bodies are not guaranteed to be JSON; print the raw text.
            print(f"Error: {response.text}")
            break

        data = response.json().get("items", [])
        if not data:
            break  # No more data, exit the loop

        users.extend(data)
        print(f"Fetched {len(data)} users from page {page}.")

        if len(data) < per_page:
            break  # A short page is the last one; skip the extra request and sleep

        page += 1  # Move to the next page
        time.sleep(2)  # Be polite to the API

    return users

# Fetch all repositories for a user, dynamically handle pagination
def fetch_repositories(username, timeout=10):
    """Fetch every repository listed for a GitHub user.

    Pages through the user's repository listing 100 entries at a time until
    the results run out or a request fails.

    Args:
        username: GitHub login whose repositories are listed.
        timeout: Seconds to wait for each response before giving up.

    Returns:
        A list of the repository items collected so far (possibly partial if
        a request failed part-way through).
    """
    url = f"https://api.github.com/users/{username}/repos"
    per_page = 100
    repos = []
    page = 1

    while True:
        params = {"per_page": per_page, "page": page}
        try:
            # Without a timeout a stalled connection would block forever.
            response = requests.get(url, headers=HEADERS, params=params, timeout=timeout)
        except requests.RequestException as exc:
            print(f"Error fetching repos for {username}: {exc}")
            break

        if response.status_code != 200:
            # Error bodies are not guaranteed to be JSON; print the raw text.
            print(f"Error fetching repos for {username}: {response.text}")
            break

        data = response.json()
        if not data:
            break  # No more data, exit the loop

        repos.extend(data)
        print(f"Fetched {len(data)} repos from page {page} for {username}.")

        if len(data) < per_page:
            break  # A short page is the last one; skip the extra request and sleep

        page += 1  # Move to the next page
        time.sleep(1)  # Be polite to the API

    return repos

# Main function to collect data and export to Excel
def main():
    """Collect Austin users and their repositories and export both to Excel.

    Searches for users, fetches each user's profile and repositories, then
    writes one workbook of user rows and one of repository rows to the
    current working directory.
    """
    users_file = "usersall.xlsx"
    repos_file = "repositoriesall.xlsx"

    users_data = []
    repos_data = []

    users = fetch_users_in_austin()
    print(f"Total users fetched: {len(users)}")

    for user in users:
        username = user["login"]
        user_details = fetch_user_details(username)

        # A failed profile lookup only skips the user row; the repositories
        # are still collected below.
        if user_details:
            users_data.append({
                "login": user_details["login"],
                "name": user_details.get("name"),
                "company": clean_company_name(user_details.get("company")),
                "location": user_details.get("location"),
                "email": user_details.get("email"),
                "hireable": user_details.get("hireable"),
                "bio": user_details.get("bio"),
                "public_repos": user_details.get("public_repos"),
                "followers": user_details.get("followers"),
                "following": user_details.get("following"),
                "created_at": user_details.get("created_at"),
            })

        print(f"Fetching repositories for {username}...")
        repos = fetch_repositories(username)

        repos_data.extend(
            {
                "login": username,
                "full_name": repo["full_name"],
                "created_at": repo["created_at"],
                "stargazers_count": repo["stargazers_count"],
                "watchers_count": repo["watchers_count"],
                "language": repo["language"],
                "has_projects": repo["has_projects"],
                "has_wiki": repo["has_wiki"],
                # The license entry may be missing or null.
                "license_name": repo["license"]["name"] if repo.get("license") else None,
            }
            for repo in repos
        )

    # Convert data to DataFrames and save to Excel
    users_df = pd.DataFrame(users_data)
    repos_df = pd.DataFrame(repos_data)

    users_df.to_excel(users_file, index=False)
    repos_df.to_excel(repos_file, index=False)

    # Report the names actually written; the old message named different files.
    print(f"Data saved to {users_file} and {repos_file}")

# Entry point: the guard is true when this code runs as the main module, so
# executing it directly starts the whole collection via main().
if __name__ == "__main__":
    main()


Fetched 100 users from page 1.
Fetched 100 users from page 2.
Fetched 100 users from page 3.
Fetched 100 users from page 4.
Fetched 71 users from page 5.
Total users fetched: 471
Fetching repositories for getify...
Fetched 69 repos from page 1 for getify.
Fetching repositories for benawad...
Fetched 100 repos from page 1 for benawad.
Fetched 100 repos from page 2 for benawad.
Fetched 54 repos from page 3 for benawad.
Fetching repositories for steveklabnik...
Fetched 100 repos from page 1 for steveklabnik.
Fetched 100 repos from page 2 for steveklabnik.
Fetched 100 repos from page 3 for steveklabnik.
Fetched 100 repos from page 4 for steveklabnik.
Fetched 100 repos from page 5 for steveklabnik.
Fetched 100 repos from page 6 for steveklabnik.
Fetched 100 repos from page 7 for steveklabnik.
Fetched 100 repos from page 8 for steveklabnik.
Fetched 31 repos from page 9 for steveklabnik.
Fetching repositories for cloudflare...
Fetched 100 repos from page 1 for cloudflare.
Fetched 100 repos fr

In [None]:
import pandas as pd

# Read back the user table that the collection step exported
users_df = pd.read_excel("usersall.xlsx")

# Rank by follower count, highest first, and keep the first five rows
top_users = (
    users_df
    .sort_values("followers", ascending=False)
    .iloc[:5]
)

# Join the logins of those rows into one comma-separated string
top_logins = ", ".join(top_users["login"].to_list())

# Report the result
print(f"Top 5 users in Austin by followers: {top_logins}")


Top 5 users in Austin by followers: getify, benawad, steveklabnik, cloudflare, jbogard
