In [3]:
import requests
import pandas as pd
import time
from getpass import getpass

# --- GitHub authentication -------------------------------------------------
# The token is prompted for at run time (getpass) rather than hardcoded.
token = getpass("Enter your GitHub token: ")

# Headers sent with every GitHub API request issued by this notebook.
headers = {}
headers['Authorization'] = f'Bearer {token}'
headers['Accept'] = 'application/vnd.github+json'

# --- User search parameters ------------------------------------------------
# city / min_followers are read by fetch_users() when it builds the query;
# users_data is the list fetch_users() appends each matching login to.
city, min_followers = "Hyderabad", 50
users_data = list()

# Function to fetch users in Hyderabad with over 50 followers
def fetch_users():
    """Collect the logins of GitHub users matching the search into ``users_data``.

    Queries the GitHub user-search endpoint for accounts whose location is
    ``city`` and whose follower count exceeds ``min_followers`` (both
    module-level settings), 100 results per page, following pagination
    until the response carries no ``next`` link.

    Side effects:
        Appends each matching ``login`` string to the module-level
        ``users_data`` list. Returns nothing.

    Raises:
        requests.HTTPError: if GitHub answers with a 4xx/5xx status (bad
            token, rate limit, ...). Previously such a response was
            indistinguishable from "no more users" and silently produced
            a truncated list.
    """
    search_url = "https://api.github.com/search/users"
    page = 1
    while True:
        # Let requests URL-encode the query so a city containing spaces or
        # reserved characters cannot corrupt the request.
        params = {
            'q': f"location:{city} followers:>{min_followers}",
            'per_page': 100,
            'page': page,
        }
        response = requests.get(search_url, headers=headers, params=params, timeout=30)

        # Fail loudly on API errors instead of reporting partial data as success.
        response.raise_for_status()
        data = response.json()

        # Append user data (a missing/empty 'items' simply adds nothing).
        users_data.extend(user['login'] for user in data.get('items', []))

        # Stop when GitHub's Link header advertises no further page.
        if 'next' not in response.links:
            break

        page += 1
        time.sleep(1)  # Avoid rate limits

# Run the search; fetch_users() appends every matching login to users_data,
# so the length of that list is the number of users collected.
fetch_users()
print(f"Total users fetched: {len(users_data)}")


Enter your GitHub token: ··········
Total users fetched: 505


In [5]:
def fetch_user_details():
    """Fetch profile and repository data for every login in ``users_data``.

    For each login this makes one request for the user profile and then
    pages through the user's repositories (100 per page) until up to
    500 have been collected or GitHub reports no further page.

    Side effects:
        Appends one dict per user to ``user_details`` and one dict per
        repository to ``repositories_data``. Both lists, and the
        ``clean_company_name`` helper applied to the company field, are
        expected to exist at module level (they are not defined in this
        function). Returns nothing.

    Raises:
        requests.HTTPError: if any request returns a 4xx/5xx status.
            Without this check an error body (a dict) would be recorded
            as a blank user row or fail later with an unrelated exception
            when sliced as a repository list.
    """
    max_repos = 500   # cap on repositories recorded per user
    per_page = 100    # page size requested from the repositories endpoint

    for login in users_data:
        user_url = f"https://api.github.com/users/{login}"
        repos_url = f"https://api.github.com/users/{login}/repos"

        # Get user details
        user_response = requests.get(user_url, headers=headers, timeout=30)
        user_response.raise_for_status()
        user_info = user_response.json()

        # Append cleaned user details
        user_details.append({
            'login': user_info.get('login', ''),
            'name': user_info.get('name', ''),
            'company': clean_company_name(user_info.get('company', '')),
            'location': user_info.get('location', ''),
            'email': user_info.get('email', ''),
            'hireable': str(user_info.get('hireable', '')),
            'bio': user_info.get('bio', ''),
            'public_repos': user_info.get('public_repos', 0),
            'followers': user_info.get('followers', 0),
            'following': user_info.get('following', 0),
            'created_at': user_info.get('created_at', '')
        })

        # Get repository details for each user. A single request returns at
        # most one page, so keep paging until the cap is reached.
        # NOTE(review): "most recent" is interpreted here as most recently
        # pushed; use sort='created' or 'updated' if another meaning is intended.
        repos = []
        page = 1
        while len(repos) < max_repos:
            repo_response = requests.get(
                repos_url,
                headers=headers,
                params={
                    'per_page': per_page,
                    'page': page,
                    'sort': 'pushed',
                    'direction': 'desc',
                },
                timeout=30,
            )
            repo_response.raise_for_status()
            repos.extend(repo_response.json())

            # No 'next' link means this was the last page.
            if 'next' not in repo_response.links:
                break
            page += 1

        for repo in repos[:max_repos]:  # Limit to 500 most recent
            # 'license' is null for unlicensed repos; fall back to an empty dict.
            license_info = repo.get('license') or {}
            repositories_data.append({
                'login': login,
                'full_name': repo.get('full_name', ''),
                'created_at': repo.get('created_at', ''),
                'stargazers_count': repo.get('stargazers_count', 0),
                'watchers_count': repo.get('watchers_count', 0),
                'language': repo.get('language', ''),
                'has_projects': str(repo.get('has_projects', '')),
                'has_wiki': str(repo.get('has_wiki', '')),
                'license_name': license_info.get('key', '')
            })
        time.sleep(1)  # Avoid rate limits

# Populate user_details and repositories_data for every login in users_data.
# NOTE(review): user_details, repositories_data and clean_company_name are not
# defined in the cells shown here — presumably they come from the skipped
# In [4] cell; confirm the notebook still works after Restart & Run All.
fetch_user_details()
print("Fetched detailed data for all users.")


Fetched detailed data for all users.


In [6]:
# Build one DataFrame per collected record list (users, then repositories).
users_df, repos_df = (
    pd.DataFrame(records) for records in (user_details, repositories_data)
)

# Write each frame to its CSV file without the pandas index column.
for frame, csv_path in ((users_df, 'users.csv'), (repos_df, 'repositories.csv')):
    frame.to_csv(csv_path, index=False)

print("Data saved to users.csv and repositories.csv")


Data saved to users.csv and repositories.csv
