In [None]:
import os

import requests

# Read the token from the GITHUB_TOKEN environment variable so a real credential
# never has to be pasted into (and saved with) the notebook. The placeholder is
# kept only as a fallback for when the variable is not set.
GITHUB_TOKEN = os.environ.get('GITHUB_TOKEN', 'YOUR_API_KEY')
OWNER = 'OWNER_NAME'   # replace with the owner/user/org
REPO = 'REPO_NAME'  # replace with the repository name

# GitHub API base URL
BASE_URL = f'https://api.github.com/repos/{OWNER}/{REPO}'

# Authentication header
headers = {
    'Authorization': f'token {GITHUB_TOKEN}',
    'Accept': 'application/vnd.github.v3+json'
}

# Fetch repository details. The timeout keeps the cell from hanging forever
# if the server never answers.
response = requests.get(BASE_URL, headers=headers, timeout=30)

if response.status_code == 200:
    repo_data = response.json()
    print("Repository Name:", repo_data['name'])
    print("Description:", repo_data['description'])
    print("Stars:", repo_data['stargazers_count'])
    print("Forks:", repo_data['forks_count'])
    print("Open Issues:", repo_data['open_issues_count'])
    print("Created at:", repo_data['created_at'])
    print("Last updated:", repo_data['updated_at'])
else:
    print("Error fetching repository data:", response.status_code)


In [None]:
import base64
# Fetch the repository README; its body arrives base64-encoded in the
# 'content' field of the JSON response.
readme_url = f'https://api.github.com/repos/{OWNER}/{REPO}/readme'
res = requests.get(readme_url, headers=headers, timeout=30)

if res.status_code == 200:
    data = res.json()
    # errors='replace' keeps a README that is not valid UTF-8 printable
    # instead of aborting the cell with UnicodeDecodeError.
    content = base64.b64decode(data['content']).decode('utf-8', errors='replace')
    print("README Content:\n")
    print(content)
else:
    print("Failed to fetch README:", res.status_code)

In [None]:
# Number of pull requests (open and closed).
# Ask for the largest page size and keep following the "next" link so the
# listing is not cut off after the first page of results.
pr_url = f'{BASE_URL}/pulls?state=all&per_page=100'
pulls = []
next_url = pr_url
while next_url:
    response = requests.get(next_url, headers=headers, timeout=30)
    if response.status_code != 200:
        break
    pulls.extend(response.json())
    # requests parses the Link header into response.links; no 'next' entry
    # means this was the last page.
    next_url = response.links.get('next', {}).get('url')

if response.status_code != 200:
    # Include the status code, like the other cells do, so the cause is visible.
    print("Failed to fetch PRs:", response.status_code)
elif len(pulls) == 0:
    print("0 pull requests")
else:
    print("Total pull requests:", len(pulls))
    for pr in pulls:
        print(f"PR #{pr['number']} by {pr['user']['login']}: {pr['title']} (State: {pr['state']})")


In [None]:
def get_paginated_data(url, timeout=30):
    """Collect the JSON items from every page of a paginated GET endpoint.

    Starting at ``url``, each page is requested with the module-level
    ``headers`` and its JSON payload is appended to the result. The URL of the
    following page is taken from the response's ``next`` link; the walk ends
    when a response carries no such link.

    Args:
        url: URL of the first page. A falsy value yields an empty list.
        timeout: Seconds to wait for each response, so an unresponsive server
            cannot hang the notebook.

    Returns:
        list: Items from all pages that were fetched successfully. If a page
        answers with a non-200 status, a message is printed and the items
        gathered up to that point are returned.
    """
    results = []
    while url:
        res = requests.get(url, headers=headers, timeout=timeout)
        if res.status_code != 200:
            # Say why the walk stopped instead of silently returning a
            # truncated list.
            print(f"Request failed with status {res.status_code}: {url}")
            break
        results.extend(res.json())
        # No 'next' link means this was the last page; None ends the loop.
        url = res.links.get('next', {}).get('url')
    return results

# Usage example: walk every page of the commits endpoint and report how many
# items came back in total.
commits_endpoint = f'{BASE_URL}/commits'
all_commits = get_paginated_data(commits_endpoint)
print("Total commits:", len(all_commits))


In [None]:
# Look up the default branch; both the tree listing and the raw-file URLs
# below are built from it.
repo_url = f'https://api.github.com/repos/{OWNER}/{REPO}'
repo_res = requests.get(repo_url, headers=headers, timeout=30)
# Fail with the HTTP error itself rather than a KeyError on the error payload.
repo_res.raise_for_status()
repo_data = repo_res.json()
default_branch = repo_data['default_branch']


# Recursive tree listing of the default branch
sha_url = f'https://api.github.com/repos/{OWNER}/{REPO}/git/trees/{default_branch}?recursive=1'
tree_response = requests.get(sha_url, headers=headers, timeout=30)
tree_response.raise_for_status()
tree_res = tree_response.json()

# Keep only entries of type 'blob'; other entry types are not downloaded.
files = [item for item in tree_res.get('tree', []) if item['type'] == 'blob']

print(f"Total files found: {len(files)}")

# Download each file
for file in files:
    file_url = f"https://raw.githubusercontent.com/{OWNER}/{REPO}/{default_branch}/{file['path']}"
    print(f"Downloading: {file['path']}")
    content_res = requests.get(file_url, timeout=30)
    if content_res.status_code != 200:
        # Do not write an error page to disk as if it were the file's content.
        print(f"  Skipped {file['path']}: HTTP {content_res.status_code}")
        continue

    # Files in sub-folders need their parent directories to exist first.
    parent_dir = os.path.dirname(file['path'])
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    # Save to local. Writing the raw bytes keeps binary files (images,
    # archives, ...) intact and leaves text files byte-for-byte unchanged.
    with open(file['path'], 'wb') as f:
        f.write(content_res.content)


In [None]:
import matplotlib.pyplot as plt
from collections import defaultdict
from datetime import datetime

# GitHub API to get commits (paginated)
def get_all_commits(timeout=30):
    """Fetch every commit of the OWNER/REPO repository, 100 per request.

    Pages are requested in increasing order, using the module-level
    ``headers``, until an empty page is returned.

    Args:
        timeout: Seconds to wait for each response, so an unresponsive server
            cannot hang the notebook.

    Returns:
        list: Commit objects as returned by the API. If a request answers
        with a non-200 status, a message is printed and the commits gathered
        up to that point are returned.
    """
    commits = []
    page = 1
    while True:
        url = f'https://api.github.com/repos/{OWNER}/{REPO}/commits?page={page}&per_page=100'
        res = requests.get(url, headers=headers, timeout=timeout)
        if res.status_code != 200:
            # Make the failure visible; otherwise a partial (or empty) history
            # would look like the complete one.
            print("Failed to fetch commits:", res.status_code)
            break
        data = res.json()
        if not data:
            # An empty page means there is nothing left to fetch.
            break
        commits.extend(data)
        page += 1
    return commits

# Step 1: Pull the full commit history
all_commits = get_all_commits()

# Step 2: Tally commits per date, using the date part of each commit's
# author timestamp
commit_dates = defaultdict(int)
for entry in all_commits:
    authored_at = datetime.strptime(entry['commit']['author']['date'], "%Y-%m-%dT%H:%M:%SZ")
    commit_dates[authored_at.date()] += 1

# Step 3: Order the (date, count) pairs chronologically
sorted_dates = sorted(commit_dates.items())

# Step 4: Split the pairs into parallel x / y sequences
dates = [day for day, total in sorted_dates]
counts = [total for day, total in sorted_dates]

# Step 5: Draw the series on an explicit figure/axes pair
fig, ax = plt.subplots(figsize=(6, 4))
ax.plot(dates, counts, marker='o', linestyle='-', color='blue')
ax.set_title(f'Commits per Day in {OWNER}/{REPO}')
ax.set_xlabel('Date')
ax.set_ylabel('Number of Commits')
ax.grid(True)
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()


In [None]:
import pandas as pd
import seaborn as sns

# ----- 🔁 Fetch Commits -----
def fetch_commits(owner, repo, timeout=30):
    """Fetch every commit of ``owner/repo``, 100 per request.

    Pages are requested in increasing order, using the module-level
    ``headers``, until an empty page is returned.

    Args:
        owner: Account (user or organisation) that owns the repository.
        repo: Repository name.
        timeout: Seconds to wait for each response, so an unresponsive server
            cannot hang the notebook.

    Returns:
        list: Commit objects as returned by the API. If a request answers
        with a non-200 status, a message is printed and the commits gathered
        up to that point are returned.
    """
    all_commits = []
    page = 1
    while True:
        url = f'https://api.github.com/repos/{owner}/{repo}/commits?page={page}&per_page=100'
        res = requests.get(url, headers=headers, timeout=timeout)
        if res.status_code != 200:
            # Make the failure visible; otherwise a partial (or empty) history
            # would look like the complete one.
            print("Failed to fetch commits:", res.status_code)
            break
        data = res.json()
        if not data:
            # An empty page means there is nothing left to fetch.
            break
        all_commits.extend(data)
        page += 1
    return all_commits

# ----- 🔁 Fetch Contributors -----
def fetch_contributors(owner, repo, timeout=30):
    """Fetch all contributors of ``owner/repo``, 100 per request.

    Pages are requested in increasing order, using the module-level
    ``headers``, until an empty page is returned, so the result is not
    limited to the first page of the listing.

    Args:
        owner: Account (user or organisation) that owns the repository.
        repo: Repository name.
        timeout: Seconds to wait for each response, so an unresponsive server
            cannot hang the notebook.

    Returns:
        list: Contributor objects as returned by the API. If a request
        answers with a non-200 status, a message is printed and the
        contributors gathered up to that point are returned, so callers
        always receive a list rather than an error payload.
    """
    contributors = []
    page = 1
    while True:
        url = f'https://api.github.com/repos/{owner}/{repo}/contributors?page={page}&per_page=100'
        res = requests.get(url, headers=headers, timeout=timeout)
        if res.status_code != 200:
            print("Failed to fetch contributors:", res.status_code)
            break
        data = res.json()
        if not data:
            # An empty page means there is nothing left to fetch.
            break
        contributors.extend(data)
        page += 1
    return contributors

# ----- 🔁 Fetch Pull Requests -----
def fetch_pull_requests(owner, repo, timeout=30):
    """Fetch every pull request (any state) of ``owner/repo``, 100 per request.

    Pages are requested in increasing order, using the module-level
    ``headers``, until an empty page is returned.

    Args:
        owner: Account (user or organisation) that owns the repository.
        repo: Repository name.
        timeout: Seconds to wait for each response, so an unresponsive server
            cannot hang the notebook.

    Returns:
        list: Pull-request objects as returned by the API. If a request
        answers with a non-200 status, a message is printed and the pull
        requests gathered up to that point are returned.
    """
    all_prs = []
    page = 1
    while True:
        url = f'https://api.github.com/repos/{owner}/{repo}/pulls?state=all&page={page}&per_page=100'
        res = requests.get(url, headers=headers, timeout=timeout)
        if res.status_code != 200:
            # An error body is a non-empty JSON object: without this check it
            # would pass the "not data" test below, its keys would be appended
            # as if they were pull requests, and the loop would never end.
            print("Failed to fetch pull requests:", res.status_code)
            break
        data = res.json()
        if not data:
            # An empty page means there is nothing left to fetch.
            break
        all_prs.extend(data)
        page += 1
    return all_prs


In [None]:
# Build per-day and per-author commit tallies from the fetched history
commits = fetch_commits(OWNER, REPO)
commit_dates = defaultdict(int)
commit_authors = defaultdict(int)

for record in commits:
    git_author = record['commit']['author']
    stamp, who = git_author['date'], git_author['name']
    commit_dates[datetime.strptime(stamp, "%Y-%m-%dT%H:%M:%SZ").date()] += 1
    commit_authors[who] += 1

# Tabulate both tallies; the per-day rows are put in chronological order first
daily_rows = sorted(commit_dates.items())
df_commits = pd.DataFrame(daily_rows, columns=['Date', 'Commits'])
df_authors = pd.DataFrame(commit_authors.items(), columns=['Author', 'CommitCount'])

# Plot 1: Line plot (Commits per Day)
fig, ax = plt.subplots(figsize=(6, 5))
sns.lineplot(data=df_commits, x='Date', y='Commits', marker='o', ax=ax)
ax.set_title('Commits per Day')
plt.xticks(rotation=45)
ax.grid(True)
plt.tight_layout()
plt.show()

# Plot 2: Bar chart (Commits per Contributor), most active author first
authors_ranked = df_authors.sort_values(by='CommitCount', ascending=False)
fig, ax = plt.subplots(figsize=(6, 5))
sns.barplot(data=authors_ranked, x='Author', y='CommitCount', ax=ax)
ax.set_title('Commits per Contributor')
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()
