In [10]:
# Search GitHub for Java projects that contain a Main.java file and save them to a CSV

import csv
import os

import requests

# Code-search query: files named Main.java in Java-language repositories.
# Only the first result page (at most 100 hits) is requested.
url = "https://api.github.com/search/code?q=filename:Main.java+language:java&per_page=100"

# Credentials must never be committed to a notebook: read the token from the
# environment instead of embedding it in the source.
github_token = os.environ.get("GITHUB_TOKEN")
if not github_token:
    raise RuntimeError("Set the GITHUB_TOKEN environment variable before running this cell")
headers = {'Authorization': f'token {github_token}'}

# The timeout keeps the cell from hanging indefinitely on a stalled connection.
response = requests.get(url, headers=headers, timeout=30)

if response.status_code != 200:
    raise Exception(f"Failed: HTTP error code: {response.status_code}")

data = response.json()["items"]

# Several hits can point at the same repository, so de-duplicate by URL.
repos = {item["repository"]["html_url"] for item in data}

with open("java_repos_with_main.csv", "w", newline="", encoding="utf-8") as f:
    writer = csv.writer(f)
    writer.writerow(["Name", "Owner", "URL"])

    # Set iteration order is arbitrary; sorting keeps the CSV row order stable
    # between runs, which matters to any later step that slices rows by position.
    for repo_url in sorted(repos):
        # html_url has the form https://github.com/<owner>/<name>
        owner, name = repo_url.rstrip("/").split("/")[-2:]
        writer.writerow([name, owner, repo_url])


In [None]:
# Iterating over the java projects and counting the number of commits

import requests
import csv
import pandas as pd
import re

def get_api_url(repo_url):
    """Build the GitHub REST API "commits" endpoint for a repository URL.

    Args:
        repo_url: A URL containing ``github.com/<owner>/<name>``, e.g.
            ``https://github.com/octocat/Hello-World``. A trailing ``.git``,
            query string or fragment is ignored.

    Returns:
        ``https://api.github.com/repos/<owner>/<name>/commits``, or ``None``
        when ``repo_url`` is not a string or does not contain an owner and a
        repository name after ``github.com/``.
    """
    # Empty CSV cells arrive as NaN (a float), which re.search cannot handle.
    if not isinstance(repo_url, str):
        return None

    # The dot is escaped so that only a literal "github.com" matches; each
    # path segment stops at "/", whitespace, "?" or "#" so that query strings
    # and fragments never leak into the repository name.
    pattern = r"github\.com/([^/\s?#]+)/([^/\s?#]+)"
    match = re.search(pattern, repo_url)
    if not match:
        return None
    username, repo_name = match.groups()

    # Clone URLs carry a ".git" suffix that is not part of the repository name.
    if repo_name.endswith(".git"):
        repo_name = repo_name[:-len(".git")]
    if not repo_name:
        return None

    return f"https://api.github.com/repos/{username}/{repo_name}/commits"


def get_commits(repo_url, token=None):
    """Count the commits returned by the GitHub API for a repository.

    The commits endpoint is paginated; pages are fetched one after another and
    the number of entries on each page is summed.

    Args:
        repo_url: Repository URL, passed to ``get_api_url``.
        token: Optional GitHub token. When omitted, the ``GITHUB_TOKEN``
            environment variable is used; if that is unset too, the requests
            are sent without an ``Authorization`` header.

    Returns:
        The number of commits counted. ``0`` when ``repo_url`` cannot be
        converted to an API URL or the first page does not return HTTP 200.
        If a later page fails, the commits counted so far are returned.

    Raises:
        requests.RequestException: on connection errors or when a request
            exceeds the timeout.
    """
    # Local import: keeps the function usable even when only this cell is run.
    import os

    api_url = get_api_url(repo_url)
    if api_url is None:
        return 0

    # Never hardcode credentials in the notebook; take them from the caller or
    # from the environment.
    if token is None:
        token = os.environ.get("GITHUB_TOKEN")
    headers = {'Authorization': f'token {token}'} if token else {}

    # Request the largest page size the API offers to minimise the number of
    # round trips (and rate-limit consumption) per repository.
    per_page = 100

    total_commits = 0
    page = 1
    while True:
        response = requests.get(
            api_url,
            params={"per_page": per_page, "page": page},
            headers=headers,
            timeout=30,
        )

        # Any non-200 answer (missing repo, rate limit, ...) ends the count.
        if response.status_code != 200:
            break

        commits_in_page = len(response.json())
        total_commits += commits_in_page

        # Stop on an empty page, or as soon as the Link header no longer
        # advertises a "next" page - this avoids one extra empty request.
        if commits_in_page == 0 or "next" not in response.links:
            break

        page += 1

    return total_commits


# Read the repository list (columns: Name, Owner, URL) into a DataFrame
df = pd.read_csv("java_repos_with_main.csv")

# Every repository starts with 0 commits; rows skipped below keep that value
df["Commits"] = 0

# Positional row at which counting starts.
# NOTE(review): the previous comment said "index 2" while the code sliced at 29 -
# presumably a resume point after an interrupted run. Set to 0 to count every
# repository; rows before START_ROW are written out with Commits == 0.
START_ROW = 29

# Only the URL column is needed, so iterate over it directly instead of
# materialising a full row object per repository.
for i, repo_url in df["URL"].iloc[START_ROW:].items():
    df.at[i, "Commits"] = get_commits(repo_url)

# Save the updated DataFrame to a new CSV file
df.to_csv("java_repos_with_main_and_commits.csv", index=False)