In [None]:
import json
import os
import re

import pandas as pd
import requests
# GitHub personal access token used for API requests. It is read from the
# GITHUB_TOKEN environment variable so the secret is never stored in the
# notebook (or committed with it). Empty string means "no token configured".
GITHUB_TOKEN = os.environ.get('GITHUB_TOKEN', '')

# Folder containing the benchmark JSON files (one file per entry).
BENCHMARK_DIR = 'data/benchmark/'

# Parsed content of every benchmark file, one dict per file.
data = []

# Load all JSON files in the folder. The listing is sorted because os.listdir
# returns names in arbitrary order, which would make the order of `data`
# (and of the DataFrame built from it) differ between runs.
for file_name in sorted(os.listdir(BENCHMARK_DIR)):
    if not file_name.endswith('.json'):
        continue
    file_path = os.path.join(BENCHMARK_DIR, file_name)
    # Decode explicitly as UTF-8 instead of relying on the platform's locale
    # encoding, which would garble non-ASCII text on some systems.
    with open(file_path, 'r', encoding='utf-8') as file:
        data.append(json.load(file))

In [None]:
# Flatten the list of (nested) benchmark dictionaries into one table. Nested
# keys are joined with '.', giving columns such as
# 'updatedDependency.githubCompareLink'.
df = pd.json_normalize(data, sep='.')

In [None]:

# Select the entries whose compare link still holds one of two placeholder
# messages instead of a URL.
compare_link = df['updatedDependency.githubCompareLink']

# Rows whose link is exactly the "repository could not be found" message.
original_search_condition = compare_link.eq(
    "A GitHub repository could not be found for the updated dependency."
)

# Rows whose link mentions missing tags; missing values count as no match.
new_search_condition = compare_link.str.contains("Relevant tags were not found", na=False)

# A row is selected when either placeholder is present.
combined_search_condition = original_search_condition | new_search_condition

filtered_df = df.loc[combined_search_condition]

In [None]:
# Manually curated lookup from Maven coordinates to the GitHub repository that
# hosts the dependency's sources. An artifact ID of "*" acts as a wildcard for
# every artifact of that group (see the lookup in update_json_file).
mapping = [
    {
        "updatedDependency.dependencyGroupID": "org.eclipse.jetty",
        "updatedDependency.dependencyArtifactID": "jetty-server",
        "githubRepoLink": "https://github.com/jetty/jetty.project"
    },
    {
        "updatedDependency.dependencyGroupID": "ch.qos.logback",
        "updatedDependency.dependencyArtifactID": "logback-classic",
        "githubRepoLink": "https://github.com/qos-ch/logback"
    },
    {
        "updatedDependency.dependencyGroupID": "org.jooq",
        "updatedDependency.dependencyArtifactID": "jooq-meta",
        "githubRepoLink": "https://github.com/jOOQ/jOOQ"
    },
    {
        "updatedDependency.dependencyGroupID": "org.eclipse.persistence",
        "updatedDependency.dependencyArtifactID": "org.eclipse.persistence.moxy",
        "githubRepoLink": "https://github.com/eclipse-ee4j/eclipselink"
    },
    {
        "updatedDependency.dependencyGroupID": "org.springframework",
        "updatedDependency.dependencyArtifactID": "*",
        "githubRepoLink": "https://github.com/spring-projects/spring-framework"
    },
    {
        "updatedDependency.dependencyGroupID": "com.google.api.grpc",
        "updatedDependency.dependencyArtifactID": "grpc-google-cloud-pubsublite-v1",
        "githubRepoLink": "https://github.com/googleapis/java-pubsublite"
    },
    {
        "updatedDependency.dependencyGroupID": "com.puppycrawl.tools",
        "updatedDependency.dependencyArtifactID": "checkstyle",
        "githubRepoLink": "https://github.com/checkstyle/checkstyle"
    },
    {
        # The org.apache.logging.log4j group is Log4j 2; its releases are
        # tagged in logging-log4j2, not in the Log4j 1.x repository.
        "updatedDependency.dependencyGroupID": "org.apache.logging.log4j",
        "updatedDependency.dependencyArtifactID": "log4j-core",
        "githubRepoLink": "https://github.com/apache/logging-log4j2"
    },
    {
        "updatedDependency.dependencyGroupID": "io.dropwizard",
        "updatedDependency.dependencyArtifactID": "dropwizard-client",
        "githubRepoLink": "https://github.com/dropwizard/dropwizard"
    },
    {
        "updatedDependency.dependencyGroupID": "net.minidev",
        "updatedDependency.dependencyArtifactID": "json-smart",
        "githubRepoLink": "https://github.com/netplex/json-smart-v2"
    },
    {
        "updatedDependency.dependencyGroupID": "io.quarkiverse.googlecloudservices",
        "updatedDependency.dependencyArtifactID": "quarkus-google-cloud-common-grpc",
        "githubRepoLink": "https://github.com/quarkiverse/quarkus-google-cloud-services"
    },
    {
        "updatedDependency.dependencyGroupID": "org.slf4j",
        "updatedDependency.dependencyArtifactID": "slf4j-api",
        "githubRepoLink": "https://github.com/qos-ch/slf4j"
    },
    {
        "updatedDependency.dependencyGroupID": "org.bouncycastle",
        "updatedDependency.dependencyArtifactID": "bcprov-jdk15on",
        "githubRepoLink": "https://github.com/bcgit/bc-java"
    },
    {
        "updatedDependency.dependencyGroupID": "com.github.cryptomorin",
        "updatedDependency.dependencyArtifactID": "XSeries",
        "githubRepoLink": "https://github.com/CryptoMorin/XSeries"
    },
    {
        "updatedDependency.dependencyGroupID": "org.apache.maven.surefire",
        "updatedDependency.dependencyArtifactID": "maven-surefire-common",
        "githubRepoLink": "https://github.com/apache/maven-surefire"
    },
    {
        "updatedDependency.dependencyGroupID": "org.springframework.cloud",
        "updatedDependency.dependencyArtifactID": "spring-cloud-stream",
        "githubRepoLink": "https://github.com/spring-cloud/spring-cloud-stream"
    },
    {
        "updatedDependency.dependencyGroupID": "org.springframework.boot",
        "updatedDependency.dependencyArtifactID": "*",
        "githubRepoLink": "https://github.com/spring-projects/spring-boot"
    },
    {
        "updatedDependency.dependencyGroupID": "com.squareup.okhttp3",
        "updatedDependency.dependencyArtifactID": "okhttp",
        "githubRepoLink": "https://github.com/square/okhttp"
    },
    {
        "updatedDependency.dependencyGroupID": "com.h2database",
        "updatedDependency.dependencyArtifactID": "h2",
        "githubRepoLink": "https://github.com/h2database/h2database"
    },
    {
        "updatedDependency.dependencyGroupID": "commons-io",
        "updatedDependency.dependencyArtifactID": "commons-io",
        "githubRepoLink": "https://github.com/apache/commons-io"
    },
    {
        "updatedDependency.dependencyGroupID": "org.flywaydb",
        "updatedDependency.dependencyArtifactID": "flyway-core",
        "githubRepoLink": "https://github.com/flyway/flyway"
    },
    {
        "updatedDependency.dependencyGroupID": "org.apache.poi",
        "updatedDependency.dependencyArtifactID": "poi-ooxml",
        "githubRepoLink": "https://github.com/apache/poi"
    },
    {
        "updatedDependency.dependencyGroupID": "com.artipie",
        "updatedDependency.dependencyArtifactID": "asto-core",
        "githubRepoLink": "https://github.com/artipie/asto"
    },
    {
        "updatedDependency.dependencyGroupID": "org.jsoup",
        "updatedDependency.dependencyArtifactID": "jsoup",
        "githubRepoLink": "https://github.com/jhy/jsoup"
    },
    {
        "updatedDependency.dependencyGroupID": "org.apache.sshd",
        "updatedDependency.dependencyArtifactID": "sshd-common",
        "githubRepoLink": "https://github.com/apache/mina-sshd"
    },
    {
        "updatedDependency.dependencyGroupID": "net.datafaker",
        "updatedDependency.dependencyArtifactID": "datafaker",
        "githubRepoLink": "https://github.com/datafaker-net/datafaker"
    },
    {
        "updatedDependency.dependencyGroupID": "redis.clients",
        "updatedDependency.dependencyArtifactID": "jedis",
        "githubRepoLink": "https://github.com/redis/jedis"
    },
    {
        "updatedDependency.dependencyGroupID": "joda-time",
        "updatedDependency.dependencyArtifactID": "joda-time",
        "githubRepoLink": "https://github.com/JodaOrg/joda-time"
    },
    {
        "updatedDependency.dependencyGroupID": "org.pitest",
        "updatedDependency.dependencyArtifactID": "pitest-entry",
        "githubRepoLink": "https://github.com/hcoles/pitest"
    },
]

In [None]:
def get_tags(repo_api_url, timeout=30):
    """Return the names of all tags of a GitHub repository.

    Requests ``{repo_api_url}/tags`` page by page (100 tags per page) until an
    empty page is returned.

    Args:
        repo_api_url: API URL of the repository; the caller in this file
            derives it by replacing 'github.com' with 'api.github.com/repos'
            in the repository link.
        timeout: Seconds to wait for each HTTP response. Without a timeout a
            stalled connection would block the notebook indefinitely.

    Returns:
        A list of tag names in the order the API returned them. When a request
        does not answer with status 200, paging stops, a warning is printed and
        the tags collected so far are returned (possibly an empty list).
    """
    # Only send credentials when a token is actually configured.
    headers = {'Authorization': f'token {GITHUB_TOKEN}'} if GITHUB_TOKEN else {}

    tags = []
    page = 1
    while True:
        response = requests.get(
            f"{repo_api_url}/tags",
            params={'page': page, 'per_page': 100},
            headers=headers,
            timeout=timeout,
        )
        if response.status_code != 200:
            # Make failures (bad token, rate limit, unknown repository, ...)
            # visible: a silently truncated tag list looks like "tag missing".
            print(
                f"Warning: {repo_api_url}/tags (page {page}) returned HTTP "
                f"{response.status_code}; the tag list may be incomplete."
            )
            break

        page_tags = response.json()
        if not page_tags:
            break
        tags.extend(tag['name'] for tag in page_tags)
        page += 1
    return tags

def construct_compare_url(repo_url, prev_version, new_version, timeout=30):
    """Build a GitHub compare URL and verify that it is reachable.

    Args:
        repo_url: Repository URL, e.g. 'https://github.com/<owner>/<repo>'.
        prev_version: Ref (the caller passes a tag name) on the left side of
            the comparison.
        new_version: Ref on the right side of the comparison.
        timeout: Seconds to wait for the HTTP response, so a stalled
            connection cannot hang the notebook.

    Returns:
        The URL ``{repo_url}/compare/{prev_version}...{new_version}`` when a
        GET request to it answers with status 200, otherwise ``False``.
    """
    print("Construct", repo_url, prev_version, new_version)
    formatted_repo_url = f"{repo_url}/compare/{prev_version}...{new_version}"
    response = requests.get(formatted_repo_url, timeout=timeout)
    # Callers only test the result for truthiness; False is kept for
    # backward compatibility.
    return formatted_repo_url if response.status_code == 200 else False

def _tags_matching_version(tags, version):
    """Return the tags that contain `version` as a complete version number.

    A plain substring test is too permissive: '2.0.1' is a substring of
    'v2.0.10' and '1.2.3' is a substring of 'v11.2.3'. The match is therefore
    rejected when the version is directly preceded by a digit (or by a digit
    and a dot), or directly followed by a digit (or by a dot and a digit).
    Prefixes and suffixes such as 'v', 'rel/', '-RC1' or '.RELEASE' still match.
    """
    if not version:
        # None or '' cannot identify a tag.
        return []
    pattern = re.compile(
        r"(?<![0-9])(?<![0-9]\.)" + re.escape(str(version)) + r"(?!\.?[0-9])"
    )
    return [tag for tag in tags if pattern.search(tag)]


def is_in_tags(tags, version):
    """Return True when at least one tag refers to `version`."""
    return bool(_tags_matching_version(tags, version))


def find_version_in_tags(tags, version):
    """Return the tag that best corresponds to `version`, or None.

    Among the tags that contain the version as a complete version number, an
    exact match is preferred, then a tag that ends with the version (e.g.
    'v1.2.3' over 'v1.2.3-RC1'), then the first matching tag in list order.

    Args:
        tags: Iterable of tag names.
        version: Version string to look for; None or '' yields None.
    """
    candidates = _tags_matching_version(tags, version)
    if not candidates:
        return None

    version = str(version)
    exact = next((tag for tag in candidates if tag == version), None)
    if exact is not None:
        return exact
    suffixed = next((tag for tag in candidates if tag.endswith(version)), None)
    if suffixed is not None:
        return suffixed
    return candidates[0]

def _find_repo_info(mapping, group_id, artifact_id):
    """Return the first mapping item for the given coordinates, or None.

    An item matches when its group ID equals `group_id` and its artifact ID is
    either `artifact_id` or the wildcard "*".
    """
    for item in mapping:
        if item["updatedDependency.dependencyGroupID"] != group_id:
            continue
        if item["updatedDependency.dependencyArtifactID"] in (artifact_id, "*"):
            return item
    return None


def update_json_file(file_path, mapping):
    """Fill in 'updatedDependency.githubCompareLink' of one benchmark file.

    The repository of the updated dependency is looked up in `mapping`. If tags
    for both the previous and the new version are found and the resulting
    compare URL is reachable, that URL is stored; otherwise a "Relevant tags
    were not found ..." message is stored. In both cases the file is rewritten
    with 2-space indentation and ' : ' between keys and values.

    Files whose dependency is not in `mapping`, or that have no
    'updatedDependency' object, are left untouched.

    Args:
        file_path: Path of the benchmark JSON file to update in place.
        mapping: List of dicts with the keys
            'updatedDependency.dependencyGroupID',
            'updatedDependency.dependencyArtifactID' and 'githubRepoLink'.
    """
    # Decode explicitly as UTF-8 rather than with the platform default.
    with open(file_path, 'r', encoding='utf-8') as file:
        entry = json.load(file)

    updated_dependency = entry.get('updatedDependency') if isinstance(entry, dict) else None
    if not isinstance(updated_dependency, dict):
        print(f"Skipping {file_path}: no 'updatedDependency' object found.")
        return

    group_id = updated_dependency.get('dependencyGroupID')
    artifact_id = updated_dependency.get('dependencyArtifactID')
    prev_version = updated_dependency.get('previousVersion')
    new_version = updated_dependency.get('newVersion')
    print(group_id, artifact_id, prev_version, new_version)

    repo_info = _find_repo_info(mapping, group_id, artifact_id)
    if not repo_info:
        # Unknown repository: keep whatever message the file already has.
        return

    repo_link = repo_info["githubRepoLink"]
    tags = get_tags(repo_link.replace('github.com', 'api.github.com/repos'))

    # A missing version cannot be matched against tags; skip the lookup.
    prev_tag = find_version_in_tags(tags, prev_version) if prev_version else None
    new_tag = find_version_in_tags(tags, new_version) if new_version else None

    compare_url = False
    if prev_tag and new_tag:
        compare_url = construct_compare_url(repo_link, prev_tag, new_tag)

    if compare_url:
        updated_dependency['githubCompareLink'] = compare_url
    else:
        repo_name = repo_link.replace('https://github.com/', '')
        updated_dependency['githubCompareLink'] = (
            f"Relevant tags were not found in the GitHub repository {repo_name} for the updated dependency."
        )

    with open(file_path, 'w', encoding='utf-8') as file:
        # Emit ' : ' as the key separator directly. Post-processing the text
        # with str.replace would also alter '": ' sequences that occur inside
        # string values (e.g. after an escaped quote).
        file.write(json.dumps(entry, indent=2, separators=(',', ' : ')))

def process_benchmark_directory(directory, filtered_df, mapping):
    """Update the benchmark file of every row in `filtered_df`.

    For each row, the file ``<directory>/<breakingCommit>.json`` is passed to
    update_json_file together with `mapping`. Rows without such a file are
    skipped.
    """
    for _, record in filtered_df.iterrows():
        candidate = os.path.join(directory, f"{record['breakingCommit']}.json")
        if not os.path.isfile(candidate):
            continue
        update_json_file(candidate, mapping)

# Requires `filtered_df` and `mapping` from the cells above.
process_benchmark_directory(
    directory=BENCHMARK_DIR,
    filtered_df=filtered_df,
    mapping=mapping,
)

In [None]:
# Re-save every file in data/benchmark with the Jackson-style ' : ' key
# separator. While doing so, strip the 'https://github.com/' prefix from
# "Relevant tags were not found ..." messages.
for file_name in os.listdir(BENCHMARK_DIR):
    if not file_name.endswith('.json'):
        continue
    file_path = os.path.join(BENCHMARK_DIR, file_name)
    # Decode explicitly as UTF-8 rather than with the platform default.
    with open(file_path, 'r', encoding='utf-8') as file:
        entry = json.load(file)

    # Entries without the expected structure are re-saved unchanged.
    updated_dependency = entry.get('updatedDependency') if isinstance(entry, dict) else None
    compare_link = (
        updated_dependency.get('githubCompareLink')
        if isinstance(updated_dependency, dict)
        else None
    )
    if isinstance(compare_link, str) and 'Relevant tags were not found in the' in compare_link:
        # str.replace returns a new string, so the result has to be stored
        # back; calling it without assignment changes nothing.
        updated_dependency['githubCompareLink'] = compare_link.replace('https://github.com/', '')

    with open(file_path, 'w', encoding='utf-8') as file:
        # Emit ' : ' directly; a textual replace on the dump would also touch
        # '": ' sequences inside string values.
        file.write(json.dumps(entry, indent=2, separators=(',', ' : ')))