In [1]:
!pip install requests gitpython

Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com
[0m

In [5]:
import os
import requests
import json
from git import Repo
import csv
import subprocess

# Constants
GITHUB_TOKEN = 'ENTER_YOUR_GITHUB_TOKEN'
LANGUAGE = "Java"
NUM_REPOS = 10
REPOS_DIR = './repos'
REFMINER_JAR = 'RefactoringMiner-2.0.jar'
OUTPUT_JSON = 'output.json'

# Step 1: Get Repositories
def get_repos(language, num_repos):
    repos = []
    url = f"https://api.github.com/search/repositories?q=language:{language}&sort=stars&order=desc"
    headers = {"Authorization": f"token {GITHUB_TOKEN}"}
    response = requests.get(url, headers=headers)
    data = response.json()
    for repo in data['items'][:num_repos]:
        repos.append(repo['full_name'])
    return repos

# Step 2: Clone Repositories
def clone_repo(repo_url, dest_dir):
    if not os.path.exists(dest_dir):
        os.makedirs(dest_dir)
    Repo.clone_from(repo_url, dest_dir)

# Step 3: Run RefactoringMiner
def run_refactoring_miner(repo_path, output_file):
    subprocess.run(['java', '-jar', REFMINER_JAR, '-a', repo_path, '-json', output_file])

# Step 4: Parse Refactorings
def parse_refactorings(json_file):
    with open(json_file, 'r') as file:
        data = json.load(file)
    refactorings = []
    for commit in data['commits']:
        for ref in commit['refactorings']:
            if ref['type'] == 'Extract Method':
                refactorings.append(ref)
    return refactorings

# Step 5: Get Code Snippet
def get_code_snippet(file_path, start_line, end_line):
    with open(file_path, 'r') as file:
        lines = file.readlines()
    return ''.join(lines[start_line-1:end_line])

# Step 6: Create Pairs
def create_pairs(refactorings, repo_path):
    pairs = []
    for ref in refactorings:
        original_code = get_code_snippet(os.path.join(repo_path, ref['filePath']), ref['leftSideLocations'][0]['startLine'], ref['leftSideLocations'][0]['endLine'])
        refactored_code = get_code_snippet(os.path.join(repo_path, ref['filePath']), ref['rightSideLocations'][0]['startLine'], ref['rightSideLocations'][0]['endLine'])
        pairs.append((original_code, refactored_code))
    return pairs

# Step 7: Save Pairs
def save_pairs_to_csv(pairs, output_file):
    with open(output_file, 'w', newline='') as file:
        writer = csv.writer(file)
        writer.writerow(["OriginalCode", "RefactoredCode"])
        for pair in pairs:
            writer.writerow(pair)

# Main Workflow
def main():
    # Get repositories
    repos = get_repos(LANGUAGE, NUM_REPOS)
    
    for repo in repos:
        repo_name = repo.split('/')[-1]
        repo_url = f"https://github.com/{repo}.git"
        dest_dir = os.path.join(REPOS_DIR, repo_name)
        
        # Clone repository
        clone_repo(repo_url, dest_dir)
        
        # Run RefactoringMiner
        run_refactoring_miner(dest_dir, OUTPUT_JSON)
        
        # Parse refactorings
        refactorings = parse_refactorings(OUTPUT_JSON)
        
        # Create pairs
        pairs = create_pairs(refactorings, dest_dir)
        
        # Save pairs
        save_pairs_to_csv(pairs, f"{repo_name}_pairs.csv")
        
        # Clean up output json for next run
        if os.path.exists(OUTPUT_JSON):
            os.remove(OUTPUT_JSON)

if __name__ == "__main__":
    main()


{'total_count': 16383799, 'incomplete_results': False, 'items': [{'id': 132464395, 'node_id': 'MDEwOlJlcG9zaXRvcnkxMzI0NjQzOTU=', 'name': 'JavaGuide', 'full_name': 'Snailclimb/JavaGuide', 'private': False, 'owner': {'login': 'Snailclimb', 'id': 29880145, 'node_id': 'MDQ6VXNlcjI5ODgwMTQ1', 'avatar_url': 'https://avatars.githubusercontent.com/u/29880145?v=4', 'gravatar_id': '', 'url': 'https://api.github.com/users/Snailclimb', 'html_url': 'https://github.com/Snailclimb', 'followers_url': 'https://api.github.com/users/Snailclimb/followers', 'following_url': 'https://api.github.com/users/Snailclimb/following{/other_user}', 'gists_url': 'https://api.github.com/users/Snailclimb/gists{/gist_id}', 'starred_url': 'https://api.github.com/users/Snailclimb/starred{/owner}{/repo}', 'subscriptions_url': 'https://api.github.com/users/Snailclimb/subscriptions', 'organizations_url': 'https://api.github.com/users/Snailclimb/orgs', 'repos_url': 'https://api.github.com/users/Snailclimb/repos', 'events_url

FileNotFoundError: [Errno 2] No such file or directory: 'java'