In [None]:
import os
import json
import pandas as pd

def read_file(file):
    """Return the UTF-8 decoded text of ``file``, or ``''`` if it cannot be read.

    This is a best-effort reader: any exception raised while opening or
    reading the file is reported on stdout and swallowed, so the caller
    always gets a string back.

    Args:
        file: Path of the file to read.

    Returns:
        The full file content as a string, or an empty string on failure.
    """
    text = ''
    try:
        with open(file, 'r', encoding='utf-8') as handle:
            text = handle.read()
    except Exception as err:
        # Deliberately broad: a missing or undecodable file must not abort
        # the caller's loop.
        print("File Open Error:", str(err))
    return text

def commits_to_sheet(directory, output_directory, option):
    """Gather per-commit file contents and responses into an Excel sheet.

    All paths are relative to the current working directory:

    * the commit log is read from
      ``../../commit-logs/<directory>-files-log.json``;
    * the content of each changed file is read from
      ``<n>_<option>_<path with '/' replaced by '_'>`` in the current
      directory, where ``<n>`` is the 1-based index of the commit among the
      commits that have a non-empty ``changed_file_list``;
    * the sheet is written to
      ``../../<output_directory>/<directory>_<option>.xlsx``.

    If the sheet already exists, its rows are loaded first and the new rows
    are added after them, so running this twice duplicates the rows.

    Args:
        directory: Repository name; used in the log/sheet file names and
            stored in the "Repository" column.
        output_directory: Name of the folder (two levels above the current
            directory) that receives the sheet.
        option: Tag embedded in the per-file names and the sheet name.

    Raises:
        FileNotFoundError: If the commit log file does not exist.
    """
    output_file = os.path.join("..", "..", output_directory, f"{directory}_{option}.xlsx")

    try:
        df = pd.read_excel(output_file)
    except FileNotFoundError:
        df = pd.DataFrame()

    log_file = os.path.join("..", "..", "commit-logs", f"{directory}-files-log.json")
    commits = []
    with open(log_file, "r", encoding='utf-8') as f:
        try:
            commits = json.load(f)
        except json.JSONDecodeError as e:
            # A malformed log is reported and treated as "no commits".
            print(f"JSON Decode Error: {e}")

    cnt = 0
    # Rows are collected in a list and added in one step: DataFrame.append
    # was removed in pandas 2.0, and growing a frame per row is quadratic.
    rows = []
    for commit in commits:
        changed_files = commit['changed_file_list']
        if not changed_files:
            continue
        cnt += 1
        print(cnt)

        for file in changed_files:
            print(file)

            sanitized_path = file.replace("/", "_")
            file_path = f'{cnt}_{option}_{sanitized_path}'
            file_content = read_file(file_path)

            # NOTE(review): for a path without '.java' this equals file_path,
            # so "Response" repeats "Content" — confirm that is intended.
            response_file_path = file_path.replace('.java', '_response.txt')
            response_content = read_file(response_file_path)

            rows.append({
                "Repository": directory,
                "Hash Code": commit['commitHash'],
                "File Name": file,
                "Content": file_content,
                "Response": response_content,
            })

    if rows:
        df = pd.concat([df, pd.DataFrame(rows)], ignore_index=True)

    df.to_excel(output_file, index=False)

    print(directory, "Data has been saved to an Excel file.")


In [None]:
if __name__ == "__main__":
    # Build one "<repo>_diff.xlsx" sheet per repository listed below.
    directories = ["pgjdbc", "junit4", "gson", "guava", "h2database", "bc-java"]
    working_directory = "commit-files"
    output_directory = "commit-sheets"

    # exist_ok avoids the separate existence check (and its race).
    os.makedirs(output_directory, exist_ok=True)

    # commits_to_sheet resolves its paths relative to the current directory,
    # so each repository is processed from inside its own folder. The
    # starting directory is always restored, even when a repository fails,
    # so later cells keep resolving relative paths correctly.
    start_directory = os.getcwd()
    try:
        # Assuming it was already created in step 3
        os.chdir(working_directory)

        for directory in directories:
            os.chdir(directory)
            try:
                commits_to_sheet(directory, output_directory, "diff")
            finally:
                os.chdir("..")

        print("All tasks have been completed.")
    finally:
        os.chdir(start_directory)


In [None]:
# Display the kernel's current working directory; the relative paths used in
# this notebook are resolved against it.
os.getcwd()

In [None]:
# Move the working directory up two levels.
# NOTE(review): this looks like a manual recovery step for when the processing
# cell stops while inside commit-files/<repo> — confirm. It is not idempotent:
# if that cell finished normally, the working directory is already back at its
# starting point and running this moves two levels above it.
for _ in range(2):
    os.chdir("..")