In [1]:
import pandas as pd
import json
import os
from tqdm import tqdm

In [6]:
def get_metrics_from_dict(halstead_metrics, mi_metrics):
    """Assemble one row of radon metrics from a Halstead and an MI report.

    Parameters
    ----------
    halstead_metrics : dict
        Halstead report; must contain the keys 'calculated_length',
        'difficulty', 'effort', 'bugs', 'length', 'vocabulary', 'time'
        and 'volume' (a missing key raises ``KeyError``).
    mi_metrics : dict
        Report that may contain the key 'mi'.

    Returns
    -------
    dict
        Maps the column names 'HCPL', 'HDIF', 'HEFF', 'HNDB', 'HPL', 'HPV',
        'HTRP', 'HVOL' to the matching Halstead values and 'MI' to
        ``mi_metrics['mi']``, or ``None`` when that key is absent.
    """
    # Missing 'mi' key -> None
    mi_value = mi_metrics.get('mi')

    # Output column -> key in the Halstead report
    column_to_key = (
        ('HCPL', 'calculated_length'),
        ('HDIF', 'difficulty'),
        ('HEFF', 'effort'),
        ('HNDB', 'bugs'),
        ('HPL', 'length'),
        ('HPV', 'vocabulary'),
        ('HTRP', 'time'),
        ('HVOL', 'volume'),
    )

    row = {column: halstead_metrics[key] for column, key in column_to_key}
    row['MI'] = mi_value
    return row

def json_to_csv(repo_name):
    """Convert one repository's raw radon reports into a per-function CSV.

    Reads ``Data/Data_raw_json/{repo_name}/Halstead/results.jsonl`` and
    ``Data/Data_raw_json/{repo_name}/MI/results.jsonl`` (one JSON object per
    line; line *i* of the MI file is paired with line *i* of the Halstead
    file), builds one row per function listed in the Halstead report and
    writes ``Data/Data_raw_csv/{repo_name}/radon_metrics.csv``.

    Rows are labelled ``{repo_name}/{file_name}/{func_name}``. The MI value
    is looked up per file, so every function of a file shares it. Missing MI
    values are replaced by the mean MI of the repository.

    Parameters
    ----------
    repo_name : str
        Name of the repository sub-directory under ``Data/Data_raw_json``.

    Returns
    -------
    None
        The result is written to disk.

    Raises
    ------
    OSError
        If one of the input files cannot be opened.
    json.JSONDecodeError
        If a non-blank line is not valid JSON.
    """
    columns = ['HCPL', 'HDIF', 'HEFF', 'HNDB', 'HPL', 'HPV', 'HTRP', 'HVOL', 'MI']

    # Open raw json with halstead metrics
    with open(f'Data/Data_raw_json/{repo_name}/Halstead/results.jsonl') as f:
        data_halstead = f.readlines()

    # Open raw json with mi metric
    with open(f'Data/Data_raw_json/{repo_name}/MI/results.jsonl') as f:
        data_mi = f.readlines()

    # Collect rows in plain lists and build the frame once at the end:
    # growing a DataFrame row by row copies it on every step, and
    # DataFrame.append no longer exists in pandas >= 2.0.
    row_labels = []
    rows = []

    # Iterate through json
    for idx, halstead_line in enumerate(data_halstead):

        # Tolerate blank lines (e.g. a trailing newline at end of file)
        if not halstead_line.strip():
            continue

        # Load dict halstead
        halstead_report = json.loads(halstead_line)

        # Get file name (the first key of the report); skip empty reports
        if not halstead_report:
            continue
        file_name = next(iter(halstead_report))
        file_halstead_metrics = halstead_report[file_name]

        # Load dict mi; fall back to "no MI" when the line is missing or blank,
        # or when it does not describe the same file
        if idx < len(data_mi) and data_mi[idx].strip():
            file_mi_metrics = json.loads(data_mi[idx]).get(file_name, {'mi': None})
        else:
            file_mi_metrics = {'mi': None}

        # Get per-function reports; files without a 'functions' entry are skipped
        try:
            functions = file_halstead_metrics['functions']
        except KeyError:
            continue

        # Iterate through functions in file
        for func_name, func_metrics in functions.items():
            rows.append(get_metrics_from_dict(func_metrics, file_mi_metrics))
            row_labels.append(f"{repo_name}/{file_name}/{func_name}")

    # Store every metric as float so that a missing MI (None) becomes NaN
    # and the mean / fill below operate on a numeric column
    df = pd.DataFrame(rows, index=row_labels, columns=columns).astype('float64')

    # Calculate mean
    mean_mi = df['MI'].mean()
    # Fill nans (nothing to fill with when no function has an MI value)
    if pd.notna(mean_mi):
        df['MI'] = df['MI'].fillna(mean_mi)

    # Create dir (and any missing parents) if not exists
    out_dir = f"Data/Data_raw_csv/{repo_name}"
    os.makedirs(out_dir, exist_ok=True)
    # Save csv
    df.to_csv(f"{out_dir}/radon_metrics.csv")

In [7]:
# Convert the raw radon reports of every entry found in the raw-json directory
repository_names = os.listdir("Data/Data_raw_json")
for repository_name in tqdm(repository_names):
    json_to_csv(repository_name)

100%|████████████████████████████████████████████████████████████████████████████████| 102/102 [22:50<00:00, 13.43s/it]
