In [93]:
pip install radon

Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.


In [132]:
from pylint.lint import Run
from io import StringIO
import os
from radon.raw import analyze as raw_analyze
from radon.complexity import cc_visit
from radon.metrics import h_visit
from radon.visitors import HalsteadVisitor
import pandas as pd
import json
import sys
from radon.visitors import ComplexityVisitor
from radon.metrics import mi_visit
from radon.raw import analyze
from pathlib import Path


def ensure_directory_exists(file_path):
    """Create the parent directory of ``file_path`` when it does not exist yet.

    Args:
        file_path: Path of a file (the file itself need not exist).

    Returns:
        None. A path without a directory component (a bare file name) needs
        no directory, so nothing is created for it.
    """
    directory = os.path.dirname(file_path)
    # os.path.dirname() returns '' for a bare file name and os.makedirs('')
    # raises FileNotFoundError, so skip creation when there is no directory part.
    if directory and not os.path.exists(directory):
        # exist_ok=True tolerates the directory appearing between the
        # existence check above and this call.
        os.makedirs(directory, exist_ok=True)

def get_python_files(directory):
    """Recursively list every ``.py`` file found below ``directory``.

    Args:
        directory: Root folder to walk.

    Returns:
        A list of paths, each formed by joining the walked folder with the
        file name, in the order ``os.walk`` yields them.
    """
    return [
        os.path.join(folder, name)
        for folder, _subdirs, names in os.walk(directory)
        for name in names
        if name.endswith('.py')
    ]

def run_pylint_on_files(files, output_file_path):
    """Run pylint over ``files`` and store its JSON report in ``output_file_path``.

    pylint writes its report to stdout, so stdout is temporarily replaced by
    an in-memory buffer while ``Run`` executes.

    Args:
        files: List of file paths handed to pylint.
        output_file_path: Destination file for the captured report (UTF-8).
    """
    pylint_args = list(files) + ['--output-format=json']
    captured = StringIO()
    exited = False
    exit_code = None

    original_stdout = sys.stdout
    sys.stdout = captured
    try:
        Run(pylint_args)
    except SystemExit as e:
        # Only remember the exit here: stdout is still redirected, so printing
        # now would append the message to the captured JSON report.
        exited = True
        exit_code = e.code
    finally:
        sys.stdout = original_stdout

    if exited:
        print(f"Pylint exited with {exit_code}")

    with open(output_file_path, 'w', encoding='utf-8') as output_file:
        output_file.write(captured.getvalue())

def parse_json_string(json_str):
    """Decode every JSON document found back-to-back in ``json_str``.

    Documents may be separated (or preceded) by JSON whitespace. Decoding
    stops at the first position that does not start a valid JSON value, and
    whatever was decoded up to that point is returned.

    Args:
        json_str: Text holding zero or more concatenated JSON documents.

    Returns:
        A list of the decoded Python objects, in order of appearance.
    """
    decoder = json.JSONDecoder()
    json_objects = []
    idx = 0
    length = len(json_str)

    while idx < length:
        # raw_decode() does not skip leading whitespace, so step over it here;
        # otherwise a newline between documents would end parsing early.
        if json_str[idx] in ' \t\n\r':
            idx += 1
            continue
        try:
            # Passing the offset avoids re-slicing the string on every pass;
            # the returned index is absolute within json_str.
            obj, idx = decoder.raw_decode(json_str, idx)
        except json.JSONDecodeError:
            break
        json_objects.append(obj)

    return json_objects


def parse_pylint_output(file_path):
    """Read a saved pylint report and flatten it into a list of issue records.

    The file content is decoded with ``parse_json_string``; each decoded item
    is either a list of issue dicts or a single issue dict, and every issue is
    converted with ``parse_issue``.

    Args:
        file_path: Path of the UTF-8 report file.

    Returns:
        A flat list with one converted record per issue.
    """
    with open(file_path, 'r', encoding='utf-8') as handle:
        decoded = parse_json_string(handle.read())

    issues = []
    for entry in decoded:
        # Treat a lone dict as a one-element group so both shapes share a path.
        group = entry if isinstance(entry, list) else [entry]
        for record in group:
            issues.append(parse_issue(record))
    return issues


def parse_issue(item): #pylint output
    """Build a flat issue record from one pylint message dict.

    Args:
        item: Mapping supporting ``.get``; keys read are ``path``,
            ``message-id``, ``symbol``, ``type``, ``module``, ``line`` and
            ``column``.

    Returns:
        A dict with the keys ``file``, ``message_id``, ``symbol``,
        ``category``, ``module_name``, ``line`` and ``column``; any key
        missing from ``item`` is reported as ``'N/A'``.
    """
    # (output key, key looked up in the pylint message), in output order.
    field_sources = (
        ("file", 'path'),
        ("message_id", 'message-id'),
        ("symbol", 'symbol'),
        ("category", 'type'),
        ("module_name", 'module'),
        ("line", 'line'),
        ("column", 'column'),
    )
    return {
        out_key: item.get(source_key, 'N/A')
        for out_key, source_key in field_sources
    }


def write_pylint_output_to_excel(issues, output_file_path):
    """Write the issue records to an Excel workbook and report the location.

    Args:
        issues: Records accepted by ``pd.DataFrame`` (e.g. a list of dicts).
        output_file_path: Destination workbook path; written with the
            openpyxl engine and without the index column.
    """
    pd.DataFrame(issues).to_excel(
        output_file_path,
        index=False,
        engine='openpyxl',
    )
    print(f"Data successfully written to {output_file_path}")

def run_radon_on_files(file_path): #radon output
    """Collect radon metrics for one Python source file.

    Args:
        file_path: Path of the file to analyse; it is read as UTF-8.

    Returns:
        A list holding a single dict with the keys ``file_path``,
        ``complexity`` (one entry per block reported by ``cc_visit``),
        ``raw``, ``halstead`` and ``maintainability_index``. The list is
        empty when the file cannot be read, decoded or analysed, so one bad
        file does not abort a batch run.
    """
    import tokenize  # local import: only needed for the TokenError handler below

    results = []

    try:
        with open(file_path, 'r', encoding='utf-8') as file:
            content = file.read()
    except (IOError, UnicodeDecodeError) as e:
        # UnicodeDecodeError is not an IOError; files that are not valid
        # UTF-8 used to escape this handler and stop the whole run.
        print(f"Error opening or reading file {file_path}: {e}")
        return results

    try:
        # Analyze raw metrics
        raw_metrics = raw_analyze(content)

        # Analyze cyclomatic complexity
        cc_results = cc_visit(content)

        # Analyze Halstead metrics
        h_report = h_visit(content)

        # Calculate maintainability index
        mi_metrics = mi_visit(content, multi=True)
    except (SyntaxError, ValueError, tokenize.TokenError) as e:
        # Source that cannot be tokenized/parsed (e.g. Python 2 syntax) is
        # reported and skipped instead of raising out of the batch.
        print(f"Error analysing file {file_path}: {e}")
        return results

    # Directly use Radon's complexity scores
    complexity_metrics = [{
        'name': item.name,
        'cyclomatic_score': item.complexity,
        # radon blocks expose a one-letter kind via `letter`
        # (function / method / class); fall back to 'F' when absent.
        'block_type': getattr(item, 'letter', 'F'),
    } for item in cc_results if hasattr(item, 'complexity')]

    halstead_fields = (
        'h1', 'h2', 'N1', 'N2', 'vocabulary', 'length', 'calculated_length',
        'volume', 'difficulty', 'effort', 'time', 'bugs',
    )
    halstead_total = h_report.total

    results.append({
        'file_path': file_path,
        'complexity': complexity_metrics,
        'raw': raw_metrics._asdict(),
        'halstead': {field: getattr(halstead_total, field) for field in halstead_fields},
        'maintainability_index': mi_metrics,
    })
    return results

def aggregate_radon_metrics(python_files):
    """Run ``run_radon_on_files`` on each path and gather the results.

    Args:
        python_files: Iterable of file paths.

    Returns:
        A list with one entry per input path, each being whatever
        ``run_radon_on_files`` returned for that path, in input order.
    """
    all_metrics = []
    for source_path in python_files:
        all_metrics.append(run_radon_on_files(source_path))
    return all_metrics


def save_to_excel(aggregated_metrics, output_file_path):
    """Flatten per-file radon metrics into rows and write them to Excel.

    One row is produced per entry of a file's ``complexity`` list; the
    file-level raw, Halstead and maintainability values are repeated on each
    of that file's rows.

    Args:
        aggregated_metrics: List (one item per file) of lists of metrics
            dicts with the keys ``file_path``, ``complexity``, ``raw``,
            ``halstead`` and ``maintainability_index``.
        output_file_path: Destination workbook path (openpyxl engine, no
            index column).
    """
    columns = [
        'file', 'type', 'line', 'column', 'name', 'complexity_rank',
        'complexity_score', 'loc', 'lloc', 'sloc', 'comments', 'single_comments',
        'multi', 'blank', 'h1', 'h2', 'N1', 'N2', 'vocabulary', 'length',
        'calculated_length', 'volume', 'difficulty', 'effort', 'time',
        'bugs', 'maintainability_index'
    ]
    halstead_columns = (
        'h1', 'h2', 'N1', 'N2', 'vocabulary', 'length', 'calculated_length',
        'volume', 'difficulty', 'effort', 'time', 'bugs',
    )
    data = []

    # Helper function to safely convert to int, defaulting to default on failure
    def safe_int(value, default=0):
        try:
            return int(value)
        except (TypeError, ValueError):
            # TypeError covers None and other non-numeric objects.
            return default

    for file_metrics in aggregated_metrics:
        for metrics in file_metrics:
            complexity_metrics = metrics['complexity']
            halstead_metrics = metrics['halstead']
            raw_metrics = metrics['raw']
            mi_metrics = metrics['maintainability_index']

            for func_metrics in complexity_metrics:
                # The 'type' column keeps its original rule: it depends only
                # on a line count carried by the block entry itself.
                # NOTE(review): entries without their own 'lloc' are therefore
                # labelled 'Module' - confirm this is the intended label.
                block_lloc = safe_int(func_metrics.get('lloc', 0))

                # Line counts fall back to the file-level raw metrics when the
                # block entry carries none; previously they were always 0.
                loc = safe_int(func_metrics.get('loc', raw_metrics.get('loc', 0)))
                lloc = safe_int(func_metrics.get('lloc', raw_metrics.get('lloc', 0)))
                sloc = safe_int(func_metrics.get('sloc', raw_metrics.get('sloc', 0)))

                row = {
                    'file': metrics['file_path'],
                    'type': 'Function' if block_lloc > 0 else 'Module',
                    'name': func_metrics['name'],
                    'complexity_rank': 'N/A',  # Define your own logic or leave as 'N/A'
                    'complexity_score': func_metrics['cyclomatic_score'],
                    'loc': loc,
                    'lloc': lloc,
                    'sloc': sloc,
                    'comments': raw_metrics['comments'],
                    # Use the raw count when provided, 'N/A' otherwise.
                    'single_comments': raw_metrics.get('single_comments', 'N/A'),
                    'multi': raw_metrics['multi'],
                    'blank': raw_metrics['blank'],
                    'maintainability_index': mi_metrics,
                }
                for field in halstead_columns:
                    row[field] = halstead_metrics[field]
                data.append(row)

    # 'line' and 'column' are never filled in and come out empty.
    df = pd.DataFrame(data, columns=columns)
    df.to_excel(output_file_path, index=False, engine='openpyxl')


    
    
def ensure_directory_exists(file_path):
    """Create the parent directory of ``file_path`` when it does not exist yet.

    NOTE: a function with this name is also defined earlier in this file;
    being later, this definition is the one in effect afterwards.

    Args:
        file_path: Path of a file (the file itself need not exist).

    Returns:
        None. A path without a directory component (a bare file name) needs
        no directory, so nothing is created for it.
    """
    directory = os.path.dirname(file_path)
    # os.path.dirname() returns '' for a bare file name and os.makedirs('')
    # raises FileNotFoundError, so skip creation when there is no directory part.
    if directory and not os.path.exists(directory):
        # exist_ok=True tolerates the directory appearing between the
        # existence check above and this call.
        os.makedirs(directory, exist_ok=True)

def correct_excel(pylint_excel_output_path, radon_excel_output_path):
    """Normalise path separators in the ``file`` column of the radon workbook.

    Backslashes become forward slashes and each ``//`` is replaced by ``/``
    (a single pass); the workbook is then rewritten in place. Failures are
    reported on stdout rather than raised.

    Args:
        pylint_excel_output_path: Kept for interface compatibility; the
            pylint workbook is not needed for this correction and is no
            longer read, so its absence cannot block the radon fix-up.
        radon_excel_output_path: Workbook to read, correct and overwrite.
    """
    try:
        # Read data from the radon Excel file
        radon_data = pd.read_excel(radon_excel_output_path)

        # Literal (non-regex) replacements, stated explicitly so the result
        # does not depend on the pandas version's default for `regex`.
        radon_data['file'] = (
            radon_data['file']
            .str.replace('\\', '/', regex=False)
            .str.replace('//', '/', regex=False)
        )

        # Save the corrected data back to the same Excel file
        radon_data.to_excel(radon_excel_output_path, index=False)

        print(f"Radon data corrected and saved to {radon_excel_output_path}")
    except Exception as e:
        # Best-effort step: report the problem and let the caller continue.
        print(f"Error correcting Radon Excel: {e}")


def merge_excel(pylint_excel_output_path ,radon_excel_output_path,merged_output_path):
    """Inner-join the pylint and radon workbooks on ``file`` and save the result.

    Args:
        pylint_excel_output_path: Workbook holding the pylint issues.
        radon_excel_output_path: Workbook holding the radon metrics.
        merged_output_path: Destination workbook; only written when the join
            yields at least one row.
    """
    pylint_frame = pd.read_excel(pylint_excel_output_path)
    radon_frame = pd.read_excel(radon_excel_output_path)

    # Rows survive only when the same 'file' value appears in both inputs.
    combined = pylint_frame.merge(radon_frame, how='inner', on='file')

    # Nothing matched: tell the user and leave the destination untouched.
    if combined.empty:
        print("No matching data found for merge. Please check the 'file' column for matching values.")
        return

    combined.to_excel(merged_output_path, index=False)
    print(f"Merged data saved to {merged_output_path}")


# ---------------------------------------------------------------------------
# Driver: run pylint and radon over one project tree, export both result sets
# to Excel, normalise the radon paths and merge the two workbooks on 'file'.
# ---------------------------------------------------------------------------

# Project under analysis. Edit this single value to analyse another tree,
# e.g. "D:/GCIS_Project/Data/data_extracted/recommenders-main/recommenders-1.1.1".
directory_path = "/student/eln263/Desktop/SF/ChuanhuChatGPT/ChuanhuChatGPT-20240305"
target_directory = directory_path

# All artefacts live under <project>/output so every path derives from
# directory_path instead of being repeated as a literal.
output_json_dir = os.path.join(directory_path, "output", "json")
output_xlsx_dir = os.path.join(directory_path, "output", "xlsx")

# --------------------------------------------------------
# pylint
# --------------------------------------------------------

# NOTE(review): this path previously pointed into a
# ".../ChuanhuChatGPT-ChuanhuChatGPT-20240305/..." folder, unlike every other
# output path; that is assumed to be a paste slip - confirm.
pylint_output_path = os.path.join(output_json_dir, "pylint_output.json")
pylint_excel_output_path = os.path.join(output_xlsx_dir, "pylint_data.xlsx")

ensure_directory_exists(pylint_output_path)
ensure_directory_exists(pylint_excel_output_path)

files_to_lint = get_python_files(target_directory)
run_pylint_on_files(files_to_lint, pylint_output_path)
issues = parse_pylint_output(pylint_output_path)
write_pylint_output_to_excel(issues, pylint_excel_output_path)
print(f'Data saved to {pylint_excel_output_path}')

# ----------------------------------------------------------
# radon
# ----------------------------------------------------------

# Nothing in this cell writes to radon_output_path; only its directory is
# ensured. The name is kept in case other cells rely on it.
radon_output_path = os.path.join(output_json_dir, "radon_output.json")
radon_excel_output_path = os.path.join(output_xlsx_dir, "radon_data.xlsx")

ensure_directory_exists(radon_output_path)
ensure_directory_exists(radon_excel_output_path)

# Same tree and same filter as for pylint, so reuse the list instead of
# walking the directory a second time.
files_to_radon = files_to_lint
metrics_radon = aggregate_radon_metrics(files_to_radon)
save_to_excel(metrics_radon, radon_excel_output_path)

print(f'Data saved to {radon_excel_output_path}')

#-----------------------------------------------------------------
# path correction and merge
#-----------------------------------------------------------------

# A trailing comma used to turn this into a 1-tuple that then had to be
# unwrapped; assign the string directly.
last_name = os.path.basename(directory_path)

correct_excel(pylint_excel_output_path, radon_excel_output_path)

merged_output_path = os.path.join(output_xlsx_dir, "merged1.xlsx")

merge_excel(pylint_excel_output_path, radon_excel_output_path, merged_output_path)




Data successfully written to /student/eln263/Desktop/SF/ChuanhuChatGPT/ChuanhuChatGPT-20240305/output/xlsx/pylint_data.xlsx
Data saved to /student/eln263/Desktop/SF/ChuanhuChatGPT/ChuanhuChatGPT-20240305/output/xlsx/pylint_data.xlsx
Data saved to /student/eln263/Desktop/SF/ChuanhuChatGPT/ChuanhuChatGPT-20240305/output/xlsx/radon_data.xlsx
Radon data corrected and saved to /student/eln263/Desktop/SF/ChuanhuChatGPT/ChuanhuChatGPT-20240305/output/xlsx/radon_data.xlsx
Merged data saved to /student/eln263/Desktop/SF/ChuanhuChatGPT/ChuanhuChatGPT-20240305/output/xlsx/merged1.xlsx
