In [5]:
# Install radon into the environment of the running kernel.
# The explicit %pip magic does not depend on IPython's automagic setting.
%pip install radon


Note: you may need to restart the kernel to use updated packages.




In [2]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
import os

# Load the labelled training dataset from the working directory.
dataset = pd.read_csv("Python_LargeClassSmell_Dataset.csv")

# The 12 metric columns that are selected and normalized.
# Their order fixes the column order of the scaled frame below.
selected_features = [
    'difficulty',
    'scloc',
    'loc',
    'effort',
    'time',
    'volume',
    'bugs',
    'lloc',
    'comments',
    'blanks',
    'single_comments',
    'calculated_length',
]

# Keep only the selected columns.
selected_dataset = dataset[selected_features]

# Fit the Min-Max scaler on the selected columns. `scaler` stays at notebook
# level so that later cells can reuse the fitted instance.
scaler = MinMaxScaler()
scaler.fit(selected_dataset)

# Replace the raw selection with its scaled counterpart, keeping column names.
selected_dataset = pd.DataFrame(
    scaler.transform(selected_dataset),
    columns=selected_features,
)

In [9]:
import pandas as pd
import joblib
from radon.raw import analyze
from radon.metrics import h_visit
import ast
import os

# Step 1: Extract code metrics using Radon
def extract_metrics(file_path):
    """Compute Radon raw and Halstead metrics for one Python source file.

    Args:
        file_path: Path of the Python file to analyse.

    Returns:
        A ``(metrics, halstead_metrics)`` tuple: the result of
        ``radon.raw.analyze`` and the result of ``radon.metrics.h_visit``
        for the file's source text.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    # Python source defaults to UTF-8; do not depend on the platform's
    # locale encoding (e.g. cp1252 on Windows) when reading it.
    with open(file_path, 'r', encoding='utf-8') as file:
        source_code = file.read()

    metrics = analyze(source_code)

    # h_visit takes source text and parses it itself, so pass the string
    # rather than a pre-parsed AST node.
    halstead_metrics = h_visit(source_code)

    return metrics, halstead_metrics

# Step 2: Save metrics in a Pandas DataFrame
def create_dataframe(file_paths):
    """Build a DataFrame with one row of code metrics per source file.

    Args:
        file_paths: Iterable of paths; each is passed to ``extract_metrics``.

    Returns:
        A ``pandas.DataFrame`` whose columns, in order, are: difficulty,
        scloc, loc, effort, time, volume, bugs, lloc, comments, blanks,
        single_comments, calculated_length. Rows follow the order of
        ``file_paths``.
    """
    rows = []

    for path in file_paths:
        raw, halstead = extract_metrics(path)
        # Halstead values are read from the report's 'total' attribute.
        totals = halstead.total

        # Other values (multi, h1, h2, N1, N2, vocabulary, length) are
        # deliberately left out of the row.
        rows.append({
            'difficulty': totals.difficulty,
            'scloc': raw.sloc,
            'loc': raw.loc,
            'effort': totals.effort,
            'time': totals.time,
            'volume': totals.volume,
            'bugs': totals.bugs,
            'lloc': raw.lloc,
            'comments': raw.comments,
            'blanks': raw.blank,
            'single_comments': raw.single_comments,
            'calculated_length': totals.calculated_length,
        })

    return pd.DataFrame(rows)

# Step 3: Normalize the DataFrame using MinMaxScaler
def normalize_dataframe(df):
    """Scale a metrics DataFrame with the notebook-level fitted ``scaler``.

    Depends on the global ``scaler`` created and fitted in an earlier cell;
    that cell must have been run before this function is called.

    Args:
        df: DataFrame of metric columns to scale.

    Returns:
        A new DataFrame with the scaled values, keeping both the column
        labels and the row index of ``df`` so rows stay aligned with the
        input.
    """
    scaled_values = scaler.transform(df)
    # Carry the input index over; otherwise rows are silently relabelled 0..n-1.
    normalized_df = pd.DataFrame(scaled_values, columns=df.columns, index=df.index)
    return normalized_df

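# Step 4: Load the serialized model with joblib
# NOTE(review): this step was labelled "XGBoost", but the example below loads
# 'dt_large_class_model.pkl' - confirm which model type is actually stored.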
def load_model(model_path):
    """Load a serialized model from disk.

    Args:
        model_path: Path of the file to load.

    Returns:
        Whatever object ``joblib.load`` reconstructs from the file.
    """
    # The file is opened in binary mode and the handle is passed to joblib.
    with open(model_path, 'rb') as model_file:
        return joblib.load(model_file)

# Step 5: Predict using the loaded model
def predict_data(model, data):
    """Run the model on ``data`` and return its predictions cast to int.

    Args:
        model: Object exposing a ``predict(data)`` method.
        data: Input passed unchanged to ``model.predict``.

    Returns:
        The result of ``model.predict(data)`` after ``.astype(int)``; the
        object returned by ``predict`` must therefore support ``astype``.
    """
    return model.predict(data).astype(int)

# Example usage:
# Example usage: extract metrics, scale them, and classify each file.
file_paths = [
    'code_example/large_class_example.py',
    'code_example/non_large_class_example.py',
]  # Add more file paths if needed
data_frame = create_dataframe(file_paths)
normalized_data = normalize_dataframe(data_frame)

# Use a forward slash like the paths above: 'model\d...' contained the invalid
# escape sequence '\d' and only resolved as a directory path on Windows.
model_path = 'model/dt_large_class_model.pkl'
model = load_model(model_path)

predictions = predict_data(model, normalized_data)
# Predictions come back in the same order as file_paths, so pair them directly.
for file_path, prediction in zip(file_paths, predictions):
    file_name = os.path.basename(file_path)  # Report only the file name
    if prediction:  # A truthy prediction indicates the code smell
        print(f"{file_name} memiliki code smell large class")
    else:
        print(f"{file_name} tidak memiliki code smell large class")


large_class_example.py memiliki code smell large class
non_large_class_example.py tidak memiliki code smell large class
