In [1]:
import numpy as np
import pandas as pd
import pickle

In [2]:
!pip install javalang
import javalang

Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com



[notice] A new release of pip is available: 23.1.2 -> 24.2
[notice] To update, run: python.exe -m pip install --upgrade pip


In [3]:
# Read the labelled Java methods; later cells use the 'Code' and 'Smell' columns.
df = pd.read_csv(filepath_or_buffer='../Dataset/LongMethod.csv')

In [4]:
# Preview the first five rows to sanity-check the load.
df.head(n=5)

Unnamed: 0,Code,Smell
0,public void CodeCheck(){\n if (n == JOp...,1
1,public void CodeCheck(){\n if (n == JOption...,0
2,public void CodeCheck(){\n if (n == JOption...,0
3,public void LoanCalculator(){\r\n if (d...,1
4,"public void analyzeData() {\r\n int x = 10,...",0


In [5]:
from sklearn.preprocessing import LabelEncoder

# Encode the 'Smell' labels as integers. The fitted encoder is kept at notebook
# level so predictions can be mapped back to the original labels later.
encoder = LabelEncoder()
encoder.fit(df['Smell'])
df['Smell'] = encoder.transform(df['Smell'])

# Class balance of the encoded target.
df['Smell'].value_counts()

Smell
0    29
1    21
Name: count, dtype: int64

In [6]:
def wrap_method_in_class(method_code):
    """Embed a bare Java method inside a throwaway ``Temp`` class.

    Args:
        method_code: Source text of a single Java method.

    Returns:
        A string declaring ``public class Temp`` whose body is ``method_code``.
    """
    opening = "\n    public class Temp {\n"
    member = f"        {method_code}\n"
    closing = "    }\n    "
    return opening + member + closing

In [7]:
def calculate_cyclomatic_complexity(method_code):
    """Estimate the cyclomatic complexity of a single Java method.

    The method is wrapped in a temporary class, parsed with javalang, and the
    score is 1 plus the number of nodes of the types listed below. No other
    node types contribute to the count.

    Args:
        method_code: Source text of a single Java method.

    Returns:
        The integer complexity, or None when the code cannot be tokenized or
        parsed (the error is printed).
    """
    decision_nodes = (
        javalang.tree.IfStatement,
        javalang.tree.ForStatement,
        javalang.tree.WhileStatement,
        javalang.tree.DoStatement,
        javalang.tree.SwitchStatement,
        javalang.tree.CatchClause,
    )

    try:
        tree = javalang.parse.parse(wrap_method_in_class(method_code))
        # Iterating the tree yields (path, node) pairs; only the node matters here.
        return 1 + sum(1 for _, node in tree if isinstance(node, decision_nodes))
    except (javalang.parser.JavaSyntaxError, javalang.tokenizer.LexerError) as e:
        print(f"Error parsing method: {e}")
        return None

In [8]:
# Derive the single model feature: one complexity score per Java method.
df['Cyclomatic_Complexity'] = df['Code'].apply(calculate_cyclomatic_complexity)

# calculate_cyclomatic_complexity returns None for code it cannot parse. Stop
# here with the offending rows rather than letting missing values reach the model.
assert df['Cyclomatic_Complexity'].notna().all(), (
    "Could not parse Java code in rows: "
    f"{df.index[df['Cyclomatic_Complexity'].isna()].tolist()}"
)

In [9]:
# Show the computed complexity of every method.
print(df.loc[:, "Cyclomatic_Complexity"])

0     17
1     10
2      9
3     46
4      8
5     10
6      1
7      4
8     20
9     14
10     1
11    13
12    18
13    13
14    39
15     4
16    16
17     4
18     2
19     3
20     1
21    17
22     3
23    18
24     2
25     3
26     4
27     3
28     7
29    11
30     1
31    21
32     1
33    12
34    15
35    19
36    15
37     1
38    17
39    23
40    16
41    17
42    15
43    16
44    18
45    16
46    17
47    22
48    16
49    16
Name: Cyclomatic_Complexity, dtype: int64


In [10]:
# Preview the first five rows, now including the complexity column.
df.iloc[:5]

Unnamed: 0,Code,Smell,Cyclomatic_Complexity
0,public void CodeCheck(){\n if (n == JOp...,1,17
1,public void CodeCheck(){\n if (n == JOption...,0,10
2,public void CodeCheck(){\n if (n == JOption...,0,9
3,public void LoanCalculator(){\r\n if (d...,1,46
4,"public void analyzeData() {\r\n int x = 10,...",0,8


In [11]:
# Feature matrix: the complexity column reshaped to one column, one row per method.
X = df['Cyclomatic_Complexity'].to_numpy().reshape(-1, 1)

In [12]:
# Target vector: the encoded 'Smell' label of each method.
y = df['Smell'].to_numpy()

In [13]:
from sklearn.model_selection import train_test_split

In [14]:
# Hold out 20% of the samples for evaluation; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=2,
)

In [15]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, precision_score

# k-nearest-neighbours classifier; n_neighbors=5 is scikit-learn's default, spelled out here.
knc = KNeighborsClassifier(n_neighbors=5)

In [16]:
# Train on the training split, then predict the held-out samples.
y_pred = knc.fit(X_train, y_train).predict(X_test)

# Print the raw predictions, then accuracy, confusion matrix and precision in that order.
print(y_pred)
for metric in (accuracy_score, confusion_matrix, precision_score):
    print(metric(y_test, y_pred))

[1 1 0 0 0 1 1 1 1 1]
0.9
[[3 1]
 [0 6]]
0.8571428571428571


In [19]:
def test_model_with_code(java_code, model):
    """Predict the smell label for one Java method and print the result.

    Args:
        java_code: Source text of a single Java method.
        model: Fitted classifier whose ``predict`` accepts a ``(1, 1)`` array
            holding the method's cyclomatic complexity.

    Raises:
        ValueError: If ``java_code`` cannot be parsed, so there is no
            complexity value to feed the model.
    """
    complexity = calculate_cyclomatic_complexity(java_code)
    if complexity is None:
        # The helper signals a parse failure with None; passing that on would
        # only surface later as an opaque error inside model.predict.
        raise ValueError(
            "Could not compute cyclomatic complexity: Java code failed to parse"
        )

    features = np.array(complexity).reshape(1, -1)
    prediction = model.predict(features)
    # `encoder` is the notebook-level LabelEncoder fitted on the 'Smell' column.
    predicted_smell = encoder.inverse_transform(prediction)[0]

    print(f"Cyclomatic Complexity: {features[0][0]}")
    print(f"Predicted Smell: {predicted_smell}")

# Sample Java method with deeply nested branching, used as a smoke test for the
# trained model. The `c == 5` condition was previously written as the
# assignment `c =5`, which is not a valid boolean condition in Java.
java_code = """
public void complexMethod(int a, int b, int c, int d) {
        if (a > 0) {
            if (b > 0) {
                if (c > 0) {
                    System.out.println("Case 1");
                } else {
                    System.out.println("Case 2");
                }
            } else if (b < 0) {
                if (c == 0) {
                    System.out.println("Case 3");
                } else if (c < 0){
                    System.out.println("C value is negative");
                } else if (c == 5){
                    System.out.println("C value is 5");
                } else {
                    System.out.println("Case 4");
                }
            } else {
                System.out.println("Case 5");
            }
        } else if (a < 0) {
            if (d > 10) {
                System.out.println("Case 6");
            } else if (d < 5) {
                System.out.println("Case 7");
            } else {
                switch (b) {
                    case 1:
                        System.out.println("Case 8");
                        break;
                    case 2:
                        System.out.println("Case 9");
                        break;
                    case 3:
                        System.out.println("Case 10");
                        break;
                    default:
                        System.out.println("Default Case");
                }
            }
        } else {
            for (int i = 0; i < c; i++) {
                if (i  == 0) {
                    System.out.println("Equal to zero");
                } else if (i < 0) {
                    System.out.println("Negative");
                }  else {
                    System.out.println("Positive");
                }
            }
        }

        try {
            int result = a / b;
            System.out.println("Result: " + result);
        } catch (ArithmeticException e) {
            System.out.println("Division by zero");
        }
    }

"""

# Test the model with the provided Java code
test_model_with_code(java_code, knc)


Cyclomatic Complexity: 16
Predicted Smell: 1


In [53]:
# Persist the fitted classifier so it can be reloaded without retraining.
with open('../Pkl File/long_method.pkl', mode='wb') as pkl_out:
    pickle.dump(knc, pkl_out)