In [4]:
#SVM Model

In [5]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

In [6]:
df = pd.read_csv("dmu-work-c/transformers.csv")

In [7]:
version_encoder = LabelEncoder()
file_encoder = LabelEncoder()
df["version_enc"] = version_encoder.fit_transform(df["version"])
df["file_enc"] = file_encoder.fit_transform(df["file"])

In [8]:
X = df[["version_enc", "file_enc"]].values
y = df["has_smell"].values

In [9]:
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

In [10]:
split_idx = int(0.8 * len(df))
X_train, X_test = X_scaled[:split_idx], X_scaled[split_idx:]
y_train, y_test = y[:split_idx], y[split_idx:]

In [11]:
svm_model = SVC(kernel='linear', probability=False, random_state=42, verbose=True, decision_function_shape="ovr")
svm_model.fit(X_train, y_train)

[LibSVM]

In [12]:
y_pred = svm_model.predict(X_test)

In [13]:
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred, zero_deivision=0))

Accuracy: 0.45310969157816355
Confusion Matrix:
 [[69372     0     0 ...     0     0     0]
 [39891     0     0 ...     0     0     0]
 [27277     0     0 ...     0     0     0]
 ...
 [    5     0     0 ...     0     0     0]
 [    2     0     0 ...     0     0     0]
 [    2     0     0 ...     0     0     0]]


TypeError: got an unexpected keyword argument 'zero_deivision'

In [None]:
# ANN Model

In [20]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.utils import to_categorical

In [21]:
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

num_classes = len(np.unique(y_encoded))
y_categorical = to_categorical(y_encoded, num_classes=num_classes)
y_train_cat = y_categorical[:split_idx]
y_test_cat = y_categorical[split_idx:]

In [22]:
ann_model = Sequential([
    Dense(units=32, activation='relu', input_shape=(X.shape[1],)),
    Dense(units=16, activation='relu'),
    Dense(units=num_classes, activation='softmax')  # Sortie binaire
])

In [23]:
ann_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

In [24]:
ann_model.fit(X_train, y_train_cat, epochs=10, batch_size=32, verbose=1, validation_data=(X_test, y_test_cat))

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x1f8d3c14430>

In [25]:
ann_probs = ann_model.predict(X_test)
ann_preds = np.argmax(ann_probs, axis=1)
print("Accuracy:", accuracy_score(y_test, ann_preds))
print("Confusion Matrix:\n", confusion_matrix(y_test, ann_preds))
print("Classification Report:\n", classification_report(y_test, ann_preds))

Accuracy: 0.45310969157816355
Confusion Matrix:
 [[69372     0     0 ...     0     0     0]
 [39891     0     0 ...     0     0     0]
 [27277     0     0 ...     0     0     0]
 ...
 [    5     0     0 ...     0     0     0]
 [    2     0     0 ...     0     0     0]
 [    2     0     0 ...     0     0     0]]
Classification Report:
               precision    recall  f1-score   support

           0       0.45      1.00      0.62     69372
           1       0.00      0.00      0.00     39891
           2       0.00      0.00      0.00     27277
           3       0.00      0.00      0.00      7094
           4       0.00      0.00      0.00      2740
           5       0.00      0.00      0.00      2106
           6       0.00      0.00      0.00      1056
           7       0.00      0.00      0.00       891
           8       0.00      0.00      0.00       540
           9       0.00      0.00      0.00       431
          10       0.00      0.00      0.00       237
          11  

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [26]:
# Code stacking (SVR)

In [27]:
# Import
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error

In [28]:
# Obtenir les prédictions des deux modèles
svm_preds = svm_model.decision_function(X_test)

In [29]:
# Créer les ebtrées du modèle d'ensemble (stacking)
stacked_inputs = np.hstack((ann_probs, svm_preds))

In [30]:
meta_models = []
meta_preds = []

for class_idx in range(num_classes):
    svr = SVR()
    svr.fit(stacked_inputs, (y_test == class_idx).astype(float))
    meta_models.append(svr)
    pred = svr.predict(stacked_inputs)
    meta_preds.append(pred)

meta_preds = np.vstack(meta_preds).T

In [31]:
# Prédictions finales
final_preds = np.argmax(meta_preds, axis=1)

In [32]:
# Evaluation
print("Accuracy stacking SVR :", accuracy_score(y_test, final_preds))
print("Confusing Maxtric:\n", confusion_matrix(y_test, final_preds))
print("Classification Report:\n", classification_report(y_test, final_preds))

Accuracy stacking SVR : 0.41106582539744746
Confusing Maxtric:
 [[59967  1355  1553 ...     0     0     0]
 [30244  1359  1614 ...     0     0     0]
 [24086   269  1489 ...     0     0     0]
 ...
 [    1     0     0 ...     0     0     0]
 [    0     0     0 ...     0     0     0]
 [    0     0     0 ...     0     0     0]]
Classification Report:
               precision    recall  f1-score   support

           0       0.47      0.86      0.61     69372
           1       0.41      0.03      0.06     39891
           2       0.27      0.05      0.09     27277
           3       0.00      0.00      0.00      7094
           4       0.00      0.00      0.00      2740
           5       0.00      0.00      0.00      2106
           6       0.01      0.11      0.02      1056
           7       0.00      0.00      0.00       891
           8       0.00      0.00      0.00       540
           9       0.01      0.02      0.01       431
          10       0.00      0.00      0.00       237

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
