In [None]:
#SVM Model

In [2]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

In [3]:
df = pd.read_csv("dmu-work/test.csv")

In [4]:
version_encoder = LabelEncoder()
file_encoder = LabelEncoder()
df["version_enc"] = version_encoder.fit_transform(df["version"])
df["file_enc"] = file_encoder.fit_transform(df["file"])

In [5]:
X = df[["version_enc", "file_enc"]].values
y = df["has_smell"].values

In [6]:
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

In [7]:
split_idx = int(0.8 * len(df))
X_train, X_test = X_scaled[:split_idx], X_scaled[split_idx:]
y_train, y_test = y[:split_idx], y[split_idx:]

In [8]:
svm_model = SVC(kernel='linear', probability=False, random_state=42, verbose=True)
svm_model.fit(X_train, y_train)

[LibSVM]

In [12]:
y_pred = svm_model.predict(X_test)

In [14]:
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))

Accuracy: 0.682583478799482
Confusion Matrix:
 [[ 7438 31280]
 [ 2542 65294]]
Classification Report:
               precision    recall  f1-score   support

           0       0.75      0.19      0.31     38718
           1       0.68      0.96      0.79     67836

    accuracy                           0.68    106554
   macro avg       0.71      0.58      0.55    106554
weighted avg       0.70      0.68      0.62    106554



In [15]:
# ANN Model

In [16]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

In [26]:
ann_model = Sequential([
    Dense(units=32, activation='relu', input_shape=(X.shape[1],)),
    Dense(units=16, activation='relu'),
    Dense(units=1, activation='sigmoid')  # Sortie binaire
])

In [27]:
ann_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [28]:
ann_model.fit(X_train, y_train, epochs=10, batch_size=32, verbose=1, validation_data=(X_test, y_test))

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x15a8fa0a700>

In [29]:
y_pred_ann = (ann_model.predict(X_test) > 0.5).astype(int).flatten()
print("Accuracy:", accuracy_score(y_test, y_pred_ann))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred_ann))
print("Classification Report:\n", classification_report(y_test, y_pred_ann))

Accuracy: 0.7033053662931471
Confusion Matrix:
 [[11766 26952]
 [ 4662 63174]]
Classification Report:
               precision    recall  f1-score   support

           0       0.72      0.30      0.43     38718
           1       0.70      0.93      0.80     67836

    accuracy                           0.70    106554
   macro avg       0.71      0.62      0.61    106554
weighted avg       0.71      0.70      0.66    106554



In [30]:
# Code stacking (SVR)

In [31]:
# Import
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error

In [32]:
# Obtenir les prédictions des deux modèles
ann_preds = ann_model.predict(X_test).flatten()
svm_preds = svm_model.decision_function(X_test)



In [35]:
# Créer les ebtrées du modèle d'ensemble (stacking)
stacked_inputs = np.column_stack((ann_preds, svm_preds))

In [36]:
# Entrîner le modèle méta (SVR)
meta_model = SVR()
meta_model.fit(stacked_inputs, y_test)

In [37]:
# Prédictions finales
final_preds = meta_model.predict(stacked_inputs)

In [38]:
# Evaluation
mse = mean_squared_error(y_test, final_preds)
print("MSE du modèle de stacking (SVR):", mse)
print("Prédictions (extrait):", final_preds[:10])

MSE du modèle de stacking (SVR): 0.2302570881338598
Prédictions (extrait): [0.83940172 0.84363787 0.84772244 0.85165719 0.8554441  0.85908524
 0.86258261 0.86593839 0.86915482 0.87223426]


In [43]:
final_preds_binary = (final_preds >= 0.5).astype(int)

# Accuracy
accuracy = accuracy_score(y_test, final_preds_binary)
print("Accuracy stacking SVR :", accuracy)

# Evaluation complète
print("Confusing Maxtric:\n", confusion_matrix(y_test, final_preds_binary))
print("Classification Report:\n", classification_report(y_test, final_preds_binary))

Accuracy stacking SVR : 0.7080165925258555
Confusing Maxtric:
 [[12213 26505]
 [ 4607 63229]]
Classification Report:
               precision    recall  f1-score   support

           0       0.73      0.32      0.44     38718
           1       0.70      0.93      0.80     67836

    accuracy                           0.71    106554
   macro avg       0.72      0.62      0.62    106554
weighted avg       0.71      0.71      0.67    106554

