In [1]:
!pip install ucimlrepo



In [7]:
#Import required libraries
import matplotlib.pyplot as plt
import mlflow
import mlflow.sklearn
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.metrics import RocCurveDisplay
from sklearn.metrics import (
    roc_curve, auc, confusion_matrix, accuracy_score, f1_score, precision_score, recall_score
)
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC

from preprocessing import split_data,pre_processing, df_to_arr

# Load the Data and perform preprocessing 

In [2]:
# Obtain the pre-processed data from pre_processing(), defined in preprocessing.py.
# Presumably x holds the features and y the class labels — TODO confirm against preprocessing.py.
x, y = pre_processing()

# Split the pre-processed data into train and test partitions using split_data()
# (also from preprocessing.py; split ratio and shuffling are decided there).
x_train, x_test, y_train, y_test = split_data(x, y)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['class'] = df['class'].map(dict)  # Map class labels


In [3]:
# Pass the four splits through df_to_arr() from preprocessing.py; judging by its name it
# converts DataFrames to arrays — TODO confirm.
# NOTE(review): the results are bound to the same names as the inputs, so re-running this
# cell feeds the already-converted objects back into df_to_arr.
x_train, x_test, y_train, y_test = df_to_arr(x_train, x_test, y_train, y_test)

# Model hyperparameter tuning by grid search

In [None]:
# Fine grid centred on the result of the coarse (loose) search: the base-2 exponents
# of C and gamma cover two values before and two values after the coarse optimum,
# in steps of 0.25. The best kernel found by the coarse search was rbf, so it is the
# only kernel searched here.
param_grid = dict(
    C=2.0 ** np.arange(2.50, 3.75, 0.25),
    kernel=['rbf'],
    gamma=2.0 ** np.arange(-2.50, -1.25, 0.25),
)

In [None]:
# Build the grid-search object: an SVC is evaluated for every combination in
# param_grid with 5-fold cross-validation, ranked by accuracy.
grid_search = GridSearchCV(
    SVC(),
    param_grid,
    scoring='accuracy',
    cv=5,
    verbose=2,
    n_jobs=-1,  # -1 -> run the fits in parallel on all available cores
)

In [None]:
# Run the cross-validated grid search on the training split.
grid_search.fit(x_train, y_train)

In [None]:
# Parameter combination that achieved the best mean cross-validation score in the grid search
grid_search.best_params_ #best parameters from grid search

In [None]:
# Put the per-candidate cross-validation results into a DataFrame so the fine-grid
# results can be compared with the hyperparameters obtained from the loose grid.
grid_results= pd.DataFrame(grid_search.cv_results_) #results of the fine grid search

In [None]:
# Show the fine-grid candidates with C == 8: the fine grid yields the same C value
# as the loose grid did, i.e. 8.
# The mask is built from grid_results itself (the previous `results` name was never defined).
grid_results[grid_results['param_C'] == 8]

In [None]:
# Mean cross-validated score (accuracy, per the scoring argument) of the best parameter combination
grid_search.best_score_

In [8]:
# Final classifier, configured with the best hyper-parameters from the grid search.
# probability=True enables predict_proba, which the ROC computation relies on.
svc_model = SVC(
    kernel='rbf',
    C=8,
    gamma=0.25,
    probability=True,
)

# Log the model to MLflow

In [21]:
# Initialize MLflow.
# The tracking URI is set first so that set_experiment resolves (or creates) the
# experiment on that tracking server rather than on whatever store was active before.
mlflow.set_tracking_uri("http://127.0.0.1:5000/")
mlflow.set_experiment("svm_best_model")

# Start an MLflow run; params, metrics, artifacts and the model logged inside the
# block are attached to it, and the run is ended when the block exits.
with mlflow.start_run(run_name="svc_model_run") as active_run:
    # Log model hyperparameters, read from the estimator itself so the logged
    # values cannot drift from the ones the model was constructed with
    mlflow.log_param("C", svc_model.C)
    mlflow.log_param("gamma", svc_model.gamma)
    mlflow.log_param("kernel", svc_model.kernel)

    # Train the model
    svc_model.fit(x_train, y_train)

    # Predict probabilities & labels
    y_prob = svc_model.predict_proba(x_test)
    y_pred = svc_model.predict(x_test)

    # Compute ROC Curve from column 1 of predict_proba, i.e. the probability of the
    # second entry of svc_model.classes_ (presumably class 1 = Gamma — confirm
    # against the label mapping in preprocessing.py)
    fpr, tpr, _ = roc_curve(y_test, y_prob[:, 1])
    roc_auc = auc(fpr, tpr)

    # Plot ROC Curve on an explicit figure/axes pair
    roc_fig, roc_ax = plt.subplots(figsize=(7, 5))
    roc_ax.plot(fpr, tpr, label=f'ROC Curve (AUC = {roc_auc:.2f})')
    roc_ax.set_xlabel('False Positive Rate')
    roc_ax.set_ylabel('True Positive Rate')
    roc_ax.set_title('ROC Curve')
    roc_ax.legend()

    # Save ROC curve as artifact, then close the figure to free its memory
    roc_curve_path = "roc_curve.png"
    roc_fig.savefig(roc_curve_path)
    mlflow.log_artifact(roc_curve_path)
    plt.close(roc_fig)

    # Confusion Matrix
    cm = confusion_matrix(y_test, y_pred)

    # Plot Confusion Matrix as an annotated heatmap (integer cell counts)
    cm_fig, cm_ax = plt.subplots(figsize=(5, 4))
    sns.heatmap(cm, annot=True, fmt='d', cmap="Blues", ax=cm_ax)
    cm_ax.set_xlabel('Predicted')
    cm_ax.set_ylabel('Actual')
    cm_ax.set_title('Confusion Matrix')

    # Save Confusion Matrix as artifact, then close the figure
    cm_path = "confusion_matrix.png"
    cm_fig.savefig(cm_path)
    mlflow.log_artifact(cm_path)
    plt.close(cm_fig)

    # Compute Performance Metrics on the test split
    accuracy = accuracy_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred)
    recall = recall_score(y_test, y_pred)

    # Log Performance Metrics
    mlflow.log_metric("Accuracy", accuracy)
    mlflow.log_metric("F1 Score", f1)
    mlflow.log_metric("Precision", precision)
    mlflow.log_metric("Recall", recall)

    # Log the trained model under the artifact path "svm_model"
    mlflow.sklearn.log_model(svc_model, "svm_model")

    print("✅ Model and metrics logged successfully in MLflow!")
    # Surface the run ID: it is needed to build runs:/<run_id>/svm_model URIs later on
    print(f"Run ID: {active_run.info.run_id}")



In [11]:
#register the model

In [12]:
# https://www.mlflow.org/docs/latest/model-registry/#adding-an-mlflow-model-to-the-model-registry

# Register the Model

In [None]:
# We use the API below to register the model using its model URI and run ID

In [22]:
# Register the model logged in the MLflow run under a name in the Model Registry;
# the registry also keeps track of the model's versions.
model_name = 'Support vector machine model' #give a name of model you want to register
run_id = input('Enter run id:').strip()  # drop whitespace pasted along with the ID
# The artifact path must match the one given to mlflow.sklearn.log_model ("svm_model");
# the former "model" path does not exist in the run.
model_uri = f'runs:/{run_id}/svm_model'
result = mlflow.register_model(
    model_uri, model_name
)

# Load the registered ML model to make predictions

In [17]:
# NOTE(review): mlflow is already imported in the notebook's import cell; this re-import is redundant.
import mlflow
# Placeholder: replace the string with the actual run ID of the logged model before running.
# NOTE(review): this rebinds run_id, overwriting any value entered in the registration cell.
run_id = f"your model's run ID"
# URI of the model artifact inside that run; "svm_model" is the artifact path used when the model was logged
logged_model = f'runs:/{run_id}/svm_model' #can be copied from the MLflow UI

# Load model as a PyFuncModel.
loaded_model = mlflow.pyfunc.load_model(logged_model)



Downloading artifacts:   0%|          | 0/5 [00:00<?, ?it/s]

In [None]:
# Load the registered model from the Model Registry.
# Registered models are addressed with the models:/ scheme (name plus version or
# "latest"), not through a run's artifact path; "latest" resolves to the most
# recently registered version of model_name.
model_uri = f'models:/{model_name}/latest'
# Load model as a PyFuncModel.
loaded_model = mlflow.pyfunc.load_model(model_uri)

In [19]:
# Confirm that the loaded object is MLflow's generic pyfunc wrapper
print(type(loaded_model)) 

<class 'mlflow.pyfunc.PyFuncModel'>


In [18]:
# Predict on the test split with the model loaded from MLflow.
# NOTE(review): this rebinds y_pred, replacing the predictions computed in the MLflow logging cell.
y_pred = loaded_model.predict(x_test)

In [20]:
# Display the predictions returned by the loaded model
y_pred

array([0, 0, 0, ..., 1, 0, 1], shape=(3781,))

In [None]:
#After comparing different models and their results, the best model should be used for deployment