In [39]:
import pandas as pd 
import numpy as np
import sklearn
from sklearn import metrics 
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
import statsmodels.api as sm

import mlflow
import mlflow.sklearn
from mlflow.pyfunc import PythonModel
from mlflow.models import infer_signature
import mlflow.statsmodels
from mlflow import MlflowClient


In [40]:
prep = pd.read_parquet("../donnees/preproces.parquet")

In [41]:
y = prep['is_claim']
x = prep.drop('is_claim',axis=1)

In [42]:
X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(x, y, test_size = 0.2, random_state = 5)


In [43]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
X_train=scaler.fit_transform(X_train)
X_test=scaler.transform(X_test)

In [44]:
# train a model
model1 = LogisticRegression(max_iter=1000,random_state=0).fit(X_train, y_train)


In [45]:
model1.fit(X_train, y_train)

In [46]:
preds = model1.predict(X_test)
confmtrx = np.array(confusion_matrix(y_test, preds))
confusion = pd.DataFrame(confmtrx, index=['approved', 'not_approved'],
columns=['predicted_approved', 'predicted_not_approved'])
confusion

Unnamed: 0,predicted_approved,predicted_not_approved
approved,10964,0
not_approved,755,0


In [47]:
# Evaluate accuracy and log the metric using MLflow
accuracy = accuracy_score(y_test, preds)

In [48]:
logit_model=sm.Logit(y_train,sm.add_constant(X_train))
logit_model
result=logit_model.fit()
stats1=result.summary()
print(stats1)

2024/01/27 23:10:08 INFO mlflow.utils.autologging_utils: Created MLflow autologging run with ID 'fb0726ca3f3d4657b3179cc4fd94ea07', which will track hyperparameters, performance metrics, model artifacts, and lineage information for the current statsmodels workflow


         Current function value: 0.232738
         Iterations: 35




                           Logit Regression Results                           
Dep. Variable:               is_claim   No. Observations:                46873
Model:                          Logit   Df Residuals:                    46856
Method:                           MLE   Df Model:                           16
Date:                Sat, 27 Jan 2024   Pseudo R-squ.:                 0.01981
Time:                        23:10:10   Log-Likelihood:                -10909.
converged:                      False   LL-Null:                       -11130.
Covariance Type:            nonrobust   LLR p-value:                 9.228e-84
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
const         -2.7567   2.36e+06  -1.17e-06      1.000   -4.62e+06    4.62e+06
x1            -0.0131   8.33e+04  -1.57e-07      1.000   -1.63e+05    1.63e+05
x2             0.3666        nan        nan        n

In [51]:
mlflow.set_tracking_uri("http://192.168.68.200:8029")
experiment_name1 = "oop6"
mlflow.set_experiment(experiment_name1)
mlflow.statsmodels.autolog()
with mlflow.start_run() as run:
        score = model1.score(X_train, y_train)
        print(f"Score: {score}")
        mlflow.log_metric("score", score)
        mlflow.log_metric("accuracy", accuracy)
        predictions = model1.predict(X_train)
        signature = infer_signature(X_train, predictions)
        mlflow.sklearn.log_model(model1, "model", signature=signature)


Score: 0.9361466089219806


In [53]:
#Set the experiment name or ID where the run was logged


# Search for the run and retrieve the logged metrics
runs = mlflow.search_runs(experiment_name=experiment_name1)
latest_run = runs.iloc[0]  # Assumes you want the latest run
accuracy_metric = latest_run['accuracy']

print(f"Accuracy metric: {accuracy_metric}")

TypeError: search_runs() got an unexpected keyword argument 'experiment_name'

loaded_logreg_model = mlflow.pyfunc.load_model(sklearn_path)

loaded_logreg_model.predict(X_train)

In [None]:
import pickle
pickle.dump(model1, open("logi_regre.pickle", "wb"))