In [None]:
import pandas as pd 
import numpy as np
import sklearn
from sklearn import metrics 
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
import statsmodels.api as sm

import mlflow
import mlflow.sklearn
from mlflow.pyfunc import PythonModel
from mlflow.models import infer_signature
import mlflow.statsmodels
from mlflow import MlflowClient


In [None]:
# Load the preprocessed dataset produced upstream (path is relative to the
# notebook's working directory).
prep = pd.read_parquet(path="donnees/preproces.parquet")

In [None]:
# Separate the features from the target column.
x = prep.drop(columns=["is_claim"])
y = prep["is_claim"]

In [None]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=5,
)


In [None]:
from sklearn.preprocessing import StandardScaler

# Learn the scaling statistics from the training split only, then apply the
# same transformation to both splits so no test information leaks into the fit.
scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [None]:
# Build the logistic-regression classifier, then train it on the scaled
# training split. `fit` returns the estimator itself, so `model1` ends up
# bound to the fitted model.
model1 = LogisticRegression(random_state=0, max_iter=1000)
model1 = model1.fit(X_train, y_train)


In [None]:
# model1 was already fitted in the previous cell; display the fitted
# estimator instead of training it a second time on the same data.
model1

In [None]:
# Predict on the held-out split and tabulate actual vs. predicted classes.
preds = model1.predict(X_test)

# Take the row/column labels from the classes the model was trained on so the
# table matches the `is_claim` target (the previous hard-coded
# "approved"/"not_approved" labels described a different problem) and stays
# square even if a class is missing from y_test.
class_labels = model1.classes_
confmtrx = confusion_matrix(y_test, preds, labels=class_labels)
confusion = pd.DataFrame(
    confmtrx,
    index=[f"actual_{label}" for label in class_labels],
    columns=[f"predicted_{label}" for label in class_labels],
)
confusion

In [None]:
# Accuracy of the test-set predictions; the value is logged to MLflow in a
# later cell.
accuracy = accuracy_score(y_true=y_test, y_pred=preds)

In [None]:
# Fit the same logistic regression with statsmodels to get a coefficient
# summary (standard errors, p-values). statsmodels does not add an intercept
# on its own, hence the explicit constant column.
exog_train = sm.add_constant(X_train)
logit_model = sm.Logit(y_train, exog_train)
result = logit_model.fit()
stats1 = result.summary()
print(stats1)

In [None]:

# Select (or create) the experiment that this notebook's runs are grouped under.
experiment_name1 = "oop6"
mlflow.set_experiment(experiment_name1)

# NOTE(review): autologging only hooks statsmodels fits executed after this
# call; the Logit fit earlier in the notebook ran before it — confirm whether
# that fit was meant to be captured.
mlflow.statsmodels.autolog()

with mlflow.start_run() as run:
    # Mean accuracy of the classifier on the training split.
    score = model1.score(X_train, y_train)
    print(f"Score: {score}")

    # Record the training score and the test accuracy computed earlier.
    mlflow.log_metric("score", score)
    mlflow.log_metric("accuracy", accuracy)

    # Infer the model's input/output schema from the training data and its
    # predictions, then store the model under the "model" artifact path.
    predictions = model1.predict(X_train)
    signature = infer_signature(X_train, predictions)
    mlflow.sklearn.log_model(model1, "model", signature=signature)


In [None]:
# Look up the runs of the experiment and read back the logged accuracy.
# `search_runs` takes a list of names via `experiment_names`; the explicit
# ordering guarantees that row 0 is the most recent run.
runs = mlflow.search_runs(
    experiment_names=[experiment_name1],
    order_by=["start_time DESC"],
)
if runs.empty:
    raise RuntimeError(f"No MLflow runs found for experiment {experiment_name1!r}")

latest_run = runs.iloc[0]
# Metric columns in the returned DataFrame are prefixed with "metrics.".
accuracy_metric = latest_run["metrics.accuracy"]

print(f"Accuracy metric: {accuracy_metric}")

# Reload the model logged by that run. "model" is the artifact path that was
# passed to `mlflow.sklearn.log_model` when the run was recorded.
sklearn_path = f"runs:/{latest_run['run_id']}/model"
loaded_logreg_model = mlflow.pyfunc.load_model(sklearn_path)

loaded_logreg_model.predict(X_train)

In [None]:
import pickle

# Persist the fitted classifier to disk. The context manager makes sure the
# file is flushed and closed even if serialization fails.
# NOTE: only unpickle this file from trusted sources; pickle.load can execute
# arbitrary code.
with open("logi_regre.pickle", "wb") as model_file:
    pickle.dump(model1, model_file)