In [6]:
import mlflow
import mlflow.artifacts
import mlflow.sklearn
import mlflow.statsmodels
import numpy as np
import statsmodels.api as sm
import statsmodels.formula.api as smf

# Load the "Guerry" dataset from the R "HistData" package via statsmodels.
data = sm.datasets.get_rdataset("Guerry", "HistData").data

# Single source of truth for the model specification, so the fitted model and
# the logged "formula" parameter cannot drift apart.
FORMULA = "Lottery ~ Literacy + np.log(Pop1831)"

# Track the fit, its metrics and its artifacts in a single MLflow run.
with mlflow.start_run():
    # Fit OLS with an R-style formula; Pop1831 enters on a natural-log scale.
    res = smf.ols(FORMULA, data=data).fit()

    # Log parameters
    mlflow.log_param("formula", FORMULA)

    # Log metrics
    mlflow.log_metric("r_squared", res.rsquared)
    mlflow.log_metric("adj_r_squared", res.rsquared_adj)

    # Log the model summary as a text artifact. log_text uploads the string
    # directly, so no stray model_summary.txt is left in the working directory.
    mlflow.log_text(str(res.summary()), "model_summary.txt")

    # Log the fitted model with the statsmodels flavor: `res` is a statsmodels
    # results object, not a scikit-learn estimator, so the sklearn flavor is
    # the wrong container for it.
    mlflow.statsmodels.log_model(res, "ols_model")



In [7]:
from pprint import pprint

# Fetch the most recent run. Leaving experiment_ids unset makes search_runs
# use the active experiment instead of a hard-coded ID, and the explicit
# ordering / max_results make "latest" independent of default behaviour.
latest_runs = mlflow.search_runs(
    order_by=["attributes.start_time DESC"],
    max_results=1,
)
if latest_runs.empty:
    # Fail with a clear message instead of an opaque IndexError from .iloc[0].
    raise RuntimeError("No MLflow runs found in the active experiment.")
latest_run = latest_runs.iloc[0]
run_id = latest_run['run_id']

print("Run ID:", run_id)

# search_runs flattens params/metrics into "params.<name>" / "metrics.<name>"
# columns. Strip only the leading prefix so that names which themselves
# contain "params." or "metrics." are not mangled.
print("\nParameters:")
params = {k[len('params.'):]: v for k, v in latest_run.items() if k.startswith('params.')}
pprint(params)

print("\nMetrics:")
metrics = {k[len('metrics.'):]: v for k, v in latest_run.items() if k.startswith('metrics.')}
pprint(metrics)

print("\nArtifacts:")
client = mlflow.tracking.MlflowClient()
artifacts = client.list_artifacts(run_id)
for artifact in artifacts:
    print(artifact.path)

# If you want to view the content of a specific artifact (e.g., model_summary.txt):
if any(artifact.path == "model_summary.txt" for artifact in artifacts):
    # MlflowClient.download_artifacts is deprecated; use the mlflow.artifacts API.
    local_path = mlflow.artifacts.download_artifacts(
        run_id=run_id, artifact_path="model_summary.txt"
    )
    with open(local_path, "r") as f:
        print("\nModel Summary:")
        print(f.read())
else:
    print("\nNo model_summary.txt found in artifacts.")

Run ID: 25894ca252764dfd8bbde0b562c2428f

Parameters:
{'formula': 'Lottery ~ Literacy + np.log(Pop1831)'}

Metrics:
{'adj_r_squared': np.float64(0.3327711079168274),
 'r_squared': np.float64(0.3484706112599609)}

Artifacts:
model_summary.txt
ols_model

Model Summary:
                            OLS Regression Results                            
Dep. Variable:                Lottery   R-squared:                       0.348
Model:                            OLS   Adj. R-squared:                  0.333
Method:                 Least Squares   F-statistic:                     22.20
Date:                Fri, 28 Jun 2024   Prob (F-statistic):           1.90e-08
Time:                        01:31:28   Log-Likelihood:                -379.82
No. Observations:                  86   AIC:                             765.6
Df Residuals:                      83   BIC:                             773.0
Df Model:                           2                                         
Covariance Type:     