# Monitoreo con Grafana

In [185]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingClassifier
# clasificación
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, OneHotEncoder

from evidently import Report
from evidently import DataDefinition
from evidently import Dataset
from evidently.metrics import ValueDrift, DriftedColumnsCount, MissingValueCount

In [15]:
# Load the raw bank-marketing dataset from its relative CSV path.
bank_marketing_raw_data = pd.read_csv(
    "../dsrp-machine-learning-engineering-3/data/bank-marketing.csv"
)
# Bare trailing expression: the notebook renders the frame as the cell output.
bank_marketing_raw_data

Unnamed: 0,age,age group,eligible,job,salary,marital,education,marital-education,targeted,default,...,contact,day,month,duration,campaign,pdays,previous,poutcome,y,response
0,58,5,Y,management,100000,married,tertiary,married-tertiary,yes,no,...,unknown,5,may,261,1,-1,0,unknown,no,0
1,44,4,Y,technician,60000,single,secondary,single-secondary,yes,no,...,unknown,5,may,151,1,-1,0,unknown,no,0
2,33,3,Y,entrepreneur,120000,married,secondary,married-secondary,yes,no,...,unknown,5,may,76,1,-1,0,unknown,no,0
3,47,4,Y,blue-collar,20000,married,unknown,married-unknown,no,no,...,unknown,5,may,92,1,-1,0,unknown,no,0
4,33,3,Y,unknown,0,single,unknown,single-unknown,no,no,...,unknown,5,may,198,1,-1,0,unknown,no,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
45206,51,5,Y,technician,60000,married,tertiary,married-tertiary,yes,no,...,cellular,17,nov,977,3,-1,0,unknown,yes,1
45207,71,7,N,retired,55000,divorced,primary,divorced-primary,yes,no,...,cellular,17,nov,456,2,-1,0,unknown,yes,1
45208,72,7,N,retired,55000,married,secondary,married-secondary,yes,no,...,cellular,17,nov,1127,5,184,3,success,yes,1
45209,57,5,Y,blue-collar,20000,married,secondary,married-secondary,yes,no,...,telephone,17,nov,508,4,-1,0,unknown,no,0


In [8]:
# Model input columns, grouped by the preprocessing each group receives
# (numeric columns are scaled, categorical columns are one-hot encoded).
NUMERICAL_FEATURES = [
    "age",
    "salary",
    "duration",
]
CATEGORICAL_FEATURES = [
    "eligible",
    "marital",
    "education",
]

In [10]:
# Keep only the model inputs; "response" is the target column.
feature_columns = [*NUMERICAL_FEATURES, *CATEGORICAL_FEATURES]
X = bank_marketing_raw_data[feature_columns]
y = bank_marketing_raw_data["response"]

# Hold out 25% of the rows for evaluation, using a fixed seed for the split.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=1,
)

In [16]:
# One preprocessing step per feature group: standardise the numeric columns and
# one-hot encode the categorical ones (categories unseen at fit time are ignored).
numeric_step = ("num", StandardScaler(), NUMERICAL_FEATURES)
categorical_step = (
    "cat",
    OneHotEncoder(handle_unknown="ignore"),
    CATEGORICAL_FEATURES,
)
preprocessor = ColumnTransformer(transformers=[numeric_step, categorical_step])

# Fit on the training rows only, then apply the fitted transformer to the test rows.
X_train_processed = preprocessor.fit_transform(X_train)
X_test_processed = preprocessor.transform(X_test)

In [20]:
# Gradient-boosted trees with scikit-learn's default hyper-parameters. The seed
# is fixed so that re-running the notebook trains the same model.
clf = GradientBoostingClassifier(random_state=1)
# Last expression of the cell: fit() returns the estimator, which is displayed.
clf.fit(X_train_processed, y_train)

0,1,2
,loss,'log_loss'
,learning_rate,0.1
,n_estimators,100
,subsample,1.0
,criterion,'friedman_mse'
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_depth,3
,min_impurity_decrease,0.0


In [23]:
# Predict a label for every row of the processed test split; the bare name on
# the last line displays the resulting array.
y_predictions = clf.predict(X_test_processed)
y_predictions

array([0, 0, 0, ..., 0, 1, 0], shape=(11303,))

In [24]:
# Accuracy of the predictions against the held-out test labels.
accuracy_score(y_test, y_predictions)

0.8906485003981244

# Cargar datos en InfluxDB

In [None]:
import os
import requests
from datetime import datetime, timedelta


# Query-string parameters sent with every InfluxDB request. Credentials can be
# overridden through environment variables; the fallbacks are the values that
# were previously hard-coded here, so the default behaviour is unchanged.
db_params = {
    "db": "evidently_metrics",
    "u": os.environ.get("INFLUXDB_USER", "admin"),
    "p": os.environ.get("INFLUXDB_PASSWORD", "admin"),
    "precision": "ns"
}
headers = {
    "Content-Type": "text/plain; charset=utf-8",
}

BASE_URL = "http://localhost:8086"
DATA_POINTS = 100
NANOSECONDS_PER_SECOND = 1_000_000_000

# Loop invariants, computed once: the real test accuracy and the start of the
# simulated window (one point per hour over the last DATA_POINTS hours).
baseline_accuracy = accuracy_score(y_test, y_predictions)
window_start = datetime.now() - timedelta(hours=DATA_POINTS)

# Seeded generator so the simulated series is the same on every run.
rng = np.random.default_rng(42)

accuracy_data = []
for i in range(DATA_POINTS):
    # `timestamp` is kept as a plain loop variable: later cells inspect it.
    timestamp = window_start + timedelta(hours=i)

    # Baseline plus Gaussian noise (mean 0.03, std 0.01), capped at 1.0 so the
    # simulated value always stays a valid accuracy.
    noisy_accuracy = float(baseline_accuracy + rng.normal(0.03, 0.01))
    simulated_accuracy = round(min(noisy_accuracy, 1.0), 4)

    # Line protocol: "<measurement> <field>=<value> <timestamp>", with the
    # timestamp in nanoseconds to match db_params["precision"].
    line = f"model_performance_up2 accuracy={simulated_accuracy} {int(timestamp.timestamp()) * NANOSECONDS_PER_SECOND}"
    accuracy_data.append(line)

In [179]:
# Sanity check: list the databases visible to the configured user.
# Database name and credentials come from db_params instead of being repeated
# in the URL; "precision" is a write-endpoint parameter, so it is left out.
query_params = {key: db_params[key] for key in ("db", "u", "p")}
r = requests.post(
    f"{BASE_URL}/query",
    params=query_params,
    data={"q": "SHOW DATABASES"},
    timeout=10,  # do not hang the kernel if InfluxDB is unreachable
)
r.raise_for_status()  # surface HTTP errors instead of parsing an error body
r.json()

{'results': [{'statement_id': 0,
   'series': [{'name': 'databases',
     'columns': ['name'],
     'values': [['evidently_metrics'], ['_internal']]}]}]}

In [180]:
# Send every accuracy point in a single request: one line-protocol entry per line.
payload = "\n".join(accuracy_data)
write_r = requests.post(
    f"{BASE_URL}/write",
    params=db_params,
    data=payload.encode("utf-8"),  # match the charset declared in `headers`
    headers=headers,
    timeout=10,  # do not hang the kernel if InfluxDB is unreachable
)
# Fail loudly on a rejected write instead of continuing silently.
write_r.raise_for_status()

In [181]:
# Display the response object of the write request to check its HTTP status.
write_r

<Response [204]>

In [149]:
# Inspect the newline-joined payload that was built for the write request.
payload

'model_performance accuracy=0.9189 1761791362\nmodel_performance accuracy=0.9248 1761794962\nmodel_performance accuracy=0.9272 1761798562\nmodel_performance accuracy=0.9075 1761802162\nmodel_performance accuracy=0.9321 1761805762\nmodel_performance accuracy=0.9259 1761809362\nmodel_performance accuracy=0.9266 1761812962\nmodel_performance accuracy=0.9208 1761816562\nmodel_performance accuracy=0.9092 1761820162\nmodel_performance accuracy=0.9318 1761823762'

In [150]:
# Inspect the current value of `timestamp` (assigned inside the point-generation loop).
timestamp

datetime.datetime(2025, 10, 30, 6, 29, 22, 673164)

In [159]:
# Round-trip check: truncate `timestamp` to whole epoch seconds, then convert
# those seconds back into a datetime.
epoch_seconds = int(timestamp.timestamp())
datetime.fromtimestamp(epoch_seconds)

datetime.datetime(2025, 10, 26, 2, 30, 21)

In [158]:
# Inspect the payload again (the output reflects whichever run built it last).
payload

'model_performance_up accuracy=0.9077 1761431421\nmodel_performance_up accuracy=0.9195 1761435021\nmodel_performance_up accuracy=0.9157 1761438621\nmodel_performance_up accuracy=0.9234 1761442221\nmodel_performance_up accuracy=0.9129 1761445821\nmodel_performance_up accuracy=0.9167 1761449421\nmodel_performance_up accuracy=0.9379 1761453021\nmodel_performance_up accuracy=0.9178 1761456621\nmodel_performance_up accuracy=0.9128 1761460221\nmodel_performance_up accuracy=0.9215 1761463821'

# Evidently

In [202]:
# Drift report with a single metric: the number of drifted columns, using
# "psi" as the drift method. ValueDrift and MissingValueCount for a
# "prediction" column are not enabled; the frames passed below hold feature
# columns only.
drift_metrics = [
    DriftedColumnsCount(method="psi"),
]
report = Report(metrics=drift_metrics)

# Training features are the reference; held-out test features are the current data.
drift_report = report.run(current_data=X_test, reference_data=X_train)

In [213]:
# Drifted-column count reported by the first metric of the drift report.
drifted_columns_count = int(drift_report.dict()["metrics"][0]["value"]["count"])

# Simulate one point per hour over the last DATA_POINTS hours. The window start
# is computed once so consecutive points are exactly one hour apart.
drift_window_start = datetime.now() - timedelta(hours=DATA_POINTS)
drift_lines = []
for i in range(DATA_POINTS):
    timestamp = drift_window_start + timedelta(hours=i)
    # Real count plus a random integer in [0, 10) to make the series vary.
    simulated_count = drifted_columns_count + np.random.randint(10)
    drift_lines.append(
        f"drift_metrics drifted_columns_count={simulated_count} {int(timestamp.timestamp()) * 1000000000}"
    )

# Write all points in one batched request (one line-protocol entry per line),
# the same way the accuracy series is sent, instead of one request per point.
drift_payload = "\n".join(drift_lines)
write_drift = requests.post(
    f"{BASE_URL}/write",
    params=db_params,
    data=drift_payload,
    headers=headers,
    timeout=10,  # do not hang the kernel if InfluxDB is unreachable
)
# Fail loudly on a rejected write instead of continuing silently.
write_drift.raise_for_status()

0.0

7