In [62]:
import pandas as pd
from evidently.report import Report
from evidently.metrics import ColumnDriftMetric, DatasetDriftMetric, DatasetMissingValuesMetric
from evidently.metrics import ColumnQuantileMetric, ColumnCorrelationsMetric
import json
from datetime import datetime, timedelta

df = pd.read_json("processtimes.json")

df = df[df['StationName'] == 'StationName1']

# Define reference and current data with timezone-aware datetime
df

Unnamed: 0,StationGroup,StationName,UnitIdType,SerialNumber,Value,TimeStamp
1,StationGroup1,StationName1,UnitIdType1,1,0.181,2024-10-23 17:19:37.079000+02:00
2,StationGroup1,StationName1,UnitIdType1,14,0.130,2024-10-23 17:19:37.079000+02:00
197,StationGroup1,StationName1,UnitIdType1,128,0.130,2024-10-23 17:19:46.567000+02:00
222,StationGroup1,StationName1,UnitIdType1,72,0.077,2024-10-23 17:19:46.567000+02:00
440,StationGroup1,StationName1,UnitIdType1,369,0.079,2024-10-23 17:19:54.325000+02:00
...,...,...,...,...,...,...
112756,StationGroup1,StationName1,UnitIdType1,25817,0.113,2024-10-23 18:21:35.263000+02:00
113023,StationGroup1,StationName1,UnitIdType1,25861,0.101,2024-10-23 18:21:42.999000+02:00
113024,StationGroup1,StationName1,UnitIdType1,25860,0.114,2024-10-23 18:21:42.999000+02:00
113331,StationGroup1,StationName1,UnitIdType1,25940,0.107,2024-10-23 18:21:52.256000+02:00


In [63]:


current_time = pd.to_datetime("2024-10-23T18:11:37.111000+02:00")
reference_window = current_time - timedelta(minutes=10)
reference_data = df[df['TimeStamp'] <= reference_window]
current_data = df[df['TimeStamp'] > reference_window]

# Evidently Data Drift Report for each station
station_names = df['StationName'].unique()
reports = {}

for station in station_names:
    station_reference_data = reference_data[reference_data['StationName'] == station]
    station_current_data = current_data[current_data['StationName'] == station]
    
    if not station_reference_data.empty and not station_current_data.empty:
        data_drift_report = Report(metrics=[
    ColumnDriftMetric(column_name='Value'),
    DatasetDriftMetric(),
    DatasetMissingValuesMetric(),
    ColumnQuantileMetric(column_name='Value', quantile=0.5),
    ColumnCorrelationsMetric(column_name='Value')
        ])
        data_drift_report.run(reference_data=station_reference_data[['Value']], current_data=station_current_data[['Value']])
        
        print(data_drift_report)
        # Save results as JSON
        drift_result = data_drift_report.as_dict()
        print(drift_result)
        reports[station] = drift_result

# Save reports for further processing or dashboard
with open("drift_reports.json", "w") as f:
    json.dump(reports, f)

print("Drift reports created and saved for each station.")


<evidently.report.report.Report object at 0x3073f9c10>
{'metrics': [{'metric': 'ColumnDriftMetric', 'result': {'column_name': 'Value', 'column_type': 'num', 'stattest_name': 'K-S p_value', 'stattest_threshold': 0.05, 'drift_score': 0.5548181954195551, 'drift_detected': False, 'current': {'small_distribution': {'x': [-0.001, 0.0402, 0.0814, 0.1226, 0.1638, 0.20500000000000002, 0.2462, 0.2874, 0.3286, 0.3698, 0.41100000000000003], 'y': [0.24599842561007612, 1.065993177643663, 13.611912883757544, 7.871949619522436, 0.983993702440304, 0.24599842561007615, 0.08199947520335873, 0.08199947520335867, 0.0, 0.08199947520335867]}}, 'reference': {'small_distribution': {'x': [-0.001, 0.1978, 0.3966, 0.5954, 0.7942, 0.993, 1.1918000000000002, 1.3906, 1.5894000000000001, 1.7882000000000002, 1.987], 'y': [4.6656752106843955, 0.236928819292567, 0.07290117516694368, 0.03645058758347185, 0.009112646895867962, 0.0, 0.0, 0.0, 0.0, 0.009112646895867967]}}}}, {'metric': 'DatasetDriftMetric', 'result': {'drif