In [2]:
from pathlib import Path

import evidently
import joblib
import pandas as pd
from sklearn.preprocessing import LabelEncoder

In [58]:
# Folder holding the two parquet snapshots compared in this notebook.
# NOTE(review): this is an absolute, machine-specific path; change DATA_DIR
# (and only DATA_DIR) to point at your own copy of the data.
DATA_DIR = '/Users/apal/Documents/PathtoAI/AnalyticsVidhya/Mlops/data/data_drift'

# v1.gzip / v2.gzip are read as parquet files, one DataFrame per snapshot.
v1_df = pd.read_parquet(f'{DATA_DIR}/v1.gzip')
v2_df = pd.read_parquet(f'{DATA_DIR}/v2.gzip')

In [59]:
# Class balance of the Activity label in the V1 snapshot
# (the recorded output shows the six activities at roughly 16.6k-16.8k rows each).
v1_activity = v1_df['Activity']
v1_activity.value_counts()

LAYING                16762
WALKING               16728
WALKING_UPSTAIRS      16675
STANDING              16645
WALKING_DOWNSTAIRS    16627
SITTING               16563
Name: Activity, dtype: int64

In [60]:
# Class balance of the Activity label in the V2 snapshot
# (the recorded output shows SITTING at 28827 and WALKING at 4464, while the
# other four activities keep their V1 counts).
v2_activity = v2_df['Activity']
v2_activity.value_counts()

SITTING               28827
LAYING                16762
WALKING_UPSTAIRS      16675
STANDING              16645
WALKING_DOWNSTAIRS    16627
WALKING                4464
Name: Activity, dtype: int64

In [61]:
# Encode the Activity strings as integer codes.
# The encoder is fitted ONCE, on the reference (V1) labels, and that same
# mapping is then applied to V2. Re-fitting on V2 would only produce matching
# codes while both frames contain exactly the same set of labels; with
# transform(), an unseen V2 label raises an error instead of silently
# shifting the codes.
# NOTE(review): confirm these integer codes match the label coding that the
# loaded model's predict() returns, since both end up in the same reports.
le = LabelEncoder()
v1_df['Activity'] = le.fit_transform(v1_df['Activity'])
v2_df['Activity'] = le.transform(v2_df['Activity'])

In [62]:
# Rename the encoded label column to 'target' in both snapshots (in place).
for snapshot_df in (v1_df, v2_df):
    snapshot_df.rename(columns={'Activity': 'target'}, inplace=True)

### Prediction

In [63]:
## Load the persisted model and the list of feature names it is fed
# NOTE: joblib.load unpickles the file contents, so only load artifacts from
# a trusted source.
model_path = "./model_weights/my_random_forest.joblib"
features_path = "./model_features/train_features.joblib"

model = joblib.load(model_path)  # presumably a random forest, per the file name
train_features = joblib.load(features_path)


In [64]:
# Show the loaded feature names; the prediction cells use them to select the
# model's input columns from each snapshot.
train_features

array(['tGravityAcc-energy()-X', 'angle(X,gravityMean)',
       'tGravityAcc-mean()-X', 'tGravityAcc-min()-Y',
       'tGravityAcc-max()-X', 'tGravityAcc-max()-Y',
       'tGravityAcc-min()-X', 'tGravityAcc-mean()-Y',
       'angle(Y,gravityMean)', 'fBodyAccJerk-entropy()-Y'], dtype=object)

In [65]:
## Score the V1 snapshot
# Select the model's input columns, predict, and store the result alongside
# the data in a 'prediction' column.
v1_test_features = v1_df.loc[:, train_features]
v1_predictions = model.predict(v1_test_features)
v1_df['prediction'] = v1_predictions

In [66]:
## Score the V2 snapshot
# Same steps as for V1: select the model's input columns, predict, and store
# the result in a 'prediction' column.
v2_test_features = v2_df.loc[:, train_features]
v2_predictions = model.predict(v2_test_features)
v2_df['prediction'] = v2_predictions

### There has been a software update, and we have data from both the V1 and V2 software versions

#### Let's compare the two datasets and see if there has been any data drift

### Data Drift

In [67]:
# %pip install evidently  # uncomment and run once if evidently is not installed in this kernel's environment

In [68]:
#pip install somelibrary

In [69]:
from evidently.report import Report

from evidently.test_suite import TestSuite
from evidently.test_preset import DataStabilityTestPreset
from evidently.test_preset import DataQualityTestPreset
from evidently.metric_preset import TargetDriftPreset, DataQualityPreset
from evidently.metric_preset import DataDriftPreset , ClassificationPreset


In [70]:
# Build a report from the data-drift preset and run it with V1 as the
# reference dataset and V2 as the current dataset.
drift_metrics = [DataDriftPreset()]
data_drift_report = Report(metrics=drift_metrics)
data_drift_report.run(reference_data=v1_df, current_data=v2_df)

In [71]:
# Write the data-drift report to HTML. The output folder is created first so
# that saving does not depend on ../output already existing.
drift_report_path = Path("../output/data_drift_report.html")
drift_report_path.parent.mkdir(parents=True, exist_ok=True)
data_drift_report.save_html(str(drift_report_path))

### Data Quality Report

In [72]:
# Build a report from the data-quality preset, run it with V1 as the
# reference dataset and V2 as the current dataset, then display it inline.
quality_metrics = [DataQualityPreset()]
data_quality_report = Report(metrics=quality_metrics)
data_quality_report.run(reference_data=v1_df, current_data=v2_df)

data_quality_report

In [73]:
# Write the data-quality report to HTML. The output folder is created first so
# that saving does not depend on ../output already existing.
quality_report_path = Path("../output/data_quality_report.html")
quality_report_path.parent.mkdir(parents=True, exist_ok=True)
data_quality_report.save_html(str(quality_report_path))

### Concept Drift

In [74]:
# Build a report from the target-drift preset and run it with V1 as the
# reference dataset and V2 as the current dataset.
# NOTE(review): 'target' holds integer class codes at this point; confirm that
# evidently treats it as categorical rather than numerical (an explicit
# column mapping with a classification task may be needed).
target_drift_metrics = [TargetDriftPreset()]
num_target_drift_report = Report(metrics=target_drift_metrics)
num_target_drift_report.run(reference_data=v1_df, current_data=v2_df)

In [75]:
# Write the target-drift report to HTML (file name kept as
# concept_drift_report.html). The output folder is created first so that
# saving does not depend on ../output already existing.
concept_report_path = Path("../output/concept_drift_report.html")
concept_report_path.parent.mkdir(parents=True, exist_ok=True)
num_target_drift_report.save_html(str(concept_report_path))

In [76]:
# Build a report from the classification preset, run it with V1 as the
# reference dataset and V2 as the current dataset, then display it inline.
# NOTE(review): presumably the preset picks up the 'target' and 'prediction'
# columns by their default names - confirm against the evidently version in use.
classification_metrics = [ClassificationPreset()]
classification_performance_report = Report(metrics=classification_metrics)
classification_performance_report.run(reference_data=v1_df, current_data=v2_df)

classification_performance_report

In [77]:
# Write the classification-performance report to HTML. The output folder is
# created first so that saving does not depend on ../output already existing.
classification_report_path = Path("../output/classification_performance_report.html")
classification_report_path.parent.mkdir(parents=True, exist_ok=True)
classification_performance_report.save_html(str(classification_report_path))