In [24]:
!pip install evidently scikit-learn category_encoders
!pip install evidently --upgrade
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from category_encoders import TargetEncoder
from evidently.report import Report
from evidently.metric_preset import DataDriftPreset, TargetDriftPreset, RegressionPreset
from evidently.metrics import ColumnDriftMetric, RegressionQualityMetric
from evidently.test_suite import TestSuite
# Import the ColumnMapping class
from evidently.pipeline.column_mapping import ColumnMapping
import warnings
warnings.filterwarnings('ignore')

# Build an Evidently data-drift report comparing a held-out split (current)
# against the training split (reference) of the marketing campaign dataset.

# Label column: declared as the target in the column mapping and therefore
# kept out of the feature lists and out of feature imputation.
TARGET_COLUMN = 'Response'
# Where the rendered HTML report is written.
REPORT_PATH = 'data_drift_report.html'

# The file is tab-separated, so the delimiter must be given explicitly or the
# whole row would be parsed as a single column.
df = pd.read_csv("marketing_campaign.csv", delimiter='\t')

# Column groups are derived from dtypes. `numeric_cols` keeps every numeric
# column (target included, if numeric) for any later cell that relies on it.
numeric_cols = df.select_dtypes(include=np.number).columns
object_cols = df.select_dtypes(include='object').columns

# Feature lists must not contain the target: it is mapped separately below,
# and listing it as a feature as well would declare one column in two roles.
# Categorical columns are deliberately left as raw objects (no encoding).
numerical_features = [col for col in numeric_cols if col != TARGET_COLUMN]
categorical_features = [col for col in object_cols if col != TARGET_COLUMN]

# Split BEFORE imputing so that fill values are learned from the reference
# (training) split only. The split depends only on row positions and the
# seed, so it selects the same rows as splitting after imputation would.
# NOTE: `df` itself is intentionally left un-imputed.
train_data, test_data = train_test_split(df, test_size=0.3, random_state=42)
# Work on explicit copies so the assignments below never write through to `df`.
train_data = train_data.copy()
test_data = test_data.copy()

# Mean-impute numeric features using training-split means for both splits;
# using pooled means would leak current-data statistics into the reference
# and could mask real drift. The target is never imputed.
if numerical_features:
    train_means = train_data[numerical_features].mean()
    train_data[numerical_features] = train_data[numerical_features].fillna(train_means)
    test_data[numerical_features] = test_data[numerical_features].fillna(train_means)

# Tell Evidently which columns play which role.
column_mapping = ColumnMapping()
column_mapping.numerical_features = numerical_features
column_mapping.categorical_features = categorical_features
column_mapping.target = TARGET_COLUMN

# Run the drift preset with the training split as reference and the test
# split as current data, then persist the report as HTML.
data_drift_report = Report(metrics=[
    DataDriftPreset(),
])
data_drift_report.run(
    current_data=test_data,
    reference_data=train_data,
    column_mapping=column_mapping,
)
data_drift_report.save_html(REPORT_PATH)
print(f"Data drift report generated successfully: {REPORT_PATH}")



Data drift report generated successfully: data_drift_report.html
