In [1]:
import plotly.figure_factory as ff
import numpy as np

# Seed a local generator so the three samples (and therefore the figure) are
# identical on every Restart & Run All.
rng = np.random.default_rng(42)

# Three 200-point standard-normal samples whose means are shifted to -1, 0, +1.
x1 = rng.standard_normal(200) - 1
x2 = rng.standard_normal(200)
x3 = rng.standard_normal(200) + 1

hist_data = [x1, x2, x3]

group_labels = ['Group 1', 'Group 2', 'Group 3']
colors = ['#835AF1', '#7FA6EE', '#B8F7D4']

# Create a distplot that draws only the density curves: histogram bars and rug
# marks are switched off. curve_type is not passed, so plotly's default is used.
fig = ff.create_distplot(hist_data, group_labels, colors=colors, bin_size=.25,
                         show_curve=True, show_hist=False, show_rug=False)

# Add a title that matches what is actually drawn (no histogram, no rug)
fig.update_layout(title_text='Curve-only Distplot')
fig.show()

# NannyML

In [2]:
import nannyml as nml
import pandas as pd
from IPython.display import display

# Load the bundled US Census (MA employment) example data. The loader returns
# three objects; the third is bound to a descriptive name rather than `_`,
# because assigning `_` in a notebook shadows IPython's "last output" variable.
# NOTE(review): the third object is presumably the analysis-period targets —
# confirm against the NannyML docs.
reference_df, analysis_df, analysis_targets_df = nml.load_us_census_ma_employment_data()

# Preview the first rows of both frames using rich display.
display(reference_df.head(), analysis_df.head())

Unnamed: 0,id,AGEP,SCHL,MAR,RELP,DIS,ESP,CIT,MIG,MIL,...,NATIVITY,DEAR,DEYE,DREM,SEX,RAC1P,employed,year,prediction,predicted_probability
0,0,62.0,16,1,17,1,0,1,1,4,...,1,2,2,2,2,1,0,2015,0,0.121211
1,1,48.0,21,1,0,2,0,1,1,4,...,1,2,2,2,2,1,0,2015,1,0.816033
2,2,47.0,21,1,1,2,0,1,1,4,...,1,2,2,2,1,1,0,2015,1,0.951815
3,3,34.0,12,5,0,2,0,1,3,4,...,1,2,2,2,1,2,0,2015,1,0.563825
4,4,33.0,23,5,0,2,0,5,1,4,...,2,2,2,2,1,1,1,2015,1,0.944436


Unnamed: 0,id,AGEP,SCHL,MAR,RELP,DIS,ESP,CIT,MIG,MIL,ANC,NATIVITY,DEAR,DEYE,DREM,SEX,RAC1P,year,prediction,predicted_probability
0,68785,46.0,21,1,0,2,0,3,1,4,2,1,2,2,2,1,1,2016,1,0.948828
1,68786,46.0,21,1,1,2,0,1,1,4,1,1,2,2,2,2,1,2016,1,0.772002
2,68787,12.0,9,5,2,2,1,1,1,0,2,1,2,2,2,2,1,2016,0,0.000149
3,68788,52.0,21,3,0,2,0,1,1,4,2,1,2,2,2,2,1,2016,1,0.90607
4,68789,21.0,18,5,2,2,0,1,1,4,2,1,2,2,2,1,1,2016,1,0.699663


In [3]:
# Chunk size handed to the NannyML estimator below (the drift calculator in a
# later cell reuses the same value).
chunk_size = 5000

# Configure CBPE for a binary classifier and fit it on the reference data in
# one expression; `estimator` ends up bound to whatever `fit` returns.
estimator = nml.CBPE(
    y_true='employed',
    y_pred='prediction',
    y_pred_proba='predicted_probability',
    problem_type='classification_binary',
    metrics=['roc_auc'],
    chunk_size=chunk_size,
).fit(reference_df)

# Run the fitted estimator over the analysis data.
estimated_performance = estimator.estimate(analysis_df)

In [4]:
# Turn the result of estimator.estimate(analysis_df) into a figure and render it.
figure = estimated_performance.plot()
figure.show()

In [9]:
# Columns handed to the univariate drift calculator.
feature_column_names = [
    'AGEP', 'SCHL', 'MAR', 'RELP', 'DIS', 'ESP', 'CIT', 'MIG',
    'MIL', 'ANC', 'NATIVITY', 'DEAR', 'DEYE', 'DREM', 'SEX', 'RAC1P',
]

univariate_calculator = nml.UnivariateDriftCalculator(
    chunk_size=chunk_size,
    column_names=feature_column_names,
)

# Fit on the reference data, then calculate drift on the analysis data.
univariate_calculator.fit(reference_df)
univariate_drift = univariate_calculator.calculate(analysis_df)

# Plot only the analysis-period results for a three-column subset.
figure = (
    univariate_drift
    .filter(period='analysis', column_names=['RELP','AGEP', 'SCHL'])
    .plot(kind='drift')
)
figure.show()

# Create joyplot

In [6]:
import pandas as pd
from typing import List
import numpy as np
from pydantic import BaseModel, ConfigDict

In [7]:
class DistPlotData(BaseModel):
    """One element of the list taken by `horizontal_joyplot` and `joyplot`.

    Bundles a pandas Series with a string time label and an integer drift
    marker.
    """

    # Declared with arbitrary_types_allowed so the pd.Series annotation on
    # `data` is accepted as a field type.
    model_config = ConfigDict(arbitrary_types_allowed=True)

    # Values for this entry.
    data: pd.Series
    # NOTE(review): presumably the period/chunk label for this entry — confirm.
    time: str
    # NOTE(review): looks like a 0/1 drift flag stored as int — confirm.
    drifted: int

In [8]:
import plotly.graph_objs as go
from typing import Optional
def horizontal_joyplot(data: List[DistPlotData]) -> go.Figure:
    """Placeholder for a horizontal joyplot built from DistPlotData entries.

    Judging by the name and signature, this is meant to turn ``data`` into a
    plotly figure.

    NOTE(review): the body is only ``...``, so the function currently returns
    None despite the ``go.Figure`` annotation.

    Args:
        data: List of DistPlotData entries.
    """
    ...
def joyplot(data: List[DistPlotData], color: Optional[dict] = None) -> go.Figure:
    """Placeholder for a joyplot built from DistPlotData entries.

    Judging by the name and signature, this is meant to turn ``data`` into a
    plotly figure.

    NOTE(review): apart from resolving the default for ``color``, the body is
    only ``...``, so the function currently returns None despite the
    ``go.Figure`` annotation.

    Args:
        data: List of DistPlotData entries.
        color: Mapping with the keys ``'drifted'`` and ``'non_drift'``. When
            omitted or None, ``{'drifted': 'red', 'non_drift': 'blue'}`` is
            used.
    """
    # Build the default mapping per call rather than in the signature: a dict
    # literal as a default value is created once and shared by every call, so
    # any mutation would leak into later calls.
    if color is None:
        color = {'drifted': 'red', 'non_drift': 'blue'}
    ...