# Demonstration of DataVizML

This notebook demonstrates the capabilities of the `DataVizML` library.

## Import libraries

In [None]:
from datavizml.singledistribution import SingleDistribution
from datavizml.exploratorydataanalysis import ExploratoryDataAnalysis
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.datasets import load_breast_cancer, load_diabetes, load_iris

## Load data

In [None]:
# binary classification dataset (breast cancer), loaded as a DataFrame / Series
X_binary_classification, y_binary_classification = load_breast_cancer(
    return_X_y=True, as_frame=True
)
# keep only the first 8 feature columns and cast the target to boolean
X_binary_classification = X_binary_classification.iloc[:, :8]
y_binary_classification = y_binary_classification.astype(bool)

# regression dataset (diabetes)
X_regression, y_regression = load_diabetes(return_X_y=True, as_frame=True)

# create alternative target for classification with large class imbalance:
# a boolean Series that is True where the regression target exceeds 50
y_regression_class = y_regression > 50

# multiclass dataset (iris): load the bunch once and reuse it for the features,
# the target and the class names instead of loading the dataset a second time
iris = load_iris(as_frame=True)
X_multiclass_classification = iris.data

# replace the integer class codes with their class names
class_map = dict(enumerate(iris.target_names))
y_multiclass_classification = iris.target.map(class_map)

## Demonstrate with binary classification

In [None]:
# build the EDA object for the binary-classification features and their target
eda = ExploratoryDataAnalysis(
    ncols=4,
    figure_width=18,
    axes_height=2.5,
    data=X_binary_classification,
    target=y_binary_classification,
)

# calling the object runs the analysis; the returned figure is then tidied up
fig = eda()
fig.tight_layout()

In [None]:
# build the EDA object for the same features, this time without passing a target
eda = ExploratoryDataAnalysis(
    ncols=4,
    figure_width=18,
    axes_height=2,
    data=X_binary_classification,
)

# calling the object runs the analysis; the returned figure is then tidied up
fig = eda()
fig.tight_layout()

## Demonstrate with regression

In [None]:
# initialise figure: a grid with `ncols` axes per row and enough rows for all features
ncols = 5
n_features = X_regression.shape[1]
nrows = -(-n_features // ncols)  # ceiling division without importing math

# squeeze=False keeps `ax_all` a 2-D array for every grid size (even 1x1),
# so flattening it below always works
fig, ax_all = plt.subplots(
    ncols=ncols, nrows=nrows, figsize=(18, 3 * nrows), squeeze=False
)
axes_flat = ax_all.flatten()

# loop through all features, drawing each one on its own axes;
# `sd` is left bound to the last feature's SingleDistribution after the loop
for (_, x), ax in zip(X_regression.items(), axes_flat):
    sd = SingleDistribution(feature=x, ax=ax, target=y_regression)
    sd()

# hide any axes left over when the grid has more cells than features
for unused_ax in axes_flat[n_features:]:
    unused_ax.set_visible(False)

# set figure layout
fig.tight_layout()

In [None]:
# `sd` is still bound to the SingleDistribution created for the last feature in
# the loop of the previous cell; display the result of its to_dict() method
sd.to_dict()

## Demonstrate with multiclass classification

In [None]:
# build the EDA object for the iris features with the class-name target
eda = ExploratoryDataAnalysis(
    ncols=4,
    figure_width=18,
    axes_height=4,
    data=X_multiclass_classification,
    target=y_multiclass_classification,
)

# calling the object runs the analysis; the returned figure is then tidied up
fig = eda()
fig.tight_layout()

In [None]:
# display the summary of the EDA object created in the previous cell
# (multiclass features with target)
eda.summary()

In [None]:
# append the target as an extra column so it is analysed alongside the features;
# no separate `target` argument is passed in this run
multiclass_with_target = pd.concat(
    [X_multiclass_classification, y_multiclass_classification], axis=1
)

# NOTE(review): prediction_matrix_full=True presumably requests scores between
# every pair of columns — confirm against the library documentation
eda = ExploratoryDataAnalysis(
    ncols=5,
    figure_width=18,
    axes_height=4,
    prediction_matrix_full=True,
    data=multiclass_with_target,
)

# calling the object runs the analysis; the returned figure is then tidied up
fig = eda()
fig.tight_layout()

In [None]:
# draw the prediction score plot of the current `eda` object on its own
# single-axes figure
fig, ax = plt.subplots(nrows=1, ncols=1)
eda.prediction_score_plot(ax=ax)
fig.tight_layout()

## Demonstrate with imbalanced binary classification

In [None]:
# build the EDA object for the diabetes features against the imbalanced
# boolean target derived from the regression target
eda = ExploratoryDataAnalysis(
    ncols=4,
    figure_width=18,
    axes_height=4,
    data=X_regression,
    target=y_regression_class,
)

# calling the object runs the analysis; the returned figure is then tidied up
fig = eda()
fig.tight_layout()

# last expression of the cell: show the prediction matrix
eda.prediction_matrix

In [None]:
# same data and imbalanced target as the previous cell, now with
# target_rebalance switched on for comparison
eda = ExploratoryDataAnalysis(
    ncols=4,
    figure_width=18,
    axes_height=4,
    data=X_regression,
    target=y_regression_class,
    target_rebalance=True,
)

# calling the object runs the analysis; the returned figure is then tidied up
fig = eda()
fig.tight_layout()

# last expression of the cell: show the prediction matrix
eda.prediction_matrix

## Demonstrate transformation options

In [None]:
# base signal: the integers 1..20 repeated 100 times (2000 values)
raw = list(range(1, 21)) * 100

# Each extra column is `raw` passed through one numpy function.
# NOTE(review): the column names look like the inverse of the function applied
# (e.g. "square" holds sqrt(raw)) — presumably they name the transform that
# would undo it; confirm this labelling is intentional.
column_functions = {
    "square": np.sqrt,
    "square-root": np.square,
    "log-2": np.exp2,
    "exp-2": np.log2,
}
data_transform = pd.DataFrame(
    {"raw": raw, **{name: func(raw) for name, func in column_functions.items()}}
)

In [None]:
# build the EDA object for the transformation demo data (no target, no deskewing)
eda = ExploratoryDataAnalysis(
    ncols=5,
    figure_width=18,
    axes_height=3,
    data=data_transform,
)

# calling the object runs the analysis; the returned figure is then tidied up
fig = eda()
fig.tight_layout()

In [None]:
# same transformation demo data as the previous cell, now with data_deskew
# switched on for comparison
eda = ExploratoryDataAnalysis(
    ncols=5,
    figure_width=18,
    axes_height=3,
    data=data_transform,
    data_deskew=True,
)

# calling the object runs the analysis; the returned figure is then tidied up
fig = eda()
fig.tight_layout()

In [None]:
# display the summary of the EDA object created in the previous cell
# (transformation demo data with data_deskew=True)
eda.summary()