# This script contains helper functions for notebook formatting and for train/test splitting.

# In [9]:
# general packages
from IPython.display import HTML, display
import numpy as np
from sklearn.model_selection import train_test_split
import pandas as pd

# In [12]:
"""
This function sets the background color of a Jupyter cell.

Inputs: 
color (string): name of cell color

Outputs: 
None

Source: https://stackoverflow.com/questions/49429585/how-to-change-the-background-color-of-a-single-cell-in-a-jupyter-notebook-jupy
""" 
def set_background(color):
    """
    Set the background color of the Jupyter code cell that shows this output.

    Inputs:
    color (string): name of cell color; it is inserted as-is into the
        generated JavaScript snippet

    Outputs:
    None

    Source: https://stackoverflow.com/questions/49429585/how-to-change-the-background-color-of-a-single-cell-in-a-jupyter-notebook-jupy
    """
    # JavaScript executed in the browser: find the enclosing '.code_cell',
    # recolor its '.input_area', then remove the helper element again.
    # The adjacent literals are concatenated before .format() is applied, so
    # the single '{}' placeholder receives the color.
    script = ("var cell = this.closest('.code_cell');"
              "var editor = cell.querySelector('.input_area');"
              "editor.style.background='{}';"
              "this.parentNode.removeChild(this)".format(color))

    # An <img> with an empty src fires its onerror handler, which is what
    # runs the script above once the output is rendered.
    display(HTML('<img src onerror="{}">'.format(script)))
    

# In [42]:
"""
This function splits the data into a train and test set and stratifies the target values. 

Inputs:
data (pandas dataframe): dataframe to split
target (string): CRT target feature (numeric, conceptual, or both)
my_test_size (float): size of test split out of 1.0
my_state (integer): random state for train/test split

Outputs:
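X_train (pandas dataframe): training data, including the target column
X_test (pandas dataframe): testing data, including the target column
(both are returned together as the list [X_train, X_test])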
"""

def split(data, target, my_test_size, my_state):
    """
    Split the data into a train and a test set, stratified on binned target values.

    Inputs:
    data (pandas dataframe): dataframe to split
    target (string): name of the CRT target column; it is used for
        stratification and re-attached to both outputs
    my_test_size (float): size of test split out of 1.0
    my_state (integer): random state for train/test split

    Outputs:
    list [train, test] of two pandas dataframes; each holds every column whose
    name does not start with 'CRT', plus the target column

    Raises:
    ValueError: if no target value occurs more than once, so no stratification
        bins can be built
    """
    # features are all columns that are not CRT columns
    x_cols = [col for col in data.columns if not col.startswith('CRT')]

    # define X and Y
    X = data[x_cols]
    Y = data[target]

    # number of bin edges: count of target values seen more than once, minus one
    bin_count = int((Y.value_counts() > 1).sum()) - 1
    if bin_count < 0:
        # np.linspace would reject a negative count with an opaque message
        raise ValueError(
            "cannot stratify on '{}': no value occurs more than once".format(target))

    # NOTE(review): the bin edges span [0, 1], which presumably assumes the
    # target scores are scaled to that range -- confirm against the data.
    bins = np.linspace(0, 1, bin_count)
    y_binned = np.digitize(Y, bins)

    # split
    X_train, X_test, Y_train, Y_test = train_test_split(
        X, Y, test_size=my_test_size, stratify=y_binned, random_state=my_state)

    # Work on explicit copies: the split results are derived from `data` by
    # indexing, and adding a column to them directly triggers pandas'
    # SettingWithCopyWarning.
    X_train = X_train.copy()
    X_test = X_test.copy()

    # recombine X and Y across train and test
    X_train[target] = Y_train
    X_test[target] = Y_test

    return [X_train, X_test]