# Functions demo for OrpheusDT

In [2]:
import pandas as pd
import sklearn
from orpheus import OrpheusDT
from sklearn.tree import DecisionTreeClassifier

## Data preprocessing

In [3]:
# Path to the WFH/WFO dataset CSV, relative to the notebook's working directory
csv_name = './data/WFH_WFO_dataset.csv'

# Load the CSV into a DataFrame and preview its first five rows
df = pd.read_csv(filepath_or_buffer=csv_name)
df.head(n=5)

Unnamed: 0,ID,Name,Age,Occupation,Gender,Same_ofiice_home_location,kids,RM_save_money,RM_quality_time,RM_better_sleep,calmer_stressed,RM_professional_growth,RM_lazy,RM_productive,digital_connect_sufficient,RM_better_work_life_balance,RM_improved_skillset,RM_job_opportunities,Target
0,1,Bhavana,45,Tutor,Female,Yes,Yes,Yes,Yes,Yes,CALMER,5,1,5,Yes,5,5,Yes,1
1,2,Harry,24,Tutor,Male,No,No,No,No,No,CALMER,2,2,2,No,3,3,No,1
2,3,Banditaa,53,HR,Female,Yes,Yes,Yes,Yes,Yes,CALMER,3,3,4,No,5,3,No,1
3,4,Neetha,26,Engineer,Female,Yes,No,Yes,Yes,No,STRESSED,3,4,4,No,4,5,Yes,0
4,5,Ram,26,Recruiter,Male,Yes,No,No,Yes,No,STRESSED,3,3,5,Yes,4,2,Yes,1


In [4]:
# Name of the label column
target = 'Target'

# Columns left out of the training frame
features_exclude = [
    'ID',
    'Name',
]

# Columns holding Yes/No answers. The first name's spelling matches the CSV
# header and must stay as-is.
# NOTE(review): this list is not referenced by any other cell shown in this notebook.
features_yesno = [
    'Same_ofiice_home_location', 'kids',
    'RM_save_money', 'RM_quality_time', 'RM_better_sleep',
    'digital_connect_sufficient', 'RM_job_opportunities',
]

In [5]:
# Keep every column except the excluded ones (original column order preserved),
# then one-hot encode the remaining frame with pd.get_dummies.
df_train = pd.get_dummies(df.loc[:, ~df.columns.isin(features_exclude)].copy())

In [6]:
# Every encoded column other than the label is a feature
feature_names = [column for column in df_train.columns if column != target]

# Feature matrix and label vector taken from the encoded frame
X = df_train.loc[:, feature_names]
y = df_train.loc[:, target]

## Set up different versions of data

In [7]:
# Each version is an independent copy of the first `n_rows` rows of (X, y);
# a row count of None means "no limit", so 'version5' holds the complete data.
data_versions = {
    version: (X.iloc[:n_rows].copy(), y.iloc[:n_rows].copy())
    for version, n_rows in [
        ('version1', 100),
        ('version2', 120),
        ('version3', 150),
        ('version4', 180),
        ('version5', None),
    ]
}

In [8]:
# Optional sanity check (disabled): when uncommented, displays the per-column
# NaN counts of the feature frame of every data version.
# from IPython.display import display
# for _, d in data_versions.items():
#     display(d[0].isna().sum())


## Start OrpheusDT

## Instantiate OrpheusDT with your task name and your name

In [9]:
# Positional arguments: the task name first, then the user's name
orpheusDT = OrpheusDT("WFH_WFO", "Alex")

## There are 3 training modes
### 1. new data + new model
     orpheus_instance.train(data_name, X, y, model_name, model)

### 2. new data + existing model
     orpheus_instance.train(data_name, X, y, model_name)

### 3. existing data + new model
     orpheus_instance.train(data_name, model_name, model)


## 1. Training with new data + new model


In [None]:
# Pick the data version and unpack its (features, labels) pair
data_name = 'version3'
X, y = data_versions[data_name]

# A freshly constructed decision tree, registered under a new model name
model_name = "dfc_v1"
model = DecisionTreeClassifier()

# Mode 1: both the data and the model are passed to train
orpheusDT.train(data_name, X, y, model_name, model)


## 2. Training with new data + existing model

In [None]:
# Data preparation: unpack the (features, labels) pair of the full dataset
data_name = 'version5'
X, y = data_versions[data_name]

# Model preparation: only the model name is needed here. This mode passes no
# estimator to train; per the notebook's mode list it works with the existing
# model of that name, so no new DecisionTreeClassifier is created.
model_name = "dfc_v1"

# Train (mode 2: new data + existing model)
orpheusDT.train(data_name, X, y, model_name)

## 3. Training with existing data + new model


In [None]:
# Existing data is referenced by name only; no X / y are passed in this mode.
# NOTE(review): no earlier cell shown here stores 'version1' -- presumably it
# is already in the database; confirm before a fresh Restart & Run All.
data_name = 'version1'

# New estimator together with the new name it is registered under
model_name, model = "dfc_v2", DecisionTreeClassifier()

# Mode 3: data name + model name + model
orpheusDT.train(data_name, model_name, model)

## Show the difference between the previous and the current training run for the specified data version
orpheusDT.show_diff(data_name)



In [None]:
# Same data version as before, referenced by name only
data_name = 'version1'

# Another new estimator, registered under a further model name
model_name, model = "dfc_v3", DecisionTreeClassifier()

# Train on the named data with the new model
orpheusDT.train(data_name, model_name, model)

# Show the difference for this data version after the new training run
orpheusDT.show_diff(data_name)


## For a given existing data version, view all the models that have been trained on it
## The models are ordered by evaluation score
orpheusDT.view_all_model(data_name)


In [None]:
# Data version whose trained models should be listed
data_name = 'version1'
orpheusDT.view_all_model(data_name)


## View all the data saved in the database
orpheusDT.view_all_data()



In [None]:
# List the data currently saved in the database
orpheusDT.view_all_data()


## Delete the specified data, or pass "all" as the data name to delete all data
orpheus_instance.delete_data(deleted_data_name)


In [None]:
# Name of the data version to remove.
# NOTE(review): later cells in this notebook still pass 'version1' to train
# without X / y -- confirm that this works after the deletion (or only after
# restore_DB has been run).
deleted_data_name = 'version1'

# List the stored data before and after the deletion so the effect is visible
orpheusDT.view_all_data()
orpheusDT.delete_data(deleted_data_name)
orpheusDT.view_all_data()

## For a fixed input, show the predictions of different models
orpheusDT.view_models_with_input(simple_row, model_list)


In [None]:
# Take the feature frame of 'version2' as the source of the fixed input row
X = data_versions['version2'][0]

# Select label 0 with a list so the result is a one-row DataFrame in which
# every column keeps its own dtype. Going through a Series
# (X.loc[0].to_frame().T) forces all values into one common dtype, so the row
# handed to the models would no longer match the training frame's dtypes.
new_row_pandas = X.loc[[0]]

# Show what each of the listed models predicts for that single row
orpheusDT.view_models_with_input(new_row_pandas, ["dfc_v1", "dfc_v2"])


## Manually save data to the database
orpheusDT.save_Data_to_DB(data_name, X, y)



In [None]:
# Unpack the (features, labels) pair of the data version to store
data_name = 'version5'
X, y = data_versions[data_name]

# Save the data under its version name without training a model
orpheusDT.save_Data_to_DB(data_name, X, y)


## Restore the database from a snapshot
orpheusDT.restore_DB()



In [None]:
# Restore the database from a snapshot.
# NOTE(review): which snapshot is used is not visible from this notebook -- confirm.
orpheusDT.restore_DB()


## Reminder of possible incompatibility due to different sklearn versions

In [None]:
# To reproduce with a pinned scikit-learn, uncomment the install line below.
# ! pip install scikit-learn==1.0.2

# Show the scikit-learn version installed in this kernel; `sklearn` is imported
# in the notebook's top import cell together with the other dependencies.
sklearn.__version__

In [None]:
# Data version and model name, both used by earlier cells of this notebook
data_name, model_name = 'version1', "dfc_v2"

# Train using only the two names; neither X / y nor an estimator is passed.
# NOTE(review): this two-argument call is not one of the three training modes
# listed earlier in the notebook, and 'version1' was passed to delete_data in
# an earlier cell -- confirm this call is supported at this point.
orpheusDT.train(data_name, model_name)

In [None]:
## Inspect model by specifying 

## Model audition

In [13]:
    # Tag name -> value pairs handed to the audition; the result table printed
    # below the cell has one column per tag plus a pass rate.
    example_tags = {
        "requires_positive_X": False,
        "X_types": ["2darray"],
        "multioutput": True,
        "allow_nan": False,
    }

    # Run the audition with the tags defined above
    orpheusDT.model_audition(example_tags)


data_name    model_name    requires_positive_X    X_types    multioutput    allow_nan    pass_rate
-----------  ------------  ---------------------  ---------  -------------  -----------  -----------
version1     dfc_v1        X                      pass       pass           pass         75%
version1     dfc_v2        pass                   pass       pass           pass         100%
version1     dfc_v3        pass                   pass       pass           pass         100%
version1     dfc_v4        pass                   pass       pass           pass         100%
version2     dfc_v1        X                      pass       X              pass         50%
version3     dfc_v1        pass                   pass       pass           pass         100%
version4     dfc_v1        X                      pass       pass           pass         75%
