# Workflow to train new classifiers in a notebook
This notebook trains a new classifier with the `Classifier` class of the napari-feature-classifier, reusing the annotation data from an existing classifier and optionally using different features.
This is useful when a user has annotated data once and then generates new measurements or wants to compare different feature subsets.

The current classifier isn't really designed for this workflow, but it is possible to use it that way.

Here are the steps taken in this notebook:

0. Set the parameters
1. Load the annotation data that was created by the napari-feature-classifier
2. Load the relevant feature measurements
3. Create a new classifier instance of the napari-feature-classifier
4. Assign the feature data
5. Overwrite the training data with the loaded annotation data
6. Train the classifier

In [None]:
import numpy as np
import pandas as pd
from pathlib import Path
import napari
from napari_feature_classifier.classifier import Classifier

## 0. Parameters for the user to change

In [None]:
# CSV file with the annotations created by the napari-feature-classifier.
annotation_path = "/Users/joel/Desktop/Classifier_data_Conny/Dll1Classifier_log_05022023_train_dataset_48h.csv"

# Only needed when the feature files have moved since the annotation was made:
# each entry of old_base_paths is replaced by the entry of new_base_paths at
# the same position.
old_base_paths = [
    "/Users/cornelia/CellClassifier_05022023/",
]
new_base_paths = [
    "/Users/joel/Desktop/Classifier_data_Conny/FeatureMeasurements/",
]

# Columns that identify one object. The first entry is always the path
# column; the second entry is the name of the column holding the labels.
index_columns = [
    "path",
    "cyto_id_linked",
]

# Feature columns handed to the Classifier as its training features.
training_features = [
    "Mean_intensity_nuc",
    "Median_nuc",
]

### 1. Load the annotation data that was created by the napari-feature-classifier

In [None]:
# Read the annotation table from the CSV file given by annotation_path.
annotation_df = pd.read_csv(annotation_path)

In [None]:
# Optional: Limit which entries are loaded. e.g. only load the first 147 entries
# (rows 0-146: the end of the slice is exclusive).
# NOTE(review): this cell always runs when executed; skip it to keep all annotations.
annotation_df = annotation_df[0:147]
# Show the truncated table as the cell output.
annotation_df

### 2. Load the relevant feature measurements

In [None]:
# Optional: Only relevant if base paths need to be changed.
# Every old base path is swapped for the new base path at the same list
# position. regex=False forces a literal match: older pandas releases treat
# the pattern as a regular expression by default, in which case characters
# such as "." or "+" inside a path would not be matched literally.
for old_base_path, new_base_path in zip(old_base_paths, new_base_paths):
    annotation_df['path'] = annotation_df['path'].str.replace(
        old_base_path, new_base_path, regex=False
    )

In [None]:
# Find all the feature csv files that need to be loaded: every distinct file
# referenced in the 'path' column of the annotations.
feature_files = annotation_df['path'].unique()

# Read each file once and tag its rows with the file they came from, so that
# they can be matched to the annotations via the 'path' column.
feature_tables = [
    pd.read_csv(feature_file).assign(path=feature_file)
    for feature_file in feature_files
]

# Concatenate once instead of growing a DataFrame inside the loop (which
# re-copies all previously loaded rows on every iteration). pd.concat raises
# on an empty list, so fall back to an empty table when there are no files.
features_df = pd.concat(feature_tables) if feature_tables else pd.DataFrame()

In [None]:
# Show the combined feature table as the cell output for a visual check.
features_df

### 3. Create a new classifier instance of the napari-feature-classifier & 4. Assign the feature data

In [None]:
# Index the feature table by the index columns (path + label column).
features_clf = features_df.set_index(list(index_columns))

# Directory handed to the Classifier; "." is the current working directory.
save_dir = Path(".")

# NOTE(review): method='rfc' is presumably a random forest classifier, and an
# empty name may affect how the classifier is saved - confirm against the
# napari-feature-classifier Classifier API.
clf = Classifier(
    name='',
    features=features_clf,
    training_features=training_features,
    method='rfc',
    directory=save_dir,
    index_columns=index_columns,
)

### 5. Overwrite the training data with the loaded annotation data

In [None]:
# Match annotation_df with features_df.
# how='left' keeps every row of the feature table; rows without a matching
# annotation get NaN in the columns coming from annotation_df.
train_tmp = features_clf.merge(annotation_df, how='left', on=index_columns)

# Restore the index columns as the index so that the assignment below aligns
# row by row with clf.train_data.
train_tmp = train_tmp.set_index(list(index_columns))

# 'train_y' is the name pandas gives the 'train' column of the right-hand
# (annotation) table when both tables contain a 'train' column.
# NOTE(review): this presumes features_clf already has a 'train' column at
# this point (e.g. added by the Classifier) - confirm.
clf.train_data.loc[:, 'train'] = train_tmp['train_y']

In [None]:
#clf.train_data

### 6. Train the classifier

In [None]:
# Train the classifier and keep whatever train() returns.
# NOTE(review): the variable name suggests this is an F1 score - confirm
# against the napari-feature-classifier Classifier API.
f1_score = clf.train()

In [None]:
# Print the value returned by clf.train().
print(f1_score)

In [None]:
# TODO: Do something with the classifier scores