# Readme

This is a tutorial on the RGC classifier.
This tutorial includes some default values used for the classifier.

It will create a schema based on the schema in `djimaging.schemas.rgc_classifier_schema`.
However, in practice, it's better to define your own schema under your own user, as in `tutorial_my_schema.ipynb`.
Then you can modify the schema as you wish and e.g. include the RGC classifier.

# Imports

In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
import os
import datajoint as dj

# Create database

In [None]:
# `!whoami` runs the shell command and captures its output lines
username = !whoami
# Keep only the first output line as the user name
username = username[0]
username

In [None]:
# Resolve "~" to the current user's home directory; the paths below are built from it
home_directory = os.path.expanduser("~")
home_directory

In [None]:
# Set config file: the path is built from the home directory and the user name
config_file = f'{home_directory}/datajoint/dj_{username}_conf.json'
# Stop here with a hint if the file does not exist
assert os.path.isfile(config_file), f'Set the path to your config file: {config_file}'

In [None]:
# Define a schema name or use the default name for your personal test schema.
# The name is written to dj.config['schema_name'] in a later cell.
schema_name = f"ageuler_{username}_test"

In [None]:
# Folder handed to prepare_dj_config_rgc_classifier in the next cell
output_folder = f'{home_directory}/datajoint/rgc_classifier'
# The folder must already exist; it is not created here
assert os.path.isdir(output_folder), f'Set path to output directory: {output_folder}'

In [None]:
from djimaging.tables.classifier.rgc_classifier import prepare_dj_config_rgc_classifier

# Load configuration for user
dj.config.load(config_file)
dj.config['schema_name'] = schema_name

# Classifier-specific configuration based on the output folder.
# NOTE(review): presumably this registers the "classifier_input" and
# "classifier_output" stores that are read from dj.config further down - confirm
prepare_dj_config_rgc_classifier(output_folder)

print("schema_name:", dj.config['schema_name'])
# Connect to the database using the loaded configuration
dj.conn()

In [None]:
# Star import: `schema` and the table classes used in the following cells are
# not defined in this notebook, so they presumably come from this module
# (TODO confirm against djimaging.schemas.rgc_classifier_schema).
from djimaging.schemas.rgc_classifier_schema import *

## Important note

If the schema with the name `schema_name = f"ageuler_{username}_test"` already exists, it is important that the schema definition here is the same as it was when the schema was created.
For example, this is not the case if you did the other tutorial first and did not delete (=drop) the schema afterwards.
In that case, a schema with the same name but different tables already exists: the existing one is based on the schema `my_schema`, whereas this one is based on `rgc_classifier_schema`. This can result in a variety of problems, so you either have to change the schema name here or drop the old schema first.

Outside of this tutorial, in most cases, you want exactly one schema per project, so that you never run into this problem.

In [None]:
from djimaging.utils.dj_utils import activate_schema

# Activate the schema with creation of the schema and of its tables enabled
activate_schema(schema=schema, create_schema=True, create_tables=True)
# Display the schema object as the cell output
schema

In [None]:
# Draw the ERD of your schema, this may take a while
import warnings

# Suppress FutureWarning messages from here on so they do not clutter the output
warnings.simplefilter(action='ignore', category=FutureWarning)

dj.ERD(schema)

# Classifier

## Add classifier method

In [None]:
# Dotted import path of the classifier class, given as a string
classifier_fn = "sklearn.ensemble.RandomForestClassifier"
# Settings stored together with the classifier method.
# NOTE(review): presumably forwarded as keyword arguments to the classifier
# named in classifier_fn - confirm
classifier_config = {
    'class_weight': 'balanced',
    'random_state': 2001,
    'oob_score': True,
    'ccp_alpha': 0.00021870687842726034,
    'max_depth': 50,
    'max_leaf_nodes': None,
    'min_impurity_decrease': 0,
    'n_estimators': 1000,
    'n_jobs': 20,
}

# Register the method in the ClassifierMethod table.
# NOTE(review): skip_duplicates=True presumably makes re-running this cell harmless - confirm
ClassifierMethod().add_classifier(
    classifier_config=classifier_config,
    classifier_fn=classifier_fn, comment="test", skip_duplicates=True)

In [None]:
# Display the ClassifierMethod table
ClassifierMethod()

## Add training data

In [None]:
# Input and output folders are read from the DataJoint store configuration
ipath = dj.config['stores']["classifier_input"]["location"] + '/'
opath = dj.config['stores']["classifier_output"]["location"] + '/'

# Register the training data; all four input files are looked up in the input folder
ClassifierTrainingData().add_trainingdata(
    project="False",  # passed as the string "False", not as a boolean
    output_path=opath,
    chirp_feats_file=ipath + 'chirp_feats.npz',
    bar_feats_file=ipath + 'bar_feats.npz',
    baden_data_file=ipath + 'RGCData_postprocessed.mat',
    training_data_file=ipath + 'training_all.pkl',
    skip_duplicates=True,
)
# Display the ClassifierTrainingData table
ClassifierTrainingData()

## Train classifier

In [None]:
# Populate the Classifier table, then display it.
# NOTE(review): going by the section title, this is presumably where the
# classifier is trained - confirm
Classifier().populate()
Classifier()

# Load your data into the database

## Define which data and how to load data

In [None]:
# Settings describing whose data to load and where it is stored
userinfo = {
    'experimenter': 'DataJointTestData',  # Replace this if you want to use your own data
    'data_dir': '/gpfs01/euler/data/Data/DataJointTestData/xy-RGCs/',  # Replace this if you want to use your own data
    # NOTE(review): the *_loc values presumably give the position of each piece
    # of information within the data file names - confirm
    'datatype_loc': 0,
    'animal_loc': 1,
    'region_loc': 2,
    'field_loc': 3,
    'stimulus_loc': 4,
    'condition_loc': 5,
}

# Fail early with a hint, like the config file and output folder checks do
assert os.path.isdir(userinfo['data_dir']), f"Set path to data directory: {userinfo['data_dir']}"

In [None]:
# Upload the userinfo dict defined above, then display the UserInfo table
UserInfo().upload_user(userinfo)
UserInfo()

In [None]:
# Add the default entry via add_default(), then display the RawDataParams table
RawDataParams().add_default()
RawDataParams()

## Load data

In [None]:
# Scan the file system for experiments, then display the table.
# NOTE(review): presumably searches the data_dir given in userinfo, and
# verboselvl=0 presumably keeps the output minimal - confirm
Experiment().rescan_filesystem(verboselvl=0)
Experiment()

In [None]:
# Scan the file system for fields, then display the table.
# NOTE(review): verboselvl=0 presumably keeps the output minimal - confirm
Field().rescan_filesystem(verboselvl=0)
Field()

In [None]:
# Populate the Field table with default settings (no progress bar, no extra processes requested)
Field().populate()

In [None]:
# Register the stimuli: a no-stimulus entry, a global and a local chirp
# (spatialextent 1000 and 300), a noise stimulus with a 20 x 15 pixel grid at
# 30 um per pixel, and the moving bar.
# NOTE(review): the alias strings look like underscore-separated alternative
# names for the same stimulus - confirm how they are matched
Stimulus().add_nostim(skip_duplicates=True)
Stimulus().add_chirp(spatialextent=1000, stim_name='gChirp', alias="chirp_gchirp_globalchirp", skip_duplicates=True)
Stimulus().add_chirp(spatialextent=300, stim_name='lChirp', alias="lchirp_localchirp", skip_duplicates=True)
Stimulus().add_noise(stim_name='noise', pix_n_x=20, pix_n_y=15, pix_scale_x_um=30, pix_scale_y_um=30,
                     skip_duplicates=True)
Stimulus().add_movingbar(skip_duplicates=True)

In [None]:
# Populate the Presentation table with a progress bar and processes=20, then display it
Presentation().populate(display_progress=True, processes=20)
Presentation()

In [None]:
# Populate the Roi table with a progress bar and processes=20, then display it
Roi().populate(display_progress=True, processes=20)
Roi()

## Process data

In [None]:
# Populate the Traces table with a progress bar and processes=20, then display it
Traces().populate(display_progress=True, processes=20)
Traces()

In [None]:
# Add the default entry via add_default(), then display the PreprocessParams table
PreprocessParams().add_default()
PreprocessParams()

In [None]:
# Populate the PreprocessTraces table with a progress bar and processes=20, then display it
PreprocessTraces().populate(display_progress=True, processes=20)
PreprocessTraces()

In [None]:
# Populate the Snippets table with a progress bar and processes=20, then display it
Snippets().populate(display_progress=True, processes=20)
Snippets()

In [None]:
# Populate the Averages table with a progress bar and processes=20, then display it
Averages().populate(display_progress=True, processes=20)
Averages()

In [None]:
# Populate the ChirpQI table with a progress bar and processes=20, then display it
ChirpQI().populate(display_progress=True, processes=20)
ChirpQI()

In [None]:
# Populate the OsDsIndexes table with a progress bar and processes=20, then display it
OsDsIndexes().populate(display_progress=True, processes=20)
OsDsIndexes()

# Assign cell types

`-1` means that no cell type was assigned because of the quality.

The cell type assignment can still be uncertain, though. You should filter by confidence!

In [None]:
# Populate the Baden16Traces table with a progress bar and processes=20
Baden16Traces().populate(display_progress=True, processes=20)

In [None]:
# Populate the CelltypeAssignment table with a progress bar (no `processes`
# argument is passed here), then display it
CelltypeAssignment().populate(display_progress=True)
CelltypeAssignment()

In [None]:
# Plot the assignments at increasing confidence thresholds (0.0, 0.25, 0.5)
CelltypeAssignment().plot(threshold_confidence=0.0)
CelltypeAssignment().plot(threshold_confidence=0.25)
CelltypeAssignment().plot(threshold_confidence=0.5)

# Clean up

If you are done with the tutorial you can delete (=drop) your schema again and create a schema with a more meaningful name than `ageuler_{username}_test`.

In [None]:
# Ask for explicit confirmation first: any answer other than exactly "yes"
# aborts this cell before the schema is dropped.
if input("Continue with cleaning up? (yes/no) ") != "yes":
    raise ValueError('Enter yes if you wish to continue.')

# Delete (=drop) the schema
schema.drop()