# Schema

In [None]:
import os

import numpy as np
from tqdm.notebook import tqdm

from alphacnn.database.encoder_schema import *
from alphacnn.database import encoder_utils
from alphacnn import paths
from alphacnn.utils.data_utils import load_config


# Connect to the encoder schema with schema and table creation switched off.
connect_to_database(
    schema_name=paths.SCHEMA_PREFIX + 'encoder',
    dj_config_file=paths.CONFIG_FILE,
    create_schema=False,
    create_tables=False,
)

# Last expression of the cell, so the notebook renders the schema object.
encoder_schema

# Datasets

In [2]:
# Sub-directory of paths.DATASET_PATH that is listed below and that receives
# the exported dataset pickles.
trg_dir = 'database_v1'

In [None]:
# Show what is currently stored in the target dataset directory.
# os.listdir raises if the directory does not exist yet.
os.listdir(os.path.join(paths.DATASET_PATH, trg_dir))

In [None]:
# Instantiate BCNoiseConfigCore; as the cell's last expression its
# representation is rendered in the notebook output.
# NOTE(review): presumably a DataJoint table preview of the available
# BC noise configurations - confirm against encoder_schema.
BCNoiseConfigCore()

In [None]:
from alphacnn.utils.stimulus_utils import get_stimulus_shape

# Export one pickled DataFrame per (stimulus config, BC noise config, RGC)
# combination into os.path.join(paths.DATASET_PATH, trg_dir).

overwrite_all = False
skip_all_duplicates = True


def _fetch_unique_name(table, key, attribute):
    """Return the single `attribute` value shared by all rows of `table` matching `key`.

    Raises:
        ValueError: if the matching rows hold zero or several distinct values.
    """
    values = (table & key).fetch(attribute)
    n_distinct = np.unique(values).size
    if n_distinct != 1:
        raise ValueError(
            f'Expected exactly one {attribute} for {key}, found {n_distinct}')
    return values[0]


def _decide_action(file_path, skip_duplicates, overwrite):
    """Return 'write', 'skip' or 'error' for the given target file.

    Existing files are skipped when `skip_duplicates` is set, overwritten when
    `overwrite` is set, and otherwise the user is asked interactively.
    """
    if not os.path.isfile(file_path):
        return 'write'
    if skip_duplicates:
        return 'skip'
    if overwrite:
        return 'write'

    # Normalise the answer so that e.g. 'Y' or 'y ' is not taken as a refusal.
    answer = input(f'Overwrite existing file [y=yes/n=no/s=skip]? {file_path}')
    return {'y': 'write', 's': 'skip'}.get(answer.strip().lower(), 'error')


all_stimulus_config_ids, all_stimulus_config_names = StimulusConfig.fetch(
    'stimulus_config_id', 'stimulus_config_file')
all_bc_noise_ids = np.unique(BCNoiseConfigCore.fetch("bc_noise_id"))
all_rgc_ids = np.unique(RGCSynapticWeights.fetch('rgc_id'))

# The names only depend on their id, so resolve them once up front instead of
# re-querying the database inside the nested loops.
bc_noise_names = {
    bc_noise_id: _fetch_unique_name(
        BCNoiseConfigCore, dict(bc_noise_id=bc_noise_id), 'bc_noise_name')
    for bc_noise_id in all_bc_noise_ids
}
rgc_names = {
    rgc_id: _fetch_unique_name(
        RGCSynapticWeights, dict(rgc_id=rgc_id), 'rgc_name')
    for rgc_id in all_rgc_ids
}

for stimulus_config_id, stimulus_config_name in zip(all_stimulus_config_ids, all_stimulus_config_names):
    for bc_noise_id in all_bc_noise_ids:
        bc_noise_name = bc_noise_names[bc_noise_id]

        for rgc_id in all_rgc_ids:
            rgc_name = rgc_names[rgc_id]

            dataset_name = 'dataset_' + stimulus_config_name.replace('.yml', '') + '_' + rgc_name
            dataset_name += '_bcns' + bc_noise_name

            file_path = os.path.join(paths.DATASET_PATH, trg_dir, dataset_name + '.pkl')

            action = _decide_action(
                file_path, skip_duplicates=skip_all_duplicates, overwrite=overwrite_all)

            # Abort before doing any database work when the user refused to overwrite.
            if action == 'error':
                raise FileExistsError(file_path)

            # The fetch is intentionally done even for skipped files, so that
            # dataset_tab / dataset_df stay available for inspection afterwards.
            dataset_tab = encoder_utils.fetch_dataset(
                stimulus_config_ids=[stimulus_config_id], rgc_id=rgc_id, bc_noise_id=bc_noise_id)

            dataset_df = dataset_tab.fetch(format='frame').reset_index()

            if dataset_df.shape[0] == 0:
                print('Zero length, skip:', stimulus_config_name)
                continue

            # Attach the stimulus geometry to every row of the exported frame.
            stim_conf = (StimulusConfig() & dataset_tab).fetch1('stimulus_dict')

            pixel_size_um = stim_conf['stimulus']['pixel_size']

            width, height, _ = get_stimulus_shape(
                pixel_size=pixel_size_um,
                stimulus_size_x=stim_conf['stimulus']['size_x'],
                stimulus_size_y=stim_conf['stimulus']['size_y'],
            )

            dataset_df['pixel_size_um'] = pixel_size_um
            dataset_df['video_width'] = width
            dataset_df['video_height'] = height

            print(action, file_path)
            if action == 'skip':
                continue

            dataset_df.to_pickle(file_path)

            print('----------------------')

In [None]:
# Inspect the result of the most recent encoder_utils.fetch_dataset call made
# by the export loop above (only defined if the loop ran at least one fetch).
dataset_tab

In [None]:
# Preview the first rows of the most recently fetched dataset frame.
dataset_df.head()