#### Parameters

In [1]:
import pandas as pd

import loader
import analysis_functions

In [2]:
# --- Input / output locations (fill in before running) ---
csv_directory = ''        # folder holding the raw per-animal CSV files
analysis_directory = ''   # root folder the result CSVs are written under
metadata_file = ''        # Excel workbook with one row of metadata per animal

# --- Condition and acclimation settings ---
condition_name = 'SAT'
acc_col_name = 'acc'      # metadata column giving acclimation time
default_acc_time = 2      # days

# --- Binning ---
time_bin = 240            # minutes

# Lick frequency is computed with a rolling window.
freq_window = 300         # milliseconds
freq_bin = 100            # milliseconds

# --- Filtering thresholds ---
last_percent = 20
min_trials = 0
min_water_trials = 0
min_blank_trials = 0

#### Load Files

In [None]:
# Per-animal metadata comes from an Excel workbook; per the format notes at
# the end of this notebook it must include 'Animal ID' and 'acc' columns.
metadata = pd.read_excel(metadata_file)

# Build the formatted dataframe for the chosen condition from the raw CSVs
# and the metadata read above.
df = loader.make_condition_df(
    csv_directory,
    condition_name,
    metadata,
    acc_col_name,
    default_acc_time,
)

#### Lick Frequency Analysis

In [None]:
# Trial-level lick-frequency analysis of the formatted dataframe, using the
# rolling-window settings (freq_window, freq_bin; milliseconds) and the
# time-bin size (time_bin; minutes) from the parameters cell.
#
# The helper is reached through its module because this notebook only does
# `import analysis_functions`; the bare name is not in scope.
# NOTE(review): assumes lickfreq_analysis is defined in analysis_functions — confirm.
#
# The call used to pass `(not args.noUI)`, but `args` is a command-line object
# that is never created in this notebook. `use_ui = True` reproduces the value
# that expression has when the flag is not given.
# NOTE(review): presumably this argument toggles UI/progress output — confirm,
# and set it to False if that output is unwanted in a notebook.
use_ui = True
data, prev_blank, prev_water = analysis_functions.lickfreq_analysis(
    df, freq_window, freq_bin, time_bin, use_ui
)

#### Trial Binning

In [None]:
# Settings for binning trials by the identity of the preceding trial.
# NOTE(review): none of these four variables is passed to the call below or
# used in any other cell visible in this notebook — presumably they were meant
# to be its arguments; confirm against the definition of bin_prev_identity.
name = "one_back"
cond_pos = "blank"
cond_neg = "water"
gp = "animal"
# NOTE(review): `key` and `index` are not defined in any visible cell, and
# `bin_prev_identity` is not imported under this bare name (only
# `import analysis_functions` is present), so this line raises NameError as
# written. The return value, if any, is also discarded.
bin_prev_identity(data, key, index)

#### Aggregate analysis

In [None]:
# Collapse the trial-level analysis into the three aggregate tables described
# in the pipeline section: mean lick frequency, trial counts, and performance.
# The min_* thresholds come from the parameters cell.
#
# Called through the module because this notebook only does
# `import analysis_functions`; the bare name is not in scope.
# NOTE(review): assumes aggregate_analysis is defined in analysis_functions — confirm.
mean_statistics, counts, performance = analysis_functions.aggregate_analysis(
    data, min_trials, min_blank_trials, min_water_trials
)

#### Output aggregate values to files

In [None]:
import os

# Despite the name, these are path *prefixes* (folder plus file-name stem);
# each output file appends its own suffix to the prefix.
output_dir = f'{analysis_directory}/{condition_name}/stats/{condition_name}'
# output_prev is defined here but not used by any write in this cell.
output_prev = f'{analysis_directory}/{condition_name}/prev_stats/{condition_name}'

# DataFrame.to_csv does not create missing folders, so make sure the stats
# folder exists before the first write.
os.makedirs(os.path.dirname(output_dir), exist_ok=True)

# Mean lick frequency: the only table that carries a "stimulus" column.
cols = ["condition", "sex", "age", "strain", "animal", "stimulus", "Time (hr)", "Time (ms)", "lick"]
mean_statistics.to_csv(f'{output_dir}_lick_frequency.csv', columns=cols, index=False)

# Trial counts.
cols = ["condition", "sex", "age", "strain", "animal", "Time (hr)", "Time (ms)", "trial no"]
counts.to_csv(f'{output_dir}_trial_counts.csv', columns=cols, index=False)

# Performance.
cols = ["condition", "sex", "age", "strain", "animal", "Time (hr)", "Time (ms)", "lick"]
performance.to_csv(f'{output_dir}_performance.csv', columns=cols, index=False)

#### Plotting

#### Pipeline description

__Pipeline:__

>```
>raw data (csv) ------------|    loader.py                                                     lickfreq_analysis                                            
>                            |------------------> formatted data - many animals (dataframe) ---------------------------> trial-level analysis(dataframe)
>metadata (excel) ----------|
>
>                                     aggregate_analysis   |---> trial number counts by timebin and condition (dataframe) ---|
>trial-level analysis (dataframe) ------------------------|---> mean lick frequency by timebin (dataframe) -----------------|---> plots
>                                                          |---> performance by timebin (dataframe) -------------------------|
>```

***

__Formats__

*raw data*

>time (ms), poke, lick, condition code, delay, stimulus

- time measured from start of file
- poke in {0, 1}
- lick in {0, 2}
- condition code in {3, 4, 5, 7, 9}
    - 3 = delay
    - 4 = air delivery
    - 5 = water delivery
    - 7 = after water delivery
    - 9 = blank
- 200 <= delay <= 800
- stimulus in {0, 1}
    - stimulus not present in SAT code
    - stimulus present and relevant in pseudo code

*metadata*

- excel
- must include 'Animal ID' (3 letter + number animal code)
- must include 'acc' (time of acclimation in days)
- other example columns: age, sex, strain

*formatted data*

>timestamp, poke, lick, condition code, delay, trial no, water, stimulus, animal, condition, metadata

- metadata can be multiple columns

*trial-level analysis*
> condition, animal, trial no, puff delta, trial start, timestamp, metadata, stimulus, water, lick, poke, delta, time (hr), time (ms)

- puff delta is equivalent to time (ms)
    - represents timestamp normalized to air delivery
    - puff delta is timedelta and time (ms) is float
- trial start represents timebin as timestamp
- delta is equivalent to time (hr)
    - represents timebin normalized to start of SAT
    - delta is timedelta and time (hr) is float
- metadata can be multiple columns
- lick and poke now represent frequencies aggregated based on the given window size

