### Inspect data
#### List of tasks accomplished in this Jupyter Notebook:
- Output number of live and dead animals for each species and treatment
- Remove dead larvae from the experiment master data and save the result as a new, cleaned spreadsheet (the master file itself is left unchanged)
- Check that manually annotated files all exist and are spelled correctly

In [24]:
import numpy as np
import pandas as pd
import glob, os

In [2]:
# READ IN MASTER DATASET and PRINT NUMBER OF ANIMALS FOR EACH SPECIES AND TREATMENT
# This reads the raw master file (dead larvae still included), not the cleaned
# spreadsheet, so that dead animals can be counted.
df = pd.read_csv("./data/experiment_IDs/static_data.csv")
experiments = df["treatment_odor"].unique()
species = df['species'].unique()
for specie in species:
    species_rows = df[df['species'] == specie]
    # Only rows explicitly marked 'no' count as alive, here and in the
    # per-treatment lines below, so the per-treatment live counts add up
    # to this species total.
    print('Total number of live animals:',
          int((species_rows['dead'] == 'no').sum()))
    for experiment in experiments:
        group = species_rows[species_rows["treatment_odor"] == experiment]
        n_live = int((group['dead'] == 'no').sum())
        n_dead = int((group['dead'] == 'yes').sum())
        # Any value that is neither 'yes' nor 'no' (e.g. an empty cell) is
        # reported as unknown instead of being silently counted as alive.
        n_unknown = len(group) - n_live - n_dead
        print('Live n=', str(n_live), 'dead n=', str(n_dead),
              ': total n=', str(len(group)), 'unknown n=',
              str(n_unknown),
              'for', experiment, specie)
    print('---')

Total number of animals: 67
Live n= 24 dead n= 0 : total n= 24 unknown n= 0 for 100ul_milliQ_water Aedes aegypti
Live n= 25 dead n= 1 : total n= 26 unknown n= 0 for 100ul_quinine Aedes aegypti
Live n= 21 dead n= 1 : total n= 22 unknown n= 0 for 05_percent_food Aedes aegypti
---
Total number of animals: 70
Live n= 23 dead n= 1 : total n= 24 unknown n= 0 for 100ul_milliQ_water Aedes albopictus
Live n= 26 dead n= 4 : total n= 30 unknown n= 0 for 100ul_quinine Aedes albopictus
Live n= 22 dead n= 2 : total n= 24 unknown n= 0 for 05_percent_food Aedes albopictus
---
Total number of animals: 93
Live n= 29 dead n= 13 : total n= 42 unknown n= 0 for 100ul_milliQ_water Anopheles arabiensis
Live n= 36 dead n= 16 : total n= 52 unknown n= 0 for 100ul_quinine Anopheles arabiensis
Live n= 28 dead n= 21 : total n= 49 unknown n= 0 for 05_percent_food Anopheles arabiensis
---
Total number of animals: 108
Live n= 38 dead n= 1 : total n= 39 unknown n= 0 for 100ul_milliQ_water Anopheles gambiae
Live n= 37 d

In [3]:
# REMOVE DEAD LARVAE AND SAVE DATA IN NEW SPREADSHEET
# Load the master sheet, drop every row whose 'dead' column is exactly 'yes',
# and write the remaining rows to a separate cleaned file.
df = pd.read_csv('./data/experiment_IDs/static_data.csv')
n_total = len(df)
print(n_total, "total larvae experiments")

# Rows with any value other than 'yes' (including missing values) are kept.
is_dead = df['dead'] == 'yes'
no_dead = df.loc[~is_dead].copy()
n_kept = len(no_dead)
print(n_kept, "larvae after removing dead larvae")

# index=None: the DataFrame's row index is not written as an extra column.
no_dead.to_csv("./data/experiment_IDs/cleaned_static_data.csv", index=None)
print("--- Data cleaned and saved to file ---")
display(no_dead.tail())

622 total larvae experiments
505 larvae after removing dead larvae
--- Data cleaned and saved to file ---


Unnamed: 0,animal_ID,larvae_pixel_size,container_pixel_size,larvae_length_mm,dead,treatment_odor,experiment_date,acclimate_start,experiment_start,sex,starve_date,starve_time,species,light_start,light_hours
615,191130-06-bottom,34,548,5.0,no,100ul_quinine,191130,4:09:00 PM,4:25:00 PM,m,191129,12:03:00 PM,Culex tarsalis,8,16
616,191130-07-top,37,549,5.4,no,05_percent_food,191130,4:44:00 PM,4:59:00 PM,m,191129,12:03:00 PM,Culex tarsalis,8,16
618,191130-08-top,38,551,5.5,no,05_percent_food,191130,5:18:00 PM,5:35:00 PM,f,191129,3:11:00 PM,Culex tarsalis,8,16
619,191130-08-bottom,35,548,5.1,no,05_percent_food,191130,5:18:00 PM,5:35:00 PM,m,191129,3:11:00 PM,Culex tarsalis,8,16
620,191130-09-top,38,549,5.5,no,05_percent_food,191130,5:35:00 PM,6:10:00 PM,f,191129,3:11:00 PM,Culex tarsalis,8,16


In [30]:
# CHECK THAT MANUALLY ANNOTATED FILES ALL EXIST AND ARE SPELLED CORRECTLY
# Every entry in the "filename" column must have a matching <filename>.csv
# inside ./data/trajectories/video_csvs/.
df = pd.read_csv("./data/trajectories/manually_checked_beginning_pause.csv")

missing = []
for name in df["filename"].values:
    # str() keeps a blank (NaN) cell from raising a TypeError on concatenation;
    # such an entry is then reported as a missing file like any other bad name.
    fname = "./data/trajectories/video_csvs/"+str(name)+".csv"
    if not os.path.isfile(fname):
        print(fname)
        missing.append(fname)

# Only report success when nothing was missing; previously the success message
# was printed even after missing paths had been listed above.
if missing:
    print("--- Checks FAILED:", len(missing), "of", len(df),
          "annotated files not found ---")
else:
    print("--- All checks passed ---")

--- All checks passed ---
