# Stimulus evaluation (INTERNAL USE ONLY)
We ran a manual check on all 1200 test stimuli.

**The purpose of this notebook is to:** 
* Fetch the data from mongoDB
* Provide summary stats on the number of problematic stimuli
* Display the problematic stimuli

In [None]:
import os
import sys
import urllib, io

sys.path.append('./analysis_helpers')
from importlib import reload
from analysis_helpers import *

import numpy as np
import scipy.stats as stats
import pandas as pd

import pymongo as pm
from collections import Counter
import json
import re
import ast

from PIL import Image, ImageOps, ImageDraw, ImageFont 

from io import BytesIO
import base64

from tqdm import tqdm

import  matplotlib
from matplotlib import pylab, mlab, pyplot
import matplotlib.pyplot as plt
%matplotlib inline
from IPython.core.pylabtools import figsize, getfigs
plt = pyplot
import matplotlib as mpl
# embed fonts as TrueType (Type 42) in PDF output
mpl.rcParams['pdf.fonttype'] = 42
# matplotlib 3.6 renamed the bundled seaborn styles to 'seaborn-v0_8-*' and
# newer releases no longer accept the old names, so use whichever one exists.
plt.style.use('seaborn-white' if 'seaborn-white' in plt.style.available
              else 'seaborn-v0_8-white')

import seaborn as sns

%matplotlib inline
import scipy.stats
import random

from IPython.display import clear_output

import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)
warnings.filterwarnings("ignore", message="numpy.dtype size changed")
warnings.filterwarnings("ignore", message="numpy.ufunc size changed")

#### set up paths and directories

In [None]:
## project layout, resolved relative to the current working directory
analysis_dir = os.path.abspath('.')
proj_dir = os.path.abspath('..')
datavol_dir = os.path.join(proj_dir, 'data')
png_dir = os.path.abspath(os.path.join(datavol_dir, 'png'))
exp_dir = os.path.abspath(os.path.join(proj_dir, 'behavioral_experiments'))
results_dir = os.path.join(proj_dir, 'results')
plot_dir, csv_dir, json_dir = (
    os.path.join(results_dir, leaf) for leaf in ('plots', 'csv', 'json'))

## make the helper folders importable (stimuli first, then the analysis utils)
for helper_path in (os.path.join(proj_dir, 'stimuli'),
                    os.path.join(analysis_dir, 'utils')):
    if helper_path not in sys.path:
        sys.path.append(helper_path)

## create the output folders that are missing (json_dir is not created here)
for out_dir in (results_dir, plot_dir, csv_dir):
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)

**Import routines to fetch data from mongoDB**

Remember that you need to provide `auth.txt` containing the password and to create the SSH tunnel to the DB instance.
To open the tunnel, run `ssh -fNL 27017:127.0.0.1:27017 USERNAME@cogtoolslab.org`, replacing `USERNAME` with your own account.

In [None]:
import generate_dataframes

In [None]:
# Open an SSH tunnel in the background (-f), without running a remote command (-N),
# forwarding local port 27017 to 127.0.0.1:27017 as seen from the remote host (-L).
# NOTE(review): the username is hardcoded here; change it to your own account before running.
!ssh -fNL 27017:127.0.0.1:27017 fbinder@cogtoolslab.org

Let's fetch the dataframes. This might take a while. 

First, here are the studies and iteration names used for the evaluation:

In [None]:
# (study, stim_version) pairs, in evaluation order. The bucket name and the
# iteration name of each entry are derived from these two values below.
_EVALUATION_STUDIES = [
    ("dominoes", 'production_1'),
    ("collision", 'production_2'),
    ("towers", 'production_2'),
    ("linking", 'production_2'),
    ("containment", 'production_2'),
    ("rollingsliding", 'production_2'),
    ("drop", 'production_2'),
    ("clothiness", 'production_2'),
]

neurips2021_evaluation_iterations = [
    {
        'study': study,
        'bucket_name': 'human-physics-benchmarking-{}-pilot'.format(study),
        'stim_version': stim_version,
        'iterationName': '{}_testing'.format(stim_version),
    }
    for study, stim_version in _EVALUATION_STUDIES
]

# name of the database the ratings are read from
database_name = "curiophysion_stim_validation"

Time to make some tea? 🫖

In [None]:
# Pull one dataframe per study from MongoDB and store it under the study name.
# NOTE(review): only the study name and `database_name` are handed to the helper;
# 'iterationName' / 'stim_version' / 'bucket_name' are not used here -- confirm
# that the helper selects the intended iteration on its own.
dfs = {}
for it in neurips2021_evaluation_iterations:
    study = it['study']
    print("Fetching:", study)
    dfs[study] = generate_dataframes.pull_straight_df_from_mongo(
        study, database_name)
print("Done")


## Basic Analyses

What are the ratings across scenarios?

In [None]:
# Print how often each rating value occurs, one scenario after the other.
for scenario, df in dfs.items():
    rating_counts = df['response'].value_counts()
    print(scenario, rating_counts, "---------------", sep="\n")

## Examples
Let's look at all problematic stimuli, i.e. those whose ratings are not exclusively "Fine".

In [None]:
from IPython.display import HTML

def display_rows(_df):
    """Build an HTML snippet with one labelled video block per row of `_df`.

    Expects a dataframe with the columns 'stimulus_name', 'response',
    'target_hit_zone_label' and 'stim_url'. Every value is converted to a
    string and HTML-escaped before it is inserted into the markup, so special
    characters in names, ratings or URLs cannot break (or inject) markup.

    Returns the HTML as a string ("" for an empty dataframe). It needs to be
    wrapped in HTML() to display in a notebook.
    """
    # imported locally so the function does not depend on a name that other
    # cells may rebind (the notebook uses `html` as a variable name)
    from html import escape

    template = """
<div>
<b>Stim name</b>:{}<br>
<b>Ratings</b>:{}<br>
<b>Outcome</b>:{}<br>
<video width="40%" controls>
<source src="{}">
</video></div>"""
    # order matches the placeholders in the template above
    columns = ('stimulus_name', 'response', 'target_hit_zone_label', 'stim_url')
    return "".join(
        template.format(*(escape(str(row[col]), quote=True) for col in columns))
        for _, row in _df.iterrows()
    )


In [None]:
# String forms of the rating sets that count as unproblematic: every rating
# given to the stimulus was one of the "Fine" variants.
fine_only_ratings = ["['Fine 👍']", "['Fine 👍' 'Fine']", "['Fine' 'Fine 👍']"]

# One row per stimulus: first name / outcome / url, and the ratings collapsed
# into the string form of their unique values.
per_stim_aggregation = {
    'stimulus_name': 'first',
    'response': lambda ratings: str(np.unique(ratings)),
    'target_hit_zone_label': 'first',
    'stim_url': 'first',
}

html_parts = []
for scenario, df in dfs.items():
    html_parts.append("<h1>{}</h1>".format(scenario))
    df_agg = df.groupby("stim_ID").agg(per_stim_aggregation)
    is_fine = df_agg['response'].isin(fine_only_ratings)
    # keep only the stimuli with at least one rating outside the "Fine" variants
    df_agg = df_agg.loc[~is_fine]
    html_parts.append(display_rows(df_agg))
html = "".join(html_parts)

HTML(html)
