# Preprocessing
1. For individual observers, entries for paintings that were never presented are discarded.
2. All entries are combined into a single [results_combined.csv](results_combined.csv) table.
3. For all entries with an estimate (_i.e._, Confidence>0), the last estimate is used to compute the relative location of the light source in both polar and Cartesian coordinate systems. The results of preprocessing are saved to [results_with_estimates.csv](results_with_estimates.csv).

In [1]:
# importing prerequisites
from ast import literal_eval
import os
import numpy as np
import pandas as pd

### Loading all observers into a single data frame and saving it to the 'results_combined.csv'

In [2]:
# Folder with the per-observer CSV files; the combined table is saved there as well.
data_folder = 'Data'
files = ['01KSC94w', '02SEF89m', '03SSK93m', '04IKB95w', '05SKL94w', '06MMN92m', '07HHA96w']

# Per-observer tables are collected here and concatenated once after the loop,
# which avoids re-copying the growing table on every iteration.
observers = []

for current_file in files:
    # loading data for an individual observer
    current_observer = pd.read_csv(os.path.join(data_folder, current_file + ".csv"),
                                   sep=';', decimal='.', encoding='utf8')

    # discarding any entry for the painting that was not presented (no Confidence report);
    # .copy() makes the filtered table independent of the loaded one, so adding a column
    # below does not assign into a slice (pandas SettingWithCopyWarning)
    current_observer = current_observer[current_observer.Confidence.notnull()].copy()

    # due to programming error, for some observers trial timestamp column is absent;
    # the names of the affected files are printed and the column is filled with NaN
    if 'TrialTimestamp' not in current_observer.columns:
        print(current_file)
        current_observer['TrialTimestamp'] = np.nan

    observers.append(current_observer)

# merging tables; ignore_index=True replaces the repeated per-observer indexes
# with a single continuous 0..N-1 index
results = pd.concat(observers, ignore_index=True)

# saving (without the index column)
results.to_csv(os.path.join(data_folder, 'results_combined.csv'),
               sep=';', decimal='.', index=False, encoding='utf8')

01KSC94w
02SEF89m
03SSK93m


### Extracting an estimate as an angle in polar coordinates and as dx/dy components

In [3]:
### computing estimate both in polar and in cartesian coordinate systems
# Rows without an estimate (Confidence == 0) keep NaN in every column added here.
results['dx'] = np.nan  # randomly inverted in ~50% of trials
results['true_dx'] = np.nan
results['dy'] = np.nan
results['angle'] = np.nan  # randomly flipped around 0 deg in ~50% of trials
results['true_angle'] = np.nan

# 0 confidence means lack of estimate, so only rows with Confidence > 0 are visited.
# DataFrame.at is used for scalar assignment: DataFrame.set_value was removed in pandas 1.0.
for iRow in results.index[(results.Confidence > 0).values]:
    # Estimates holds a Python-literal list; only its last element is used
    last_estimate = literal_eval(results.at[iRow, 'Estimates'])[-1]

    # converting to float before any further negation keeps signed zeros
    # consistent with values read back from a float column
    dx = float(-(last_estimate['End'][0] - last_estimate['Start'][0]))
    dy = float(last_estimate['End'][1] - last_estimate['Start'][1])

    # for trials marked as FlippedLR the horizontal component is mirrored back
    true_dx = -dx if results.at[iRow, 'FlippedLR'] else dx

    results.at[iRow, 'dx'] = dx
    results.at[iRow, 'dy'] = dy
    results.at[iRow, 'angle'] = np.arctan2(dy, dx)
    results.at[iRow, 'true_dx'] = true_dx
    results.at[iRow, 'true_angle'] = np.arctan2(dy, true_dx)

## converting polar angle estimates so that 1) 0 degrees is at 12 o'clock, 2) they are within -pi..pi range, and 3) angles are counted CLOCKWISE
# The conversion is applied to whole columns: NaN (no estimate) stays NaN at every step.
for angle_type in ['angle', 'true_angle']:
    # 1) 0 degrees is at 12 o'clock
    shifted = results[angle_type] - np.pi/2

    # 2) estimates are within -pi..pi range
    wrapped = shifted.mask(shifted < -np.pi, shifted + 2*np.pi)

    # 3) angles are counted clockwise
    results[angle_type] = -wrapped

## adding a degrees version
results['angle_deg'] = np.degrees(results.angle)
results['true_angle_deg'] = np.degrees(results.true_angle)

# saving (without the index column)
results.to_csv(os.path.join('Data', 'results_with_estimates.csv'),
               sep=';', decimal='.', index=False, encoding='utf8')