#  Notebook dedicated to the creation of the csv files from the dat files generated by PRIMs_cmd
_____Alice Dauphin 2020_____

# Code Executed

## Control condition

In [None]:
"""
directory='/Users/alice/Desktop/BottomUpLearning3'
prims='/PRIMs_cmd/Build/Products/Debug/PRIMs_cmd'
output='/run/tasks_control'
file='/control_noreset.bprims'

"$directory$prims" -c "$directory$output$file" "$directory$output"
"""


## Random condition

To execute PRIMs_cmd from the terminal in the progress driven mode, one needs to execute:

PRIMs_cmd -r bprimsPath index nbOfIterations outputPath

In which:
 
- bprimsPath: path towards the bprims file 
- index: index of the simulation
- nbOfIterations: number of trials per simulation 
- outputPath: path of the output file (dat file)

In [6]:
"""
directory='/Users/alice/Desktop/BottomUpLearning3'
prims='/PRIMs_cmd/Build/Products/Debug/PRIMs_cmd'
output='/run/random'
file='/random.bprims'

for i in {0..100} ; do $directory$prims -r $directory$output$file $i 11000 $directory$output ; done
"""

"\ndirectory='/Users/alice/Desktop/BottomUpLearning3'\nprims='/PRIMs_cmd/Build/Products/Debug/PRIMs_cmd'\noutput='/random'\nfile='/random.bprims'\n\nfor i in {0..100} ; do $directory$prims -r $directory$output$file $i 11000 $directory$output ; done\n"

## Progress driven condition

To execute PRIMs_cmd from the terminal in the progress driven mode, one needs to execute:

PRIMs_cmd -a bprimsPath index nbOfIterations threshold distance outputPath

In which:
 
- bprimsPath: path towards the bprims file 
- index: index of the simulation
- nbOfIterations: number of trials per simulation 
- threshold: double of the number of trials from which accuracy is computed in PRIMs (default value=20)
- distance: d in the derivative equation
- outputPath: path of the output file (dat file)

In [None]:
"""
directory='/Users/alice/Desktop/BottomUpLearning3'
prims='/PRIMs_cmd/Build/Products/Debug/PRIMs_cmd'
output='/run/accu_deriv'
file='/accuderiv.bprims'

for i in {0..99} ; do $directory$prims -a $directory$output$file $i 11000 20 130 $directory$output ; done
"""

# Preprocessing

In [17]:
from __future__ import print_function
%xmode Verbose

%matplotlib notebook
import matplotlib.pyplot as plt

import pandas as pd
import numpy as np
import math
import scipy
import os

Exception reporting mode: Verbose


In [18]:
os.getcwd()

'/Users/alice/Desktop/BottomUpLearning3/run'

### GroupOfSim

In [26]:
class GroupOfSim:
    """group all the simulations done in a given condition"""
    def __init__(self):
        self.dat_sim = {} #dictionary that contains simulations from one or several dat files
            
    def fill_sim_from_dat(self, func, thresh, dist, indexMax, accuWindow):
        """retrieve the dat files generated in PRIMs to fill the dict of simulations"""
        self.function = func
        self.threshold = thresh
        self.distance = dist
        self.indexMax = indexMax
        self.trials = {}
        for i in range(self.indexMax+1):
            #create the Simulation object
            self.dat_sim[str(i)] = Simulation(function = func, threshold = thresh, distance = dist, index=i)
            #Pre-process the data
            self.dat_sim[str(i)].dataCreation(window_size = accuWindow)
            self.trials[str(i)]=pd.Series(self.dat_sim[str(i)].trials)
        self.summary_trials= pd.DataFrame(self.trials).T
        #Export the dataframe summary_trials
        self.summary_trials.index.name='Ind' #Name the index so the column will be named in the csv file
        export_name = self.function + '_trials.csv'
        self.summary_trials.to_csv(export_name, sep=';', encoding='utf-8', header=True)   

### Simulation

In [35]:
#To split the control_noreset dataframes at each reset ie every X tasks * Y repetitions per task
step = 1430*7 # 7 = number of tasks!!! 1430 = number of trials

class Simulation:
    """One simulation"""
    def __init__(self, function, threshold, distance, index):
        if function == 'accuderiv' :
            self.fileName = 'accuderiv/accuderiv_thsh' + str(threshold) + '_dist' \
                        + str(distance) + '_' + str(index) +'.dat'
        if function == 'random': 
            self.fileName = 'random/random_random_' + str(index) +'.dat'
        if function == 'control_noreset_test':
            self.fileName = 'tasks_control/control_noreset_test.dat'
        self.function = function
        self.rawData = pd.read_csv(self.fileName, sep=' ',header=None, names = ['Run','Task','Label',
                        'Trial','Time','Event', 'goalBLActivation', 'reachGoal', 'retrievedOperators'],
                        dtype = {'Run':np.uint16,'Task': "category" ,'Label': "str",'Trial':np.uint16,
                        'Event': "category", 'goalActivation':np.float64,'reachGoal':np.bool_, 'retrievedOperators': "str"})
        self.tasks = self.rawData['Task'].cat.categories
        self.trials = {}
        self.goalOrdering = pd.DataFrame(columns = self.tasks)

    def dataCreation(self, window_size):
        """Function that preprocesses the rawData generated by the batch file"""
        self.data = self.rawData.loc[self.rawData.Event == 'trial-end', ['Task','Time', \
                            'goalBLActivation', 'reachGoal', 'retrievedOperators']]
        self.trials['total'] =  len(self.data)
        if self.trials['total'] < 10000: #check for a minimum number of trials
            print(task + ' total number of trials = '+self.trials['total'])
        self.data['Trial'] = np.zeros(self.trials['total'], dtype=int) #Add a trials column
        self.data['Accuracy'] = np.zeros(self.trials['total'], dtype=float) #Add an Accuracy column
        
        if self.function =='control_noreset_test':
            for repeat in range(100):
                df=self.data.iloc[repeat*step:(repeat+1)*step]
                #Fill the columns Accuracy and Trial of the dataframe data with the corresponding values 
                #specific for a given task
                for task in self.tasks: 
                    self.trials[task] = len(df.loc[df.Task == task])
                    for i in range(self.trials[task]):
                        df.loc[df.loc[df.Task == task].iloc[i].name, 'Trial'] = i
                    df.loc[(df.Task==task),'Accuracy']= \
                        df.loc[(df.Task==task),'reachGoal'].rolling(window_size).mean()
                df = df[['Task','Trial', 'Time', 'goalBLActivation',
                                       'reachGoal', 'Accuracy', 'retrievedOperators']].reset_index(drop=True)
                df.index.name='Ind' #Name the index so that the column will be named in the csv file
                #Save the data file to csv format with the same name (except the extension)
                df.to_csv(self.fileName[:-4]+ '_'+ str(repeat) + '.csv', sep=';', encoding='utf-8', header=True) 
            
        else:
            for task in self.tasks: 
                self.trials[task] = len(self.data.loc[df.Task == task])
                for i in range(self.trials[task]):
                    self.data.loc[self.data.loc[self.data.Task == task].iloc[i].name, 'Trial'] = i
                self.data.loc[(self.data.Task==task),'Accuracy']= \
                    self.data.loc[(self.data.Task==task),'reachGoal'].rolling(window_size).mean()
            self.data = self.data[['Task','Trial', 'Time', 'goalBLActivation',
                               'reachGoal', 'Accuracy', 'retrievedOperators']].reset_index(drop=True)
            self.data.index.name='Ind' #Name the index so that the column will be named in the csv file
            #Save the data file to csv format with the same name (except the extension)
            self.data.to_csv(self.fileName[:-3]+ 'csv', sep=';', encoding='utf-8', header=True)  

In [19]:
class BatchControl:
    def __init__(self, batchFileName, repeat, nbOfTrials):
        self.batchFileName = batchFileName +'.dat'
        self.repeat = repeat #number of times the experiment was repeated
        self.nbOfTrials = nbOfTrials #number of trials per task within an experiment
        self.fileName = 'tasks_control/'+self.batchFileName
        self.rawData = pd.read_csv(self.fileName, sep=' ',header=None, names = ['Run','Task','Label',
                        'Trial','Time','Event', 'goalBLActivation', 'reachGoal', 'retrievedOperators'],
                        dtype = {'Run':np.uint16,'Task': "category" ,'Label': "str",'Trial':np.uint16,
                        'Event': "category", 'goalActivation':np.float64,'reachGoal':np.bool_, 
                                 'retrievedOperators': "str"})
        self.tasks = self.rawData['Task'].cat.categories

        
    def dataCreation(self, window_size):
        """Function that preprocesses the rawData generated by the batch file"""
        self.data = self.rawData.loc[self.rawData.Event == 'trial-end', ['Run', 'Task','Time', \
                            'goalBLActivation', 'reachGoal', 'retrievedOperators']]
        self.data['Trial'] = np.zeros((len(self.data)), dtype = int) #add a trial column
        self.data['Accuracy'] = np.zeros((len(self.data)), dtype = float) #add an accuracy column
        for task in self.tasks: 
            length = len(self.data.loc[self.data.Task == task])
            if length != (self.repeat+1)*self.nbOfTrials:#check coherence
                print("Task " + task + " was repeated " + str(length) +" times")
                print("The expected number of trials per simulation was "+ str(self.nbOfTrials))
                print(" and the expected number of simulations was "+ str(self.repeat))
            self.data.loc[self.data.Task == task, 'Trial'] = np.array([i%self.nbOfTrials for i in range(length)])
            
            for run in range(self.repeat+1):
                self.data.loc[(self.data.Task == task) & (self.data.Run == run), 'Accuracy'] = \
                    self.data.loc[(self.data.Task == task) & (self.data.Run == run),'reachGoal'].rolling(window_size).mean()
        self.data = self.data[['Run', 'Task','Trial', 'Time', 'goalBLActivation',
                               'reachGoal', 'Accuracy', 'retrievedOperators']] 
        
    def generate_csv(self):
        """Split the dataframe self.data into self.repeat dataframes and export them into a csv format"""
        for run in range(self.repeat+1):
            df_run = self.data.loc[self.data.Run==run]
            #Delete the Run column
            df_run = df_run[['Task','Trial', 'Time', 'goalBLActivation', 'reachGoal', 'Accuracy', 
                             'retrievedOperators']] 
            
            df_run = df_run.reset_index(drop=True) #Reset the index from 0 to nbOfTrials
            df_run.index.name='Ind' #Name the index so that the column will be named in the csv file
            outputName = self.batchFileName[:-4]+ '_'+ str(run)+'.csv'
            df_run.to_csv(outputName, sep=';', encoding='utf-8', header=True)

# Import data from .dat file

## Control

### With reset

In [20]:
task_control = BatchControl(batchFileName = 'task_control1430', repeat = 99, nbOfTrials = 1430)
task_control.dataCreation(window_size = 10)
task_control.generate_csv()

In [None]:
task_control2 = BatchControl(batchFileName = 'task_control10000', repeat = 99, nbOfTrials = 10000)
task_control2.dataCreation(window_size = 10)
task_control2.generate_csv()

### Without reset

In [36]:
control= GroupOfSim()
control.fill_sim_from_dat(func='control_noreset_test', thresh=0 , dist=0, indexMax=0, accuWindow=10)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item] = s


## Random

In [None]:
random = GroupOfSim()
random.fill_sim_from_dat(func='random', thresh=0 , dist=0, indexMax=99, accuWindow=10)

## Progress driven

In [None]:
accuderiv = GroupOfSim()
accuderiv.fill_sim_from_dat(func='accuderiv', thresh=20 , dist=80, indexMax=99, accuWindow=10)