# 0. Setup

In [2]:
import os
import pandas as pd
import pickle
from io import StringIO

# change into the folder that contains the unzipped data (in the folder "DataManagementIntergration_Data")
# Each contributor has their own location; keep exactly one `data_path` assignment active.
#data_path = r'C:\Users\sjants\Desktop\Data' # simon
data_path = r'../DataManagementIntergration_Data/OriginalData_wRouteTest' # ivo

# 1. Get overview of files

In [3]:
# Map every subfolder of `data_path` to the list of entries it contains, e.g.
# {folder1: [file1, file2, 'UnityDataSave/file3'], ...}. A nested 'UnityDataSave'
# folder is expanded one level deep and its contents are stored as
# 'UnityDataSave/filename'.
#
# NOTE(review): the cells further down select files by their position in these
# lists, but os.listdir() returns entries in arbitrary order -- confirm that the
# order is stable on the machine that runs this notebook.
dict_folder_file = {} # initialize empty dictionary

for subfolder in os.listdir(data_path):
    subfolder_path = '/'.join((data_path, subfolder))
    if not os.path.isdir(subfolder_path):
        # a stray file next to the subfolders would make the os.listdir() call
        # below raise NotADirectoryError, so skip anything that is not a folder
        continue
    files = dict_folder_file.setdefault(subfolder, []) # one list of entries per subfolder
    for entry in os.listdir(subfolder_path):
        if entry == 'UnityDataSave': # expand the nested folder instead of listing it as one entry
            nested = os.listdir('/'.join((subfolder_path, entry)))
            files.extend('/'.join((entry, name)) for name in nested) # stored as 'UnityDataSave/filename'
        else:
            files.append(entry)

#dict_folder_file # keys: subfolders, values: list of the contained files, e.g. dict = {folder1: [file1, file2], ...}

# 2. Read in files

## 2.1 Inspect files

In [4]:
# Debugging aid, switched off by default: change the condition to True to print,
# for every subfolder, its name, the list of its files, and the first file in that list.
if False:
    for folder, files in dict_folder_file.items():
        print(folder) # subfolder name (dictionary key)
        print(files) # all entries recorded for that subfolder
        print(files[0]) # first entry in the list

## 2.2 Create `df_detailed_subj`

In [5]:
# Read the first listed file of every subfolder, number its rows 1..n in a new
# 'TaskID' column (inserted as the second column), and stack all frames.
list_dfs = []

for key, files in dict_folder_file.items():
    df = pd.read_csv(f'{data_path}/{key}/{files[0]}') # first entry of this subfolder
    ID = list(range(1, len(df) + 1)) # 1-based running number for each row of this file
    df.insert(1, 'TaskID', ID)
    list_dfs.append(df)

df_detailed_subj = pd.concat(list_dfs, axis=0, ignore_index=True) # one frame with a fresh 0..n-1 index

# store the combined frame as a pickle file
with open('./data_raw/dumps_detailed_subj.pkl', 'wb') as f:
    pickle.dump(df_detailed_subj, f)

#df_detailed_subj # inspect

## 2.3 Create `df_ptsot_results`

In [8]:
# Read the second listed file of every subfolder as a header-less CSV with four
# named columns, tag every row with the integer parsed from the subfolder name
# (everything after its first four characters), and stack all frames.
list_dfs = []

for key, files in dict_folder_file.items():
    df = pd.read_csv('/'.join((data_path, key, files[1])),
                     names = ['QuestionNumber','CorrectResponseAngle','ActualResponseAngle','AbsoluteAngularError'],
                     header = None)
    ID = [int(key[4:])] * len(df) # same user id for every row of this file
    df.insert(0, 'UserID', ID)
    list_dfs.append(df)

df_ptsot_results = pd.concat(list_dfs, axis = 0, ignore_index = True, sort = False) # concatenate dfs into one

# Drop the rows whose QuestionNumber contains 'Average'. Boolean indexing returns a
# new frame, so the result must be assigned back; otherwise the rows stay in the
# pickled data. astype(str) keeps the mask boolean even if a cell is not a string.
is_average = df_ptsot_results['QuestionNumber'].astype(str).str.contains('Average')
df_ptsot_results = df_ptsot_results[~is_average].reset_index(drop = True) # keep a gap-free index

with open('./data_raw/dumps_ptsot_results.pkl', 'wb') as f:
    pickle.dump(df_ptsot_results, f)

#df_ptsot_results # inspect

## 2.4 Create `df_JRD`

In [25]:
# Read the fourth listed file of every subfolder that has more than three entries,
# rename its 'PartID ' column to 'UserID', drop its first row, and stack all frames.
list_dfs = []
for key, files in dict_folder_file.items():
    if len(files) <= 3:
        continue # no entry at index 3 for this subfolder
    df = pd.read_csv(f'{data_path}/{key}/{files[3]}', skipinitialspace = True)
    df = df.rename(columns = {'PartID ': 'UserID'}) # the source header carries a trailing space
    df = df.drop(df.index[0]) # remove the row carrying the first index label
    list_dfs.append(df)
df_JRD = pd.concat(list_dfs, axis = 0, ignore_index = True, sort = False)

# store the combined frame as a pickle file
with open('./data_raw/dumps_JRD.pkl', 'wb') as f:
    pickle.dump(df_JRD, f)

#df_JRD # inspect

## 2.5 Create `df_sbsod`

In [28]:
# Read the third listed file of every subfolder that has more than three entries,
# tag every row with the integer parsed from the subfolder name, and stack all frames.
# NOTE(review): the guard checks for more than three entries although only index 2
# is read -- confirm this is intended.
list_dfs = []

# Text fragments whose commas are replaced by semicolons before parsing;
# presumably these commas belong to the text and are not field separators.
comma_fixes = (
    ("merken,", "merken;"),
    ("nachdenke,", "nachdenke;"),
    ("(N,S,O,W)", "(N;S;O;W)"),
    ("(N, S, E, W)", "(N; S; E; W)"),
    ("Probleme,", "Probleme;"),
    ("wichtig,", "wichtig;"),
    ("erinnern,", "erinnern;"),
)

for key, files in dict_folder_file.items():
    if len(files) <= 3:
        continue
    with open(f'{data_path}/{key}/{files[2]}') as file:
        data = file.read()
    for old, new in comma_fixes: # applied in the listed order
        data = data.replace(old, new)
    if not data:
        continue # empty file: nothing to parse
    df = pd.read_csv(StringIO(data), sep = ",")
    ID = [int(key[4:])] * len(df) # same user id for every row of this file
    df.insert(0, 'UserID', ID)
    list_dfs.append(df)

df_sbsod = pd.concat(list_dfs, axis = 0, ignore_index = True, sort = False)

# store the combined frame as a pickle file
with open('./data_raw/dumps_sbsod.pkl', 'wb') as f:
    pickle.dump(df_sbsod, f)

#df_sbsod # inspect

## 2.6 Create `df_RouteTest`

In [36]:
# Read the fifth listed file of every subfolder that has exactly six entries
# and stack all frames.
list_dfs = []
for key, files in dict_folder_file.items():
    if len(files) == 6:
        print(key) # show which subfolders contribute a file
        df = pd.read_csv('/'.join((data_path, key, files[4])), skipinitialspace = True)
        list_dfs.append(df)

# Concatenate the collected frames. Passing the builtin `list` type here instead of
# `list_dfs` raises a TypeError, because pd.concat needs an iterable of pandas objects.
df_RouteTest = pd.concat(list_dfs, axis = 0, ignore_index = True, sort = False)

with open('./data_raw/dumps_RouteTest.pkl', 'wb') as f:
    pickle.dump(df_RouteTest, f)

#df_RouteTest # inspect