In [1]:
# Lets regular `import` statements load other .ipynb notebooks as modules
# (needed for the `fetch_data` import below).
import import_ipynb
import numpy as np
from statistics import mean, stdev
import random

# Fixed seed for Python's `random` module, which create_subsequences uses to
# draw the random padding embeddings.
random.seed(10)

from copy import deepcopy

# In the code visible in this notebook only the lengths of these two
# sequences are used (number of event types / number of goals).
from fetch_data import events_without_movement, plot_names

importing Jupyter notebook from fetch_data.ipynb




INFO:tensorflow:Saver not created because there are no variables in the graph to restore


INFO:tensorflow:Saver not created because there are no variables in the graph to restore


INFO:tensorflow:Graph was finalized.


INFO:tensorflow:Graph was finalized.


INFO:tensorflow:Running local_init_op.


INFO:tensorflow:Running local_init_op.


INFO:tensorflow:Done running local_init_op.


INFO:tensorflow:Done running local_init_op.


In [None]:
def get_eventct_stats(data):
    """Compute the per-event-type mean and sample standard deviation.

    Args:
        data: mapping of student id -> record. Each record's 'logs' entry is
            a list of log dicts, and each log's 'event' entry is indexed by
            event-type position.

    Returns:
        A pair ``(mean_eventct, sdv_eventct)`` of lists with one entry per
        element of ``events_without_movement``. An event type keeps the
        neutral defaults (mean 0, std 1) when it has no observations, and
        keeps std 1 when it has a single observation or zero spread, so the
        result is always safe to divide by.
    """
    n_event_types = len(events_without_movement)
    mean_eventct = [0] * n_event_types
    sdv_eventct = [1] * n_event_types

    # Collect the event vectors once instead of re-walking every student
    # for each event type.
    all_events = [log['event'] for s_id in data for log in data[s_id]['logs']]

    for event_type in range(n_event_types):
        counts = [event[event_type] for event in all_events]
        if not counts:
            continue

        mean_eventct[event_type] = mean(counts)

        # statistics.stdev raises StatisticsError for fewer than two samples,
        # so a single observation keeps the default of 1.
        if len(counts) > 1:
            spread = stdev(counts)
            # A constant feature has zero spread; keeping 1 avoids a division
            # by zero when the values are z-scored later.
            if spread > 0:
                sdv_eventct[event_type] = spread

    return mean_eventct, sdv_eventct

In [None]:
def apply_zscore_norm(data, mean_eventct, sd_eventct):
    """Z-score every log's event vector in place.

    Args:
        data: mapping of student id -> record with a 'logs' list; each log's
            'event' entry is overwritten position by position.
        mean_eventct: per-event-type means, indexed like
            ``events_without_movement``.
        sd_eventct: per-event-type standard deviations, same indexing.

    Returns:
        The same ``data`` object that was passed in (mutated, not copied).
    """
    for event_type in range(len(events_without_movement)):
        # Both statistics are constant for the whole inner traversal.
        m = mean_eventct[event_type]
        s = sd_eventct[event_type]
        # A zero standard deviation (constant feature) would divide by zero;
        # fall back to centring only.
        if s == 0:
            s = 1

        for s_id in data:
            for log in data[s_id]['logs']:
                log['event'][event_type] = (log['event'][event_type] - m) / s

    return data

In [None]:
def create_subsequences(data, seq_length=20, emb_dim=32):
    """Expand each student's logs into overlapping windows of seq_length steps.

    The first window starts with ``seq_length - 1`` padding steps, so there is
    one window per real log and each window ends on a real log. A padding step
    has all-zero 'event' and 'goal' vectors, a random 'response_emb' of length
    ``emb_dim`` (drawn once per student and shared by all of that student's
    padding steps), the student's first 'next_rating', and 'response_number' 0.

    Students without any logs are printed and left out of the result.

    Args:
        data: mapping of student id -> record with 'pretest', 'posttest' and
            'logs' entries.
        seq_length: number of steps in every window.
        emb_dim: length of the random padding embedding.

    Returns:
        dict of student id -> {'pretest', 'posttest', 'logs'}, where 'logs' is
        the list of windows (each a list of deep-copied log dicts).
    """
    n_events = len(events_without_movement)
    n_goals = len(plot_names)
    subsequences = {}

    for s_id, record in data.items():
        student_logs = record['logs']
        if len(student_logs) == 0:
            print(s_id)
            continue

        padding_step = {
            'event': [0] * n_events,
            'goal': [0] * n_goals,
            'response_emb': [random.uniform(-1, 1) for _ in range(emb_dim)],
            'next_rating': student_logs[0]['next_rating'],
            'response_number': 0,
        }
        windows = [[deepcopy(padding_step) for _ in range(seq_length - 1)]]

        for log in student_logs:
            latest = windows[-1]
            if len(latest) >= seq_length:
                # Slide by one: the new window reuses all but the oldest step.
                latest = deepcopy(latest[1:seq_length])
                windows.append(latest)
            latest.append(deepcopy(log))

        subsequences[s_id] = {
            'pretest': record['pretest'],
            'posttest': record['posttest'],
            'logs': windows,
        }

    return subsequences

In [None]:
def get_hybrid_combination(data, seq_length=20):
    """Build hybrid-fusion inputs: a joint action+response sequence plus pretest.

    Each window yields one sample. Every step of the window is the
    concatenation of its 'event', 'goal' and 'response_emb' vectors.

    Args:
        data: mapping of student id -> {'pretest', 'posttest', 'logs'}, where
            'logs' is a list of windows, each holding at least ``seq_length``
            step dicts.
        seq_length: number of steps read from every window.

    Returns:
        (X, labels, joint_ids) where
        X = [sequence array, pretest array],
        labels = [posttest array, rating array],
        joint_ids = {'ids': [...], 'response_ids': [...]}.
        Scores are divided by 17 and ratings by 5 (presumably the respective
        maxima; confirm against the data source).
    """
    sequences, pretests, posttests, ratings = [], [], [], []
    ids, response_ids = [], []
    last = seq_length - 1

    for s_id, record in data.items():
        for window in record['logs']:
            final_step = window[last]
            ids.append(s_id)
            pretests.append(record['pretest'] / 17.)
            posttests.append(record['posttest'] / 17.)
            ratings.append(float(final_step['next_rating']) / 5.)
            response_ids.append(s_id + str(final_step['response_number']))

            steps = []
            for s in range(seq_length):
                # Copy first so extending never touches the caller's lists.
                features = deepcopy(window[s]['event'])
                features.extend(deepcopy(window[s]['goal']))
                features.extend(deepcopy(window[s]['response_emb']))
                steps.append(np.array(features))
            sequences.append(steps)

    inputs = [np.array(sequences), np.array(pretests)]
    targets = [np.array(posttests), np.array(ratings)]
    return inputs, targets, {'ids': ids, 'response_ids': response_ids}

In [None]:
def get_hybrid_fusion_preprocessing(data, seq_length=20):
    """Run the full preprocessing pipeline for the hybrid-fusion model.

    Works on a deep copy of ``data`` (the normalisation step mutates logs in
    place), z-scores the event counts of every split with statistics taken
    from 'Train' only, windows the logs, and formats each split with
    ``get_hybrid_combination``.

    Args:
        data: dict with 'Train', 'Val' and 'Test' entries.
        seq_length: window length forwarded to the windowing/formatting steps.

    Returns:
        (formatted_data, formatted_labels, ids), each a dict keyed by split.
    """
    splits = ('Train', 'Val', 'Test')
    working = deepcopy(data)

    # Statistics come from the training split only and are reused elsewhere.
    event_means, event_sds = get_eventct_stats(working['Train'])
    for split in splits:
        working[split] = apply_zscore_norm(working[split], event_means, event_sds)

    for split in splits:
        working[split] = create_subsequences(working[split], seq_length=seq_length)

    formatted_data, formatted_labels, ids = {}, {}, {}
    for split in splits:
        combined = get_hybrid_combination(working[split], seq_length=seq_length)
        formatted_data[split], formatted_labels[split], ids[split] = combined

    return formatted_data, formatted_labels, ids

In [None]:
def get_early_combination(data, seq_length=20):
    """Build early-fusion inputs: one sequence with the pretest in every step.

    Every step is the concatenation of its 'event', 'goal' and 'response_emb'
    vectors followed by the student's scaled pretest score.

    Args:
        data: mapping of student id -> {'pretest', 'posttest', 'logs'}, where
            'logs' is a list of windows, each holding at least ``seq_length``
            step dicts.
        seq_length: number of steps read from every window.

    Returns:
        (X, labels, joint_ids) where X is the sequence array,
        labels = [posttest array, rating array] and
        joint_ids = {'ids': [...], 'response_ids': [...]}.
        Scores are divided by 17 and ratings by 5 (presumably the respective
        maxima; confirm against the data source).
    """
    sequences, posttests, ratings = [], [], []
    ids, response_ids = [], []
    last = seq_length - 1

    for s_id, record in data.items():
        for window in record['logs']:
            final_step = window[last]
            ids.append(s_id)
            posttests.append(record['posttest'] / 17.)
            ratings.append(float(final_step['next_rating']) / 5.)
            response_ids.append(s_id + str(final_step['response_number']))

            steps = []
            for s in range(seq_length):
                # Copy first so extending never touches the caller's lists.
                features = deepcopy(window[s]['event'])
                features.extend(deepcopy(window[s]['goal']))
                features.extend(deepcopy(window[s]['response_emb']))
                features.append(record['pretest'] / 17.)
                steps.append(np.array(features))
            sequences.append(steps)

    targets = [np.array(posttests), np.array(ratings)]
    return np.array(sequences), targets, {'ids': ids, 'response_ids': response_ids}

In [None]:
def get_early_fusion_preprocessing(data, seq_length=20):
    """Run the full preprocessing pipeline for the early-fusion model.

    Works on a deep copy of ``data`` (the normalisation step mutates logs in
    place), z-scores the event counts of every split with statistics taken
    from 'Train' only, windows the logs, and formats each split with
    ``get_early_combination``.

    Args:
        data: dict with 'Train', 'Val' and 'Test' entries.
        seq_length: window length forwarded to the windowing/formatting steps.

    Returns:
        (formatted_data, formatted_labels, ids), each a dict keyed by split.
    """
    splits = ('Train', 'Val', 'Test')
    working = deepcopy(data)

    # Statistics come from the training split only and are reused elsewhere.
    event_means, event_sds = get_eventct_stats(working['Train'])
    for split in splits:
        working[split] = apply_zscore_norm(working[split], event_means, event_sds)

    for split in splits:
        working[split] = create_subsequences(working[split], seq_length=seq_length)

    formatted_data, formatted_labels, ids = {}, {}, {}
    for split in splits:
        combined = get_early_combination(working[split], seq_length=seq_length)
        formatted_data[split], formatted_labels[split], ids[split] = combined

    return formatted_data, formatted_labels, ids

In [None]:
def get_late_combination(data, seq_length=20):
    """Build late-fusion inputs: separate action, response and pretest streams.

    For every window, the action stream holds each step's 'event' vector
    followed by its 'goal' vector, and the response stream holds each step's
    'response_emb' vector.

    Args:
        data: mapping of student id -> {'pretest', 'posttest', 'logs'}, where
            'logs' is a list of windows, each holding at least ``seq_length``
            step dicts.
        seq_length: number of steps read from every window.

    Returns:
        (X, labels, joint_ids) where
        X = [action array, response array, pretest array],
        labels = [posttest array, rating array],
        joint_ids = {'ids': [...], 'response_ids': [...]}.
        Scores are divided by 17 and ratings by 5 (presumably the respective
        maxima; confirm against the data source).
    """
    action_seqs, response_seqs = [], []
    pretests, posttests, ratings = [], [], []
    ids, response_ids = [], []
    last = seq_length - 1

    for s_id, record in data.items():
        for window in record['logs']:
            final_step = window[last]
            ids.append(s_id)
            pretests.append(record['pretest'] / 17.)
            posttests.append(record['posttest'] / 17.)
            ratings.append(float(final_step['next_rating']) / 5.)
            response_ids.append(s_id + str(final_step['response_number']))

            action_steps, response_steps = [], []
            for s in range(seq_length):
                # Copy first so extending never touches the caller's lists.
                action = deepcopy(window[s]['event'])
                action.extend(deepcopy(window[s]['goal']))
                action_steps.append(np.array(action))
                response_steps.append(np.array(deepcopy(window[s]['response_emb'])))
            action_seqs.append(action_steps)
            response_seqs.append(response_steps)

    inputs = [np.array(action_seqs), np.array(response_seqs), np.array(pretests)]
    targets = [np.array(posttests), np.array(ratings)]
    return inputs, targets, {'ids': ids, 'response_ids': response_ids}

In [None]:
def get_late_fusion_preprocessing(data, seq_length=20):
    """Run the full preprocessing pipeline for the late-fusion model.

    Works on a deep copy of ``data`` (the normalisation step mutates logs in
    place), z-scores the event counts of every split with statistics taken
    from 'Train' only, windows the logs, and formats each split with
    ``get_late_combination``.

    Args:
        data: dict with 'Train', 'Val' and 'Test' entries.
        seq_length: window length forwarded to the windowing/formatting steps.

    Returns:
        (formatted_data, formatted_labels, ids), each a dict keyed by split.
    """
    splits = ('Train', 'Val', 'Test')
    working = deepcopy(data)

    # Statistics come from the training split only and are reused elsewhere.
    event_means, event_sds = get_eventct_stats(working['Train'])
    for split in splits:
        working[split] = apply_zscore_norm(working[split], event_means, event_sds)

    for split in splits:
        working[split] = create_subsequences(working[split], seq_length=seq_length)

    formatted_data, formatted_labels, ids = {}, {}, {}
    for split in splits:
        combined = get_late_combination(working[split], seq_length=seq_length)
        formatted_data[split], formatted_labels[split], ids[split] = combined

    return formatted_data, formatted_labels, ids

In [None]:
def get_pretest_combination(data, seq_length=20):
    """Build pretest-only inputs: one scaled pretest score per window.

    The windows themselves are only used to count samples and to read the
    rating and response number from the step at index ``seq_length - 1``.

    Args:
        data: mapping of student id -> {'pretest', 'posttest', 'logs'}, where
            'logs' is a list of windows, each holding at least ``seq_length``
            step dicts.
        seq_length: window length; selects the step the rating is read from.

    Returns:
        (X, labels, joint_ids) where X is the pretest array,
        labels = [posttest array, rating array] and
        joint_ids = {'ids': [...], 'response_ids': [...]}.
        Scores are divided by 17 and ratings by 5 (presumably the respective
        maxima; confirm against the data source).
    """
    pretests, posttests, ratings = [], [], []
    ids, response_ids = [], []
    last = seq_length - 1

    for s_id, record in data.items():
        for window in record['logs']:
            final_step = window[last]
            ids.append(s_id)
            pretests.append(record['pretest'] / 17.)
            posttests.append(record['posttest'] / 17.)
            ratings.append(float(final_step['next_rating']) / 5.)
            response_ids.append(s_id + str(final_step['response_number']))

    targets = [np.array(posttests), np.array(ratings)]
    return np.array(pretests), targets, {'ids': ids, 'response_ids': response_ids}

In [None]:
def get_pretest_preprocessing(data, seq_length=20):
    """Run the full preprocessing pipeline for the pretest-only model.

    Works on a deep copy of ``data`` (the normalisation step mutates logs in
    place), z-scores the event counts of every split with statistics taken
    from 'Train' only, windows the logs, and formats each split with
    ``get_pretest_combination``.

    Args:
        data: dict with 'Train', 'Val' and 'Test' entries.
        seq_length: window length forwarded to the windowing/formatting steps.

    Returns:
        (formatted_data, formatted_labels, ids), each a dict keyed by split.
    """
    splits = ('Train', 'Val', 'Test')
    working = deepcopy(data)

    # Statistics come from the training split only and are reused elsewhere.
    event_means, event_sds = get_eventct_stats(working['Train'])
    for split in splits:
        working[split] = apply_zscore_norm(working[split], event_means, event_sds)

    for split in splits:
        working[split] = create_subsequences(working[split], seq_length=seq_length)

    formatted_data, formatted_labels, ids = {}, {}, {}
    for split in splits:
        combined = get_pretest_combination(working[split], seq_length=seq_length)
        formatted_data[split], formatted_labels[split], ids[split] = combined

    return formatted_data, formatted_labels, ids

In [None]:
def get_reflection_combination(data, seq_length=20):
    """Build reflection-only inputs: the response-embedding sequence per window.

    Only the values that are actually returned are computed. The action
    (event + goal) vectors and the pretest scores are not part of this
    model's inputs, so they are no longer built and thrown away.

    Args:
        data: mapping of student id -> {'posttest', 'logs'}, where 'logs' is
            a list of windows, each holding at least ``seq_length`` step
            dicts with 'response_emb', 'next_rating' and 'response_number'.
        seq_length: number of steps read from every window.

    Returns:
        (X, labels, joint_ids) where X is the response-embedding array,
        labels = [posttest array, rating array] and
        joint_ids = {'ids': [...], 'response_ids': [...]}.
        Posttests are divided by 17 and ratings by 5 (presumably the
        respective maxima; confirm against the data source).
    """
    responses = []
    posttests = []
    ratings = []
    ids = []
    response_ids = []

    for s_id in data:
        for log in data[s_id]['logs']:  # each 'log' is a subsequence of actions
            final_step = log[seq_length - 1]
            ids.append(s_id)
            posttests.append(data[s_id]['posttest'] / 17.)
            ratings.append(float(final_step['next_rating']) / 5.)
            response_ids.append(s_id + str(final_step['response_number']))
            # np.array copies its input, so the source lists stay untouched.
            responses.append([np.array(log[s]['response_emb'])
                              for s in range(seq_length)])

    X = np.array(responses)
    labels = [np.array(posttests), np.array(ratings)]
    joint_ids = {'ids': ids, 'response_ids': response_ids}

    return X, labels, joint_ids

In [None]:
def get_reflection_preprocessing(data, seq_length=20):
    """Run the full preprocessing pipeline for the reflection-only model.

    Works on a deep copy of ``data`` (the normalisation step mutates logs in
    place), z-scores the event counts of every split with statistics taken
    from 'Train' only, windows the logs, and formats each split with
    ``get_reflection_combination``.

    Args:
        data: dict with 'Train', 'Val' and 'Test' entries.
        seq_length: window length forwarded to the windowing/formatting steps.

    Returns:
        (formatted_data, formatted_labels, ids), each a dict keyed by split.
    """
    splits = ('Train', 'Val', 'Test')
    working = deepcopy(data)

    # Statistics come from the training split only and are reused elsewhere.
    event_means, event_sds = get_eventct_stats(working['Train'])
    for split in splits:
        working[split] = apply_zscore_norm(working[split], event_means, event_sds)

    for split in splits:
        working[split] = create_subsequences(working[split], seq_length=seq_length)

    formatted_data, formatted_labels, ids = {}, {}, {}
    for split in splits:
        combined = get_reflection_combination(working[split], seq_length=seq_length)
        formatted_data[split], formatted_labels[split], ids[split] = combined

    return formatted_data, formatted_labels, ids

In [None]:
def get_reflection_pretest_combination(data, seq_length=20):
    """Build reflection+pretest inputs: response-embedding sequence and pretest.

    Only the values that are actually returned are computed. The action
    (event + goal) vectors are not part of this model's inputs, so they are
    no longer built and thrown away.

    Args:
        data: mapping of student id -> {'pretest', 'posttest', 'logs'}, where
            'logs' is a list of windows, each holding at least ``seq_length``
            step dicts with 'response_emb', 'next_rating' and
            'response_number'.
        seq_length: number of steps read from every window.

    Returns:
        (X, labels, joint_ids) where
        X = [response-embedding array, pretest array],
        labels = [posttest array, rating array],
        joint_ids = {'ids': [...], 'response_ids': [...]}.
        Scores are divided by 17 and ratings by 5 (presumably the respective
        maxima; confirm against the data source).
    """
    responses = []
    pretests = []
    posttests = []
    ratings = []
    ids = []
    response_ids = []

    for s_id in data:
        for log in data[s_id]['logs']:  # each 'log' is a subsequence of actions
            final_step = log[seq_length - 1]
            ids.append(s_id)
            pretests.append(data[s_id]['pretest'] / 17.)
            posttests.append(data[s_id]['posttest'] / 17.)
            ratings.append(float(final_step['next_rating']) / 5.)
            response_ids.append(s_id + str(final_step['response_number']))
            # np.array copies its input, so the source lists stay untouched.
            responses.append([np.array(log[s]['response_emb'])
                              for s in range(seq_length)])

    X = [np.array(responses), np.array(pretests)]
    labels = [np.array(posttests), np.array(ratings)]
    joint_ids = {'ids': ids, 'response_ids': response_ids}

    return X, labels, joint_ids

In [None]:
def get_reflection_pretest_preprocessing(data, seq_length=20):
    """Run the full preprocessing pipeline for the reflection+pretest model.

    Works on a deep copy of ``data`` (the normalisation step mutates logs in
    place), z-scores the event counts of every split with statistics taken
    from 'Train' only, windows the logs, and formats each split with
    ``get_reflection_pretest_combination``.

    Args:
        data: dict with 'Train', 'Val' and 'Test' entries.
        seq_length: window length forwarded to the windowing/formatting steps.

    Returns:
        (formatted_data, formatted_labels, ids), each a dict keyed by split.
    """
    splits = ('Train', 'Val', 'Test')
    working = deepcopy(data)

    # Statistics come from the training split only and are reused elsewhere.
    event_means, event_sds = get_eventct_stats(working['Train'])
    for split in splits:
        working[split] = apply_zscore_norm(working[split], event_means, event_sds)

    for split in splits:
        working[split] = create_subsequences(working[split], seq_length=seq_length)

    formatted_data, formatted_labels, ids = {}, {}, {}
    for split in splits:
        combined = get_reflection_pretest_combination(working[split], seq_length=seq_length)
        formatted_data[split], formatted_labels[split], ids[split] = combined

    return formatted_data, formatted_labels, ids

In [None]:
def get_logs_pretest_combination(data, seq_length=20):
    """Build logs+pretest inputs: the action sequence per window plus pretest.

    Each step of the action sequence is the step's 'event' vector followed by
    its 'goal' vector. Only the values that are actually returned are
    computed: response embeddings are not part of this model's inputs, so
    they are no longer built and thrown away.

    Args:
        data: mapping of student id -> {'pretest', 'posttest', 'logs'}, where
            'logs' is a list of windows, each holding at least ``seq_length``
            step dicts with 'event', 'goal', 'next_rating' and
            'response_number'.
        seq_length: number of steps read from every window.

    Returns:
        (X, labels, joint_ids) where
        X = [action array, pretest array],
        labels = [posttest array, rating array],
        joint_ids = {'ids': [...], 'response_ids': [...]}.
        Scores are divided by 17 and ratings by 5 (presumably the respective
        maxima; confirm against the data source).
    """
    actions = []
    pretests = []
    posttests = []
    ratings = []
    ids = []
    response_ids = []

    for s_id in data:
        for log in data[s_id]['logs']:  # each 'log' is a subsequence of actions
            final_step = log[seq_length - 1]
            ids.append(s_id)
            pretests.append(data[s_id]['pretest'] / 17.)
            posttests.append(data[s_id]['posttest'] / 17.)
            ratings.append(float(final_step['next_rating']) / 5.)
            response_ids.append(s_id + str(final_step['response_number']))
            # Unpacking into a fresh list leaves the source vectors untouched.
            actions.append([np.array([*log[s]['event'], *log[s]['goal']])
                            for s in range(seq_length)])

    X = [np.array(actions), np.array(pretests)]
    labels = [np.array(posttests), np.array(ratings)]
    joint_ids = {'ids': ids, 'response_ids': response_ids}

    return X, labels, joint_ids

In [None]:
def get_logs_pretest_preprocessing(data, seq_length=20):
    """Run the full preprocessing pipeline for the logs+pretest model.

    Works on a deep copy of ``data`` (the normalisation step mutates logs in
    place), z-scores the event counts of every split with statistics taken
    from 'Train' only, windows the logs, and formats each split with
    ``get_logs_pretest_combination``.

    Args:
        data: dict with 'Train', 'Val' and 'Test' entries.
        seq_length: window length forwarded to the windowing/formatting steps.

    Returns:
        (formatted_data, formatted_labels, ids), each a dict keyed by split.
    """
    splits = ('Train', 'Val', 'Test')
    working = deepcopy(data)

    # Statistics come from the training split only and are reused elsewhere.
    event_means, event_sds = get_eventct_stats(working['Train'])
    for split in splits:
        working[split] = apply_zscore_norm(working[split], event_means, event_sds)

    for split in splits:
        working[split] = create_subsequences(working[split], seq_length=seq_length)

    formatted_data, formatted_labels, ids = {}, {}, {}
    for split in splits:
        combined = get_logs_pretest_combination(working[split], seq_length=seq_length)
        formatted_data[split], formatted_labels[split], ids[split] = combined

    return formatted_data, formatted_labels, ids

In [None]:
def get_logs_responses_combination(data, seq_length=20):
    """Build logs+responses inputs: one joint sequence per window, no pretest.

    Every step is the concatenation of its 'event', 'goal' and 'response_emb'
    vectors. Pretest scores are not part of this model's inputs, so they are
    no longer read and collected only to be discarded.

    Args:
        data: mapping of student id -> {'posttest', 'logs'}, where 'logs' is
            a list of windows, each holding at least ``seq_length`` step
            dicts with 'event', 'goal', 'response_emb', 'next_rating' and
            'response_number'.
        seq_length: number of steps read from every window.

    Returns:
        (X, labels, joint_ids) where X is the joint sequence array,
        labels = [posttest array, rating array] and
        joint_ids = {'ids': [...], 'response_ids': [...]}.
        Posttests are divided by 17 and ratings by 5 (presumably the
        respective maxima; confirm against the data source).
    """
    actions_responses = []
    posttests = []
    ratings = []
    ids = []
    response_ids = []

    for s_id in data:
        for log in data[s_id]['logs']:  # each 'log' is a subsequence of actions
            final_step = log[seq_length - 1]
            ids.append(s_id)
            posttests.append(data[s_id]['posttest'] / 17.)
            ratings.append(float(final_step['next_rating']) / 5.)
            response_ids.append(s_id + str(final_step['response_number']))
            # Unpacking into a fresh list leaves the source vectors untouched.
            actions_responses.append([
                np.array([*log[s]['event'], *log[s]['goal'], *log[s]['response_emb']])
                for s in range(seq_length)
            ])

    X = np.array(actions_responses)
    labels = [np.array(posttests), np.array(ratings)]
    joint_ids = {'ids': ids, 'response_ids': response_ids}

    return X, labels, joint_ids

In [None]:
def get_logs_responses_preprocessing(data, seq_length=20):
    """Run the full preprocessing pipeline for the logs+responses model.

    Works on a deep copy of ``data`` (the normalisation step mutates logs in
    place), z-scores the event counts of every split with statistics taken
    from 'Train' only, windows the logs, and formats each split with
    ``get_logs_responses_combination``.

    Args:
        data: dict with 'Train', 'Val' and 'Test' entries.
        seq_length: window length forwarded to the windowing/formatting steps.

    Returns:
        (formatted_data, formatted_labels, ids), each a dict keyed by split.
    """
    splits = ('Train', 'Val', 'Test')
    working = deepcopy(data)

    # Statistics come from the training split only and are reused elsewhere.
    event_means, event_sds = get_eventct_stats(working['Train'])
    for split in splits:
        working[split] = apply_zscore_norm(working[split], event_means, event_sds)

    for split in splits:
        working[split] = create_subsequences(working[split], seq_length=seq_length)

    formatted_data, formatted_labels, ids = {}, {}, {}
    for split in splits:
        combined = get_logs_responses_combination(working[split], seq_length=seq_length)
        formatted_data[split], formatted_labels[split], ids[split] = combined

    return formatted_data, formatted_labels, ids

In [None]:
def get_logs_combination(data, seq_length=20):
    """Build logs-only inputs: the action sequence per window.

    Each step of the action sequence is the step's 'event' vector followed by
    its 'goal' vector. Only the values that are actually returned are
    computed: response embeddings and pretest scores are not part of this
    model's inputs, so they are no longer built and thrown away.

    Args:
        data: mapping of student id -> {'posttest', 'logs'}, where 'logs' is
            a list of windows, each holding at least ``seq_length`` step
            dicts with 'event', 'goal', 'next_rating' and 'response_number'.
        seq_length: number of steps read from every window.

    Returns:
        (X, labels, joint_ids) where X is the action array,
        labels = [posttest array, rating array] and
        joint_ids = {'ids': [...], 'response_ids': [...]}.
        Posttests are divided by 17 and ratings by 5 (presumably the
        respective maxima; confirm against the data source).
    """
    actions = []
    posttests = []
    ratings = []
    ids = []
    response_ids = []

    for s_id in data:
        for log in data[s_id]['logs']:  # each 'log' is a subsequence of actions
            final_step = log[seq_length - 1]
            ids.append(s_id)
            posttests.append(data[s_id]['posttest'] / 17.)
            ratings.append(float(final_step['next_rating']) / 5.)
            response_ids.append(s_id + str(final_step['response_number']))
            # Unpacking into a fresh list leaves the source vectors untouched.
            actions.append([np.array([*log[s]['event'], *log[s]['goal']])
                            for s in range(seq_length)])

    X = np.array(actions)
    labels = [np.array(posttests), np.array(ratings)]
    joint_ids = {'ids': ids, 'response_ids': response_ids}

    return X, labels, joint_ids

In [None]:
def get_logs_preprocessing(data, seq_length=20):
    """Run the full preprocessing pipeline for the logs-only model.

    Works on a deep copy of ``data`` (the normalisation step mutates logs in
    place), z-scores the event counts of every split with statistics taken
    from 'Train' only, windows the logs, and formats each split with
    ``get_logs_combination``.

    Args:
        data: dict with 'Train', 'Val' and 'Test' entries.
        seq_length: window length forwarded to the windowing/formatting steps.

    Returns:
        (formatted_data, formatted_labels, ids), each a dict keyed by split.
    """
    splits = ('Train', 'Val', 'Test')
    working = deepcopy(data)

    # Statistics come from the training split only and are reused elsewhere.
    event_means, event_sds = get_eventct_stats(working['Train'])
    for split in splits:
        working[split] = apply_zscore_norm(working[split], event_means, event_sds)

    for split in splits:
        working[split] = create_subsequences(working[split], seq_length=seq_length)

    formatted_data, formatted_labels, ids = {}, {}, {}
    for split in splits:
        combined = get_logs_combination(working[split], seq_length=seq_length)
        formatted_data[split], formatted_labels[split], ids[split] = combined

    return formatted_data, formatted_labels, ids