In [1]:
# This file is a modified version of the original:
# https://www.kaggle.com/code/mark4h/jowilder-2nd-place-solution-1-features-code/notebook

In [2]:
# The source code generated dynamically in this notebook can be viewed in the .ipynb files in the utils folder.

In [3]:
%load_ext autoreload
%autoreload 2

In [4]:
import json
import numpy as np
import pandas as pd
import numba
import inspect


# Import utils.utils_my_ instead (see the commented-out line below) to get inline explanations of the code in the resulting C file.
#import utils.utils_my_ as utils
import utils.utils as utils

In [5]:
from numba.pycc import CC

In [6]:
# Directory prefix for the preprocessed artifacts. It is joined to file names by plain string
# concatenation below, so a non-empty value must end with a path separator; '' resolves file
# names relative to the current working directory.
PREPROCESSED_DIR = ''

In [7]:
# Load the metadata written by the preprocessing step (vocabularies and name maps used below).
# JSON text is UTF-8 by specification, so decode it explicitly instead of relying on the
# platform's locale-dependent default encoding.
with open(PREPROCESSED_DIR + "preprocess_info.json", "r", encoding="utf-8") as f:
    preprocess_info = json.load(f)

In [8]:
# Text vocabulary from the preprocessing metadata; each string's position in this list becomes
# its integer index on the TEXT lookup object built below.
text = preprocess_info['TEXT']

In [9]:
text[0:5]

['',
 '*COUGH COUGH COUGH*',
 '*cough cough*',
 '*grumble grumble*',
 'A boring old shirt.']

In [10]:
class TextIndexClass():
    """Plain attribute container mapping sanitised text strings to integer indices.

    Instances start with a single instance attribute, ``__len__``, set to 0.  It is an
    ordinary integer attribute (not a method); the code below overwrites it with the
    number of entries once the container has been filled.
    """

    def __init__(self):
        # Stored as data on the instance, so it is read as `obj.__len__`, not `len(obj)`.
        self.__len__ = 0
    
TEXT = TextIndexClass()

# Characters that are rewritten when a text string is turned into an attribute name.
# None of the replacement strings contains a character that is itself rewritten, so a
# single translate() pass gives the same result as chaining the replacements one by one.
_TEXT_ATTR_TRANSLATION = str.maketrans({
    " ": "_",
    ".": "_",
    "!": "_exclamation_",
    "?": "_question_",
    "\\": "_",
    "'": "_",
    ",": "_",
})

for i, f in enumerate(text):
    # The empty string is exposed under the attribute name 'NaN'.
    if f == "":
        f = 'NaN'

    # e.g. '*COUGH COUGH COUGH*' -> '*COUGH_COUGH_COUGH*'
    t = f.translate(_TEXT_ATTR_TRANSLATION)

    # Two texts must never collapse to the same attribute name: report the clash, then fail.
    existing_index = -1
    if hasattr(TEXT, t):
        existing_index = getattr(TEXT, t)
        print(f, t, existing_index, i)
    assert existing_index == -1

    setattr(TEXT, t, i)

# Record the vocabulary size on the container itself.
TEXT.__len__ = len(preprocess_info['TEXT'])

In [11]:
# get all attributes of class TEXT.
#dir(TEXT)

inspect.getmembers(TEXT)[0:5] # (name, value) pairs sorted by name.

[('*COUGH_COUGH_COUGH*', 1),
 ('*cough_cough*', 2),
 ('*grumble_grumble*', 3),
 ('AND_I_know_who_took_Teddy_exclamation_', 8),
 ('AND_he_stole_Teddy_exclamation_', 9)]

In [12]:
class Lookup():
    """Bare attribute container used as a name -> integer index lookup table.

    A fresh instance carries only the integer instance attribute ``__len__`` (0); the
    code that fills a lookup sets it to the number of entries afterwards.
    """

    def __init__(self):
        # Plain data attribute, read back as `lookup.__len__` rather than via len().
        self.__len__ = 0

# FQIDS: list position -> index; the empty fqid is exposed as "NaN", dots become underscores.
FQIDS = Lookup()
for i, n in enumerate(preprocess_info['FQIDS']):
    setattr(FQIDS, "NaN" if n == "" else n.replace(".", "_"), i)
FQIDS.__len__ = len(preprocess_info['FQIDS'])

# NAMES: the mapping is stored as-is (key -> attribute name, value -> attribute value).
NAMES = Lookup()
for k, v in preprocess_info['name_map'].items():
    setattr(NAMES, k, v)
NAMES.__len__ = len(preprocess_info['name_map'])

# EVENT_NAMES: same treatment as NAMES.
EVENT_NAMES = Lookup()
for k, v in preprocess_info['event_name_map'].items():
    setattr(EVENT_NAMES, k, v)
EVENT_NAMES.__len__ = len(preprocess_info['event_name_map'])

# ROOMS: list position -> index, names used unchanged.
ROOMS = Lookup()
for i, k in enumerate(preprocess_info['ROOMS']):
    setattr(ROOMS, k, i)
ROOMS.__len__ = len(preprocess_info['ROOMS'])

# BUILDINGS: list position -> index, names used unchanged.
BUILDINGS = Lookup()
for i, k in enumerate(preprocess_info['BUILDINGS']):
    setattr(BUILDINGS, k, i)
BUILDINGS.__len__ = len(preprocess_info['BUILDINGS'])

# TEXT_FQIDS: same naming rules as FQIDS ("" -> "NaN", "." -> "_").
TEXT_FQIDS = Lookup()
for i, k in enumerate(preprocess_info['TEXT_FQID']):
    setattr(TEXT_FQIDS, "NaN" if k == "" else k.replace(".", "_"), i)
TEXT_FQIDS.__len__ = len(preprocess_info['TEXT_FQID'])

# ROOM_FQIDS: dots become underscores; no special case for the empty string.
ROOM_FQIDS = Lookup()
for i, k in enumerate(preprocess_info['ROOM_FQIDS']):
    setattr(ROOM_FQIDS, k.replace(".", "_"), i)
ROOM_FQIDS.__len__ = len(preprocess_info['ROOM_FQIDS'])

In [13]:
inspect.getmembers(ROOM_FQIDS)[0:5]

[('__class__', __main__.Lookup),
 ('__delattr__',
  <method-wrapper '__delattr__' of Lookup object at 0x7f8709f3e790>),
 ('__dict__',
  {'__len__': 19,
   'tunic_capitol_0_hall': 0,
   'tunic_capitol_1_hall': 1,
   'tunic_capitol_2_hall': 2,
   'tunic_drycleaner_frontdesk': 3,
   'tunic_flaghouse_entry': 4,
   'tunic_historicalsociety_basement': 5,
   'tunic_historicalsociety_cage': 6,
   'tunic_historicalsociety_closet': 7,
   'tunic_historicalsociety_closet_dirty': 8,
   'tunic_historicalsociety_collection': 9,
   'tunic_historicalsociety_collection_flag': 10,
   'tunic_historicalsociety_entry': 11,
   'tunic_historicalsociety_frontdesk': 12,
   'tunic_historicalsociety_stacks': 13,
   'tunic_humanecology_frontdesk': 14,
   'tunic_kohlcenter_halloffame': 15,
   'tunic_library_frontdesk': 16,
   'tunic_library_microfiche': 17,
   'tunic_wildlife_center': 18}),
 ('__dir__', <function Lookup.__dir__()>),
 ('__doc__', None)]

In [14]:
# The counter below hard-codes six slots; fail fast if the name vocabulary has a different size.
assert len(preprocess_info['name_map']) == 6

# error_model selects the divide-by-zero behaviour of the compiled code: 'python' raises an
# exception as CPython does, while 'numpy' produces +/-inf or nan instead.
@numba.jit(nopython=True, nogil=True, error_model='numpy')
def generate_name_counts(names):
    """Count how often each of the six name codes occurs.

    Each element of ``names`` is used directly as an index into the result, so it must be
    an integer code; codes of 6 or more are ignored.

    Returns an int32 array of length 6 where entry ``c`` is the number of occurrences of code ``c``.
    """
    n_slots = 6
    counts = np.zeros(n_slots, dtype=np.int32)

    for pos in range(names.shape[0]):
        code = names[pos]
        if code >= n_slots:
            # Outside the known vocabulary: not counted.
            continue
        counts[code] += 1

    return counts

@numba.jit(nopython=True, nogil=True, error_model='numpy')
def generate_event_name_counts(event_names):
    """Count how often each of the eleven event-name codes occurs.

    Each element of ``event_names`` is used directly as an index into the result, so it
    must be an integer code; codes of 11 or more are ignored.

    Returns an int32 array of length 11 where entry ``c`` is the number of occurrences of code ``c``.
    """
    n_slots = 11
    counts = np.zeros(n_slots, dtype=np.int32)

    for pos in range(event_names.shape[0]):
        code = event_names[pos]
        if code >= n_slots:
            # Outside the known vocabulary: not counted.
            continue
        counts[code] += 1

    return counts

In [15]:
@numba.jit(nopython=True, nogil=True, error_model='numpy')
def generate_building_counts(buildings):
    """Count the events recorded for each of the ten building codes.

    Each element of ``buildings`` is used directly as an index into the result, so it must
    be an integer code; codes of 10 or more are ignored.

    Returns an int32 array of length 10 where entry ``c`` is the number of occurrences of code ``c``.
    """
    n_slots = 10
    counts = np.zeros(n_slots, dtype=np.int32)

    for pos in range(buildings.shape[0]):
        code = buildings[pos]
        if code >= n_slots:
            # Outside the known vocabulary: not counted.
            continue
        counts[code] += 1

    return counts

@numba.jit(nopython=True, nogil=True, error_model='numpy')
def generate_room_counts(rooms):
    """Count the events recorded for each of the thirteen room codes.

    Each element of ``rooms`` is used directly as an index into the result, so it must be
    an integer code; codes of 13 or more are ignored.

    Returns an int32 array of length 13 where entry ``c`` is the number of occurrences of code ``c``.
    """
    n_slots = 13
    counts = np.zeros(n_slots, dtype=np.int32)

    for pos in range(rooms.shape[0]):
        code = rooms[pos]
        if code >= n_slots:
            # Outside the known vocabulary: not counted.
            continue
        counts[code] += 1

    return counts

In [16]:
inspect.getmembers(FQIDS)[0:5]

[('NaN', 0),
 ('__class__', __main__.Lookup),
 ('__delattr__',
  <method-wrapper '__delattr__' of Lookup object at 0x7f8709f3e350>),
 ('__dict__',
  {'__len__': 129,
   'NaN': 0,
   'archivist': 1,
   'archivist_glasses': 2,
   'block': 3,
   'block_0': 4,
   'block_1': 5,
   'block_badge': 6,
   'block_badge_2': 7,
   'block_magnify': 8,
   'block_nelson': 9,
   'block_tocollection': 10,
   'block_tomap1': 11,
   'block_tomap2': 12,
   'boss': 13,
   'businesscards': 14,
   'businesscards_card_0_next': 15,
   'businesscards_card_1_next': 16,
   'businesscards_card_bingo_bingo': 17,
   'businesscards_card_bingo_next': 18,
   'ch3start': 19,
   'chap1_finale': 20,
   'chap1_finale_c': 21,
   'chap2_finale': 22,
   'chap2_finale_c': 23,
   'chap4_finale_c': 24,
   'coffee': 25,
   'colorbook': 26,
   'confrontation': 27,
   'crane_ranger': 28,
   'cs': 29,
   'directory': 30,
   'directory_closeup_archivist': 31,
   'door_block_clean': 32,
   'door_block_talk': 33,
   'doorblock': 34,
  

In [17]:
# Size of the fqid vocabulary; it is interpolated into the generated source as a literal.
_n_fqids = FQIDS.__len__

# Source text of the fqid counting functions. Doubled braces survive as single-brace
# {FN.<feature>} placeholders in the generated text.
fqids_features_src = f"""

@numba.jit(nopython=True, nogil=True, error_model='numpy')
def generate_fqids_counts(fqids):
    
    # {_n_fqids} => FQIDS.__len__
    counts = np.zeros({_n_fqids}, dtype=np.int32)
    n = fqids.shape[0]
    
    for i in range(n):
        slot_i = fqids[i]
        # {_n_fqids} => FQIDS.__len__
        if slot_i < {_n_fqids}:
            counts[slot_i] += 1
               
    return counts

@numba.jit(nopython=True, nogil=True, error_model='numpy')
def generate_fqids_count_features(out, fqids):
    fqids_counts = generate_fqids_counts(fqids)
    
    # {{FN.fqids_nunqiue}} => FN.fqids_nunqiue
    out[:, {{FN.fqids_nunqiue}}] = np.sum(fqids_counts != 0)
"""

# Append one assignment per fqid to the generated function and record the feature name.
fqids_features_names = []
for i, fqids in enumerate(preprocess_info['FQIDS']):
    fqids_feature = f'fqids_{fqids.replace(".", "_")}_count'
    fqids_features_src += f"""
    # {{FN.{fqids_feature}}} => FN.{fqids_feature}
    out[:, {{FN.{fqids_feature}}}] = fqids_counts[{i}]"""
    fqids_features_names.append(fqids_feature)
    
# view the func fqids_features_src in ipynb file in utils folder.

In [18]:
preprocess_info['FQIDS'][:4]

['', 'archivist', 'archivist_glasses', 'block']

In [19]:
fqids_features_names[:4]

['fqids__count',
 'fqids_archivist_count',
 'fqids_archivist_glasses_count',
 'fqids_block_count']

In [20]:
fqids_features_names[0:5]

['fqids__count',
 'fqids_archivist_count',
 'fqids_archivist_glasses_count',
 'fqids_block_count',
 'fqids_block_0_count']

In [21]:
# Number of known rooms; it is interpolated into the generated source as a literal array size.
_n_rooms = ROOMS.__len__

# Source text of the room first/last-occurrence functions. The per-room assignments are
# appended to the body of generate_room_based_features by the loop further down.
room_features_src = f"""

@numba.jit(nopython=True, nogil=True, error_model='numpy')
def generate_room_first_last_counts_and_times(room, elapsed_time):
    
    # {_n_rooms} => ROOMS.__len__
    first_occurrence_index = np.full({_n_rooms}, np.nan, dtype=np.float32)    
    last_occurrence_index = np.full({_n_rooms}, np.nan, dtype=np.float32)
    first_occurrence_time = np.full({_n_rooms}, np.nan, dtype=np.float32)
    last_occurrence_time = np.full({_n_rooms}, np.nan, dtype=np.float32)
    n = room.shape[0]
    
    for i in range(n):
        slot_i = room[i]
        # {_n_rooms} => ROOMS.__len__
        if slot_i < {_n_rooms}:
    
            last_occurrence_index[slot_i] = i
            last_occurrence_time[slot_i] = elapsed_time[i]

            if np.isnan(first_occurrence_index[slot_i]):
                first_occurrence_index[slot_i] = i
                first_occurrence_time[slot_i] = elapsed_time[i]
               
    return first_occurrence_index, last_occurrence_index, first_occurrence_time, last_occurrence_time

@numba.jit(nopython=True, nogil=True, error_model='numpy')
def generate_room_based_features(out, room, elapsed_time):

    n = room.shape[0]
    
    start_elapsed_time = elapsed_time[0]
    end_elapsed_time = elapsed_time[-1]
    total_time = end_elapsed_time - start_elapsed_time

    (
        first_occurrence_index,
        last_occurrence_index,
        first_occurrence_time,
        last_occurrence_time,
    ) = generate_room_first_last_counts_and_times(room, elapsed_time)
    
"""

# Feature-name suffixes emitted for every room, in output order. The
# proportion-of-events / proportion-of-time variants are not generated.
_ROOM_OCCURRENCE_SUFFIXES = (
    'first_occurrence_num_event_from_start',
    'last_occurrence_num_event_from_start',
    'first_occurrence_num_event_from_end',
    'last_occurrence_num_event_from_end',
    'first_occurrence_time',
    'last_occurrence_time',
    'first_occurrence_duration_from_start',
    'last_occurrence_duration_from_start',
    'first_occurrence_duration_from_end',
    'last_occurrence_duration_from_end',
)

room_features_names = []
for i, r in enumerate(preprocess_info['ROOMS']):
    # Dots are replaced with underscores when the room name is embedded in a feature name.
    room_key = r.replace(".", "_")
    room_features_src += f"""
    
    # {{FN.room_{room_key}_first_occurrence_num_event_from_start}} => FN.room_{room_key}_first_occurrence_num_event_from_start
    out[:, {{FN.room_{room_key}_first_occurrence_num_event_from_start}}] = first_occurrence_index[{i}]
    
    # {{FN.room_{room_key}_last_occurrence_num_event_from_start}} => FN.room_{room_key}_last_occurrence_num_event_from_start
    out[:, {{FN.room_{room_key}_last_occurrence_num_event_from_start}}] = last_occurrence_index[{i}]
    
    # {{FN.room_{room_key}_first_occurrence_num_event_from_end}} => FN.room_{room_key}_first_occurrence_num_event_from_end
    out[:, {{FN.room_{room_key}_first_occurrence_num_event_from_end}}] = n - first_occurrence_index[{i}]
    
    # {{FN.room_{room_key}_last_occurrence_num_event_from_end}} => FN.room_{room_key}_last_occurrence_num_event_from_end
    out[:, {{FN.room_{room_key}_last_occurrence_num_event_from_end}}] = n - last_occurrence_index[{i}]
    
    # {{FN.room_{room_key}_first_occurrence_time}} => FN.room_{room_key}_first_occurrence_time
    out[:, {{FN.room_{room_key}_first_occurrence_time}}] = first_occurrence_time[{i}]
    
    # {{FN.room_{room_key}_last_occurrence_time}} => FN.room_{room_key}_last_occurrence_time
    out[:, {{FN.room_{room_key}_last_occurrence_time}}] = last_occurrence_time[{i}]
    
    # {{FN.room_{room_key}_first_occurrence_duration_from_start}} => FN.room_{room_key}_first_occurrence_duration_from_start
    out[:, {{FN.room_{room_key}_first_occurrence_duration_from_start}}] = first_occurrence_time[{i}] - start_elapsed_time
    
    # {{FN.room_{room_key}_last_occurrence_duration_from_start}} => FN.room_{room_key}_last_occurrence_duration_from_start
    out[:, {{FN.room_{room_key}_last_occurrence_duration_from_start}}] = last_occurrence_time[{i}] - start_elapsed_time
    
    # {{FN.room_{room_key}_first_occurrence_duration_from_end}} => FN.room_{room_key}_first_occurrence_duration_from_end
    out[:, {{FN.room_{room_key}_first_occurrence_duration_from_end}}] = end_elapsed_time - first_occurrence_time[{i}]
    
    # {{FN.room_{room_key}_last_occurrence_duration_from_end}} => FN.room_{room_key}_last_occurrence_duration_from_end
    out[:, {{FN.room_{room_key}_last_occurrence_duration_from_end}}] = end_elapsed_time- last_occurrence_time[{i}]
    

    """
    room_features_names.extend(
        f'room_{room_key}_{suffix}' for suffix in _ROOM_OCCURRENCE_SUFFIXES
    )

In [22]:
room_features_names[0:5]

['room_basement_first_occurrence_num_event_from_start',
 'room_basement_last_occurrence_num_event_from_start',
 'room_basement_first_occurrence_num_event_from_end',
 'room_basement_last_occurrence_num_event_from_end',
 'room_basement_first_occurrence_time']

In [23]:
@numba.jit(nopython=True, nogil=True, error_model='numpy')
def generate_building_first_last_counts_and_times(buildings, elapsed_time):
    """Find the first and last occurrence of each of the ten building codes.

    ``buildings[i]`` is used as an index into the result arrays and ``elapsed_time[i]`` is
    the value recorded for event ``i``. Codes of 10 or more are ignored.

    Returns four float32 arrays of length 10, in this order: first occurrence position,
    last occurrence position, first occurrence time, last occurrence time. Entries for
    codes that never occur stay NaN.
    """
    n_slots = 10
    first_occurrence_index = np.full(n_slots, np.nan, dtype=np.float32)
    last_occurrence_index = np.full(n_slots, np.nan, dtype=np.float32)
    first_occurrence_time = np.full(n_slots, np.nan, dtype=np.float32)
    last_occurrence_time = np.full(n_slots, np.nan, dtype=np.float32)

    for pos in range(buildings.shape[0]):
        code = buildings[pos]
        if code >= n_slots:
            continue

        # A NaN first-occurrence slot means this code has not been seen yet.
        if np.isnan(first_occurrence_index[code]):
            first_occurrence_index[code] = pos
            first_occurrence_time[code] = elapsed_time[pos]

        # Every sighting overwrites the "last" slots, so the final one wins.
        last_occurrence_index[code] = pos
        last_occurrence_time[code] = elapsed_time[pos]

    return first_occurrence_index, last_occurrence_index, first_occurrence_time, last_occurrence_time

# Source text of generate_building_based_features. The header contains no placeholders, so
# a plain string is enough; the per-building assignments are appended by the loop below.
building_features_src = """
@numba.jit(nopython=True, nogil=True, error_model='numpy')
def generate_building_based_features(out, building, elapsed_time):

    n = building.shape[0]
    
    start_elapsed_time = elapsed_time[0]
    end_elapsed_time = elapsed_time[-1]
    total_time = end_elapsed_time - start_elapsed_time

    (
        first_occurrence_index,
        last_occurrence_index,
        first_occurrence_time,
        last_occurrence_time,
    ) = generate_building_first_last_counts_and_times(building, elapsed_time)
    
"""

# Feature-name suffixes emitted for every building, in output order. The
# proportion-of-events / proportion-of-time variants are not generated.
_BUILDING_OCCURRENCE_SUFFIXES = (
    'first_occurrence_num_event_from_start',
    'last_occurrence_num_event_from_start',
    'first_occurrence_num_event_from_end',
    'last_occurrence_num_event_from_end',
    'first_occurrence_time',
    'last_occurrence_time',
    'first_occurrence_duration_from_start',
    'last_occurrence_duration_from_start',
    'first_occurrence_duration_from_end',
    'last_occurrence_duration_from_end',
)

building_features_names = []
for i, b in enumerate(preprocess_info['BUILDINGS']):
    # Dots are replaced with underscores when the building name is embedded in a feature name.
    building_key = b.replace(".", "_")
    building_features_src += f"""
    
    # {{FN.building_{building_key}_first_occurrence_num_event_from_start}} => FN.building_{building_key}_first_occurrence_num_event_from_start
    out[:, {{FN.building_{building_key}_first_occurrence_num_event_from_start}}] = first_occurrence_index[{i}]
    
    # {{FN.building_{building_key}_last_occurrence_num_event_from_start}} => FN.building_{building_key}_last_occurrence_num_event_from_start
    out[:, {{FN.building_{building_key}_last_occurrence_num_event_from_start}}] = last_occurrence_index[{i}]
    
    # {{FN.building_{building_key}_first_occurrence_num_event_from_end}} => FN.building_{building_key}_first_occurrence_num_event_from_end
    out[:, {{FN.building_{building_key}_first_occurrence_num_event_from_end}}] = n - first_occurrence_index[{i}]
    
    # {{FN.building_{building_key}_last_occurrence_num_event_from_end}} => FN.building_{building_key}_last_occurrence_num_event_from_end
    out[:, {{FN.building_{building_key}_last_occurrence_num_event_from_end}}] = n - last_occurrence_index[{i}]
    
    # {{FN.building_{building_key}_first_occurrence_time}} => FN.building_{building_key}_first_occurrence_time
    out[:, {{FN.building_{building_key}_first_occurrence_time}}] = first_occurrence_time[{i}]
    
    # {{FN.building_{building_key}_last_occurrence_time}} => FN.building_{building_key}_last_occurrence_time
    out[:, {{FN.building_{building_key}_last_occurrence_time}}] = last_occurrence_time[{i}]
    
    # {{FN.building_{building_key}_first_occurrence_duration_from_start}} => FN.building_{building_key}_first_occurrence_duration_from_start
    out[:, {{FN.building_{building_key}_first_occurrence_duration_from_start}}] = first_occurrence_time[{i}] - start_elapsed_time
    
    # {{FN.building_{building_key}_last_occurrence_duration_from_start}} => FN.building_{building_key}_last_occurrence_duration_from_start
    out[:, {{FN.building_{building_key}_last_occurrence_duration_from_start}}] = last_occurrence_time[{i}] - start_elapsed_time
    
    # {{FN.building_{building_key}_first_occurrence_duration_from_end}} => FN.building_{building_key}_first_occurrence_duration_from_end
    out[:, {{FN.building_{building_key}_first_occurrence_duration_from_end}}] = end_elapsed_time - first_occurrence_time[{i}]
    
    # {{FN.building_{building_key}_last_occurrence_duration_from_end}} => FN.building_{building_key}_last_occurrence_duration_from_end
    out[:, {{FN.building_{building_key}_last_occurrence_duration_from_end}}] = end_elapsed_time- last_occurrence_time[{i}]
    
    """
    building_features_names.extend(
        f'building_{building_key}_{suffix}' for suffix in _BUILDING_OCCURRENCE_SUFFIXES
    )

In [24]:
building_features_names[0:5]

['building_capitol_0_first_occurrence_num_event_from_start',
 'building_capitol_0_last_occurrence_num_event_from_start',
 'building_capitol_0_first_occurrence_num_event_from_end',
 'building_capitol_0_last_occurrence_num_event_from_end',
 'building_capitol_0_first_occurrence_time']

In [25]:
# Size of the fqid vocabulary; it is interpolated into the generated source as a literal array size.
_n_fqids = FQIDS.__len__

# Source text of the fqid first/last-occurrence functions. The per-fqid assignments are
# appended to the body of generate_fqid_based_features by the loop further down.
fqid_features_src = f"""

@numba.jit(nopython=True, nogil=True, error_model='numpy')
def generate_fqid_first_last_counts_and_times(fqid, elapsed_time):
    
    # {_n_fqids} => FQIDS.__len__
    first_occurrence_index = np.full({_n_fqids}, np.nan, dtype=np.float32)
    last_occurrence_index = np.full({_n_fqids}, np.nan, dtype=np.float32)
    first_occurrence_time = np.full({_n_fqids}, np.nan, dtype=np.float32)
    last_occurrence_time = np.full({_n_fqids}, np.nan, dtype=np.float32)
    n = fqid.shape[0]
    
    for i in range(n):
        slot_i = fqid[i]
        # {_n_fqids} => FQIDS.__len__
        if slot_i < {_n_fqids}:
            last_occurrence_index[slot_i] = i
            last_occurrence_time[slot_i] = elapsed_time[i]

            if np.isnan(first_occurrence_index[slot_i]):
                first_occurrence_index[slot_i] = i
                first_occurrence_time[slot_i] = elapsed_time[i]
               
    return first_occurrence_index, last_occurrence_index, first_occurrence_time, last_occurrence_time

@numba.jit(nopython=True, nogil=True, error_model='numpy')
def generate_fqid_based_features(out, fqid, elapsed_time):

    n = fqid.shape[0]
    
    start_elapsed_time = elapsed_time[0]
    end_elapsed_time = elapsed_time[-1]
    total_time = end_elapsed_time - start_elapsed_time

    (
        first_occurrence_index,
        last_occurrence_index,
        first_occurrence_time,
        last_occurrence_time,
    ) = generate_fqid_first_last_counts_and_times(fqid, elapsed_time)
    
"""

# Only these fqids get first/last-occurrence features; all others are skipped in the loop.
fqids_to_use = [
    'tocloset_dirty',
    'gramps',
    'chap1_finale',
    'tomicrofiche',
    'tomap',
    'journals.pic_0.next',
    'directory.closeup.archivist',
    'directory',
    'journals',
]

# Feature-name suffixes emitted for every selected fqid, in output order. The
# proportion-of-events / proportion-of-time variants are not generated.
_FQID_OCCURRENCE_SUFFIXES = (
    'first_occurrence_num_event_from_start',
    'last_occurrence_num_event_from_start',
    'first_occurrence_num_event_from_end',
    'last_occurrence_num_event_from_end',
    'first_occurrence_time',
    'last_occurrence_time',
    'first_occurrence_duration_from_start',
    'last_occurrence_duration_from_start',
    'first_occurrence_duration_from_end',
    'last_occurrence_duration_from_end',
)

fqid_features_names = []
for i, b in enumerate(preprocess_info['FQIDS']):

    # `i` stays the position in the full FQIDS list, so it indexes the occurrence arrays
    # correctly even though most fqids are skipped.
    if b not in fqids_to_use:
        continue

    # Dots are replaced with underscores when the fqid is embedded in a feature name.
    fqid_key = b.replace(".", "_")
    fqid_features_src += f"""
    
    # {{FN.fqid_{fqid_key}_first_occurrence_num_event_from_start}} => FN.fqid_{fqid_key}_first_occurrence_num_event_from_start
    out[:, {{FN.fqid_{fqid_key}_first_occurrence_num_event_from_start}}] = first_occurrence_index[{i}]
    
    # {{FN.fqid_{fqid_key}_last_occurrence_num_event_from_start}} => FN.fqid_{fqid_key}_last_occurrence_num_event_from_start
    out[:, {{FN.fqid_{fqid_key}_last_occurrence_num_event_from_start}}] = last_occurrence_index[{i}]
    
    # {{FN.fqid_{fqid_key}_first_occurrence_num_event_from_end}} => FN.fqid_{fqid_key}_first_occurrence_num_event_from_end
    out[:, {{FN.fqid_{fqid_key}_first_occurrence_num_event_from_end}}] = n - first_occurrence_index[{i}]
    
    # {{FN.fqid_{fqid_key}_last_occurrence_num_event_from_end}} => FN.fqid_{fqid_key}_last_occurrence_num_event_from_end
    out[:, {{FN.fqid_{fqid_key}_last_occurrence_num_event_from_end}}] = n - last_occurrence_index[{i}]
    
    # {{FN.fqid_{fqid_key}_first_occurrence_time}} => FN.fqid_{fqid_key}_first_occurrence_time
    out[:, {{FN.fqid_{fqid_key}_first_occurrence_time}}] = first_occurrence_time[{i}]
    
    # {{FN.fqid_{fqid_key}_last_occurrence_time}} => FN.fqid_{fqid_key}_last_occurrence_time
    out[:, {{FN.fqid_{fqid_key}_last_occurrence_time}}] = last_occurrence_time[{i}]
    
    # {{FN.fqid_{fqid_key}_first_occurrence_duration_from_start}} => FN.fqid_{fqid_key}_first_occurrence_duration_from_start
    out[:, {{FN.fqid_{fqid_key}_first_occurrence_duration_from_start}}] = first_occurrence_time[{i}] - start_elapsed_time
    
    # {{FN.fqid_{fqid_key}_last_occurrence_duration_from_start}} => FN.fqid_{fqid_key}_last_occurrence_duration_from_start
    out[:, {{FN.fqid_{fqid_key}_last_occurrence_duration_from_start}}] = last_occurrence_time[{i}] - start_elapsed_time
    
    # {{FN.fqid_{fqid_key}_first_occurrence_duration_from_end}} => FN.fqid_{fqid_key}_first_occurrence_duration_from_end
    out[:, {{FN.fqid_{fqid_key}_first_occurrence_duration_from_end}}] = end_elapsed_time - first_occurrence_time[{i}]
    
    # {{FN.fqid_{fqid_key}_last_occurrence_duration_from_end}} => FN.fqid_{fqid_key}_last_occurrence_duration_from_end
    out[:, {{FN.fqid_{fqid_key}_last_occurrence_duration_from_end}}] = end_elapsed_time - last_occurrence_time[{i}] 
    
    """
    fqid_features_names.extend(
        f'fqid_{fqid_key}_{suffix}' for suffix in _FQID_OCCURRENCE_SUFFIXES
    )

In [26]:
fqid_features_names[0:5]

['fqid_chap1_finale_first_occurrence_num_event_from_start',
 'fqid_chap1_finale_last_occurrence_num_event_from_start',
 'fqid_chap1_finale_first_occurrence_num_event_from_end',
 'fqid_chap1_finale_last_occurrence_num_event_from_end',
 'fqid_chap1_finale_first_occurrence_time']

In [27]:
# Source text of the level counting functions. The header has nothing to interpolate, so it
# is a plain string and the {FN.<feature>} placeholders are written with single braces.
level_based_features_src = """

@numba.jit(nopython=True, nogil=True, error_model='numpy')
def generate_level_counts(level):
    
    counts = np.zeros(23, dtype=np.int32)
    n = level.shape[0]
    
    for i in range(n):
        slot_i = level[i]
        if slot_i < 23:
            counts[slot_i] += 1
               
    return counts

@numba.jit(nopython=True, nogil=True, error_model='numpy')
def generate_level_based_features(out, level):
    level_counts = generate_level_counts(level)
    
    out[:, {FN.level_with_most_events}] = np.argmax(level_counts)
    out[:, {FN.level_with_least_events}] = np.argmin(level_counts)
    
"""

# Append one count assignment per level (0..22) and record the matching feature name.
level_features_names = []
for l in range(23):
    level_feature = f'level_{l}_event_count'
    level_based_features_src += f"""    
    # {{FN.{level_feature}}} => FN.{level_feature}
    out[:, {{FN.{level_feature}}}] = level_counts[{l}]    
    """
    level_features_names.append(level_feature)

In [28]:
level_features_names[0:5]

['level_0_event_count',
 'level_1_event_count',
 'level_2_event_count',
 'level_3_event_count',
 'level_4_event_count']

In [29]:
@numba.jit(nopython=True, nogil=True, error_model='numpy')
def generate_building_visits_counts(buildings):
    """Count visits to each of the ten building codes.

    A visit is a run of consecutive events with the same code: a code is counted once each
    time it differs from the code of the previous event. Codes of 10 or more are not
    counted, but they still end the current run.

    Returns an int32 array of length 10 with the number of visits per code.
    """
    n_slots = 10
    counts = np.zeros(n_slots, dtype=np.int32)

    previous = -1
    for pos in range(buildings.shape[0]):
        current = buildings[pos]
        if current == previous:
            # Still inside the same run; nothing new to count.
            continue
        if current < n_slots:
            counts[current] += 1
        previous = current

    return counts

@numba.jit(nopython=True, nogil=True, error_model='numpy')
def generate_room_visits_counts(room):
    """Count visits to each of the thirteen room codes.

    A visit is a run of consecutive events with the same code: a code is counted once each
    time it differs from the code of the previous event. Codes of 13 or more are not
    counted, but they still end the current run.

    Returns an int32 array of length 13 with the number of visits per code.
    """
    n_slots = 13
    counts = np.zeros(n_slots, dtype=np.int32)

    previous = -1
    for pos in range(room.shape[0]):
        current = room[pos]
        if current == previous:
            # Still inside the same run; nothing new to count.
            continue
        if current < n_slots:
            counts[current] += 1
        previous = current

    return counts

In [30]:
@numba.jit(nopython=True, nogil=True, error_model='numpy')
def coordinates_distance_travelled_sum(x, y):
    """Sum the straight-line distances between consecutive (x, y) points.

    Steps whose distance evaluates to NaN (for instance because one of the coordinates
    involved is NaN) are left out of the sum. With fewer than two points the result is 0.
    """
    total = np.float32(0)

    for pos in range(x.shape[0] - 1):
        step = np.sqrt(
            np.power(x[pos + 1] - x[pos], 2) + np.power(y[pos + 1] - y[pos], 2)
        )
        if np.isnan(step):
            continue
        total += step

    return total

In [31]:
@numba.jit(nopython=True, nogil=True, error_model='numpy')
def calculate_read_speeds_of_text(text_length, elapsed_time):
    """Compute a reading speed for every event that has a text length.

    For each event ``i`` except the last one, if ``text_length[i]`` is not NaN the speed is
    ``text_length[i] / (elapsed_time[i + 1] - elapsed_time[i])``. The last event has no
    following event and therefore never contributes.

    Because the function is compiled with ``error_model='numpy'``, a zero time gap does not
    raise; the corresponding speed is +/-inf or nan.

    Returns a float32 array holding the speeds in event order; its length is the number of
    qualifying events (possibly zero).
    """
    n = text_length.shape[0]
    out_i = 0
    # Worst case every event qualifies; the buffer is trimmed to the filled part on return.
    read_speed = np.empty(n, dtype=np.float32)

    for i in range(n - 1):

        tl = text_length[i]

        # Logical `not` (rather than bitwise `~`) is the correct operator for negating a
        # boolean and matches the NaN check used in coordinates_distance_travelled_sum.
        if not np.isnan(tl):

            et0 = elapsed_time[i]
            et1 = elapsed_time[i + 1]

            t = et1 - et0

            rs = tl / t
            read_speed[out_i] = rs
            out_i += 1

    read_speed = read_speed[:out_i]

    return read_speed

In [32]:
# Master list of feature (output column) names.
# NOTE(review): the generated source templates in this notebook write to
# `out[:, {FN.<name>}]`, so the position of a name in this list presumably
# determines its column index through FN -- confirm where FN is built before
# reordering, inserting or removing entries.
# The 'nunqiue' spelling below is part of the runtime names; leave it as-is
# unless every reference to those names is updated at the same time.
FEATURE_NAMES = [
    # Session-level basics and counts per `name` value.
    'number_of_events',
    'level_group',
    'question_number',
    'session_start_time',
    'session_end_time',
    'session_duration',
    'name_basic_0_count',
    'name_undefined_1_count',
    'name_close_2_count',
    'name_open_3_count',
    'name_prev_4_count',
    'name_next_5_count',
    
    # One-hot level-group indicators.
    'is_level_group_0',
    'is_level_group_1',
    'is_level_group_2',
    
    'name_basic_0_proportion',
    'name_undefined_1_proportion',
    'name_close_2_proportion',
    'name_open_3_proportion',
    'name_prev_4_proportion',
    'name_next_5_proportion',
    
    # Counts per `event_name` value.
    'event_name_cutscene_click_0_count',
    'event_name_person_click_1_count',
    'event_name_navigate_click_2_count',
    'event_name_observation_click_3_count',
    'event_name_notification_click_4_count',
    'event_name_object_click_5_count',
    'event_name_object_hover_6_count',
    'event_name_map_hover_7_count',
    'event_name_map_click_8_count',
    'event_name_checkpoint_9_count',
    'event_name_notebook_click_10_count',
    
    'total_hover_duration',
    'average_hover_duration',
    
    # One-hot question indicators (0..18).
    'is_question_0',
    'is_question_1',
    'is_question_2',
    'is_question_3',
    'is_question_4',
    'is_question_5',
    'is_question_6',
    'is_question_7',
    'is_question_8',
    'is_question_9',
    'is_question_10',
    'is_question_11',
    'is_question_12',
    'is_question_13',
    'is_question_14',
    'is_question_15',
    'is_question_16',
    'is_question_17',
    'is_question_18',
    
    'hover_duration_max',
    'hover_duration_min',
    
    'session_weekday',
    
    'last_event_name',
    'last_name',
    
    # Event counts per building (10 entries).
    'building_capitol_0_count',
    'building_capitol_1_count',
    'building_capitol_2_count',
    'building_drycleaner_count',
    'building_flaghouse_count',
    'building_historicalsociety_count',
    'building_humanecology_count',
    'building_kohlcenter_count',
    'building_library_count',
    'building_wildlife_count',
    
    # Event counts per room (13 entries).
    'room_basement_count',
    'room_cage_count',
    'room_center_count',
    'room_closet_count',
    'room_closet_dirty_count',
    'room_collection_count',
    'room_collection_flag_count',
    'room_entry_count',
    'room_frontdesk_count',
    'room_hall_count',
    'room_halloffame_count',
    'room_microfiche_count',
    'room_stacks_count',
    
    'building_capitol_0_proportion_of_events',
    'building_capitol_1_proportion_of_events',
    'building_capitol_2_proportion_of_events',
    'building_drycleaner_proportion_of_events',
    'building_flaghouse_proportion_of_events',
    'building_historicalsociety_proportion_of_events',
    'building_humanecology_proportion_of_events',
    'building_kohlcenter_proportion_of_events',
    'building_library_proportion_of_events',
    'building_wildlife_proportion_of_events',
    
    'level_mean',
    'level_std',
    'level_with_most_events',
    'level_with_least_events',
    
    'building_nunqiue',
    'room_nunqiue',
    'fqids_nunqiue',
    
    'average_duration_per_event',
    
    'elapsed_time_diff_max',
    'elapsed_time_diff_min',
    'elapsed_time_diff_std',
    'elapsed_time_diff_median',
    
    'session_index_level_group_first',
    'session_index_level_group_last',
    'elapsed_time_sum',
    'elapsed_time_in_increasing_order',
    
    'session_index_diff_max',
    
    ##### Visit counts per building / room, movement and reading-speed features.
    # NOTE(review): the *_visits_count names line up with the 10-slot and
    # 13-slot results of generate_building_visits_counts /
    # generate_room_visits_counts -- confirm the mapping at the call site.
    
    'building_capitol_0_visits_count',
    'building_capitol_1_visits_count',
    'building_capitol_2_visits_count',
    'building_drycleaner_visits_count',
    'building_flaghouse_visits_count',
    'building_historicalsociety_visits_count',
    'building_humanecology_visits_count',
    'building_kohlcenter_visits_count',
    'building_library_visits_count',
    'building_wildlife_visits_count',
    'building_visits_nunqiue',
    
    
    'room_basement_visits_count',
    'room_cage_visits_count',
    'room_center_visits_count',
    'room_closet_visits_count',
    'room_closet_dirty_visits_count',
    'room_collection_visits_count',
    'room_collection_flag_visits_count',
    'room_entry_visits_count',
    'room_frontdesk_visits_count',
    'room_hall_visits_count',
    'room_halloffame_visits_count',
    'room_microfiche_visits_count',
    'room_stacks_visits_count',
    'room_visits_nunqiue',
    
    'room_coord_distance_travelled',
    'screen_coord_distance_travelled',
    
    'text_read_speeds_mean',
    'text_read_speeds_std',
    
    ## Hand-written features keyed by level group (LG<n>) and, where present, level (L<n>).
    'LG0_first_Leopold__why_don_t_you_help_me_set_up_in_the_Capitol_duration',
    'LG0_first_It_s_a_women_s_basketball_jersey_duration',
    'LG0_first_event_after_first_Why_don_t_you_head_to_the_Basketball_Center_and_rustle_up_some_clues_duration',
    'LG0_time_before_end_of_LG_first_read_I_need_to_get_to_the_Capitol_and_tell_Gramps_exclamation_',
    'LG0_first_Just_talking_to_Teddy_duration',
    'LG0_first_Hot_Dog_exclamation__I_knew_it_exclamation_duration',
    
    'LG0_to_LG1_gap_duration',
    'LG1_first_Youmans_was_a_suffragist_here_in_Wisconsin_duration',
    
    'LG1_microfiche_total_clicks',
    'LG0_plaque_event_click_count',
    
    'LG0_first_Could_be__But_we_need_evidence_exclamation_duration',
    'LG2_first_Hmm__You_could_try_the_Aldo_Leopold_Wildlife_Center__duration',
    'LG1_first_Who_could_ve_done_this_question__duration',
    'LG1_first_Nope__But_Youmans_and_other_suffragists_worked_hard_to_change_that__duration',
    'LG1_first_I_ve_got_a_stack_of_business_cards_from_my_favorite_cleaners__duration',
    'LG1_first_Where_did_you_get_that_coffee_question__duration',
    
    'LG1_max_fqid_magnify_duration',
    'LG1_first_fqid_businesscards_card_1_next_duration',
    'LG1_max_fqid_journals_duration',
    
    'LG0_L1_first_groupconvo_room_coor_x',
    'LG0_L1_first_report_open_duration',
    'LG0_L2_first_time_between_closing_tunic_and_gramps',
    'LG0_L4_first_chap1_finale_nagivation_click_room_coords_x',
    
    'LG1_L6_time_between_clicking_on_magnifying_glass_and_then_clicking_on_archivist_again',
    'LG1_L6_time_between_first_finish_talking_to_gramps_and_leaving_closet_dirty',
    
    'LG1_L6_number_of_events_between_opening_reader_and_clicking_on_paper2_bingo',
    
    'LG0_L0_elapsed_time_to_start_of_open_play',
    'LG1_L10_time_between_being_told_to_talk_to_librarian_and_speaking_to_librarian',
    
    'LG0_fqid_chap1_finale_last_occurrence_duration_from_end',
    
    'LG1_L6_trigger_scarf_duration',
    
    'LG1_L6_last_trigger_scarf_next_room_coor_x',
    'LG1_L6_time_between_finish_talking_to_gramps_and_leaving_room',
    
    'LG1_L6_time_between_first_being_told_about_logbook_and_getting_to_logbook',
    
    'LG1_L11_time_spent_reading_bingo_text',
    
    'LG1_L7_first_time_to_get_from_stacks_to_entry',
    'LG1_L7_first_time_to_read_workers_text',
    
    'LG1_L7_first_time_from_being_told_about_business_cards_to_clicking_on_business_cards',
    'LG2_L18_first_time_between_reaching_wildlife_center_and_clicking_on_coffee',
    
    'LG2_L21_first_time_between_closing_journals_and_reaching_capitol_2_hall',
    'LG1_L12_time_to_reach_capitol_1_hall_from_start_of_level',
    
    'LG1_L12_first_map_click_room_coord_x',
    
    'LG0_L3_opened_notebook',
    
    'LG1_L15_first_directory_y_coord_click',
    'LG1_L6_first_y_room_coords_of_next_click_after_finding_magnifying_glass',
    'LG1_L6_first_y_room_coords_of_next_click_after_archivist_mentions_difficulty_reading',
    'LG1_L7_first_y_room_coords_of_next_click_after_arriving_at_humanecology_frontdesk',
    
    'LG0_L2_fqid_cs_event_duration_mean',
    
    'LG0_L3_fqid_plaque_event_duration_sum',
    
    'LG0_L2_time_between_reading_cs_text_and_first_tunic_navigate_click_time',
    'LG0_L3_first_room_coord_x_click',
    
    'LG0_L3_nunique_room',
    
    'LG1_L6_first_y_room_coords_for_non_nan_fqid',
    
    'LG1_L12_first_x_room_coords',
    
    'LG0_L0_doorblock_message_count',
    
    'session_hour',
    'LG0_name_num_unique_count',
    
    'LG1_logbook_page_bingo_time_duration',
    'LG1_reader_paper2_bingo_time_duration',
]

# Append the name lists that were generated programmatically in earlier cells.
# NOTE(review): `fqids_features_names` and `fqid_features_names` are two
# different variables (plural vs singular) -- confirm both are intended and
# that their contents do not overlap.
FEATURE_NAMES.extend(fqids_features_names)
FEATURE_NAMES.extend(building_features_names)
FEATURE_NAMES.extend(room_features_names)
FEATURE_NAMES.extend(fqid_features_names)
FEATURE_NAMES.extend(level_features_names)

In [33]:
# Text events tracked in the history vector. Each prefix is expanded into
# three consecutive slots: `<prefix>_time`, `<prefix>_index`, `<prefix>_duration`.
_HISTORY_TEXT_EVENT_PREFIXES = (
    'LG0_first_We_need_to_talk_about_that_missing_paperwork',
    'LG0_first_Leopold__why_don_t_you_help_me_set_up_in_the_Capitol',
    'LG0_first_It_s_a_women_s_basketball_jersey',
    'LG0_first_event_after_first_Why_don_t_you_head_to_the_Basketball_Center_and_rustle_up_some_clues_question_',
    'LG0_I_need_to_get_to_the_Capitol_and_tell_Gramps_exclamation_',
    'LG0_first_Just_talking_to_Teddy_',
    'LG0_first_Hot_Dog_exclamation__I_knew_it_exclamation_',
    'LG1_first_Youmans_was_a_suffragist_here_in_Wisconsin',
    'LG0_first_Could_be__But_we_need_evidence_exclamation_',
    'LG2_first_Hmm__You_could_try_the_Aldo_Leopold_Wildlife_Center_',
    'LG1_first_Who_could_ve_done_this_question_',
    'LG1_first_Nope__But_Youmans_and_other_suffragists_worked_hard_to_change_that_',
    'LG1_first_I_ve_got_a_stack_of_business_cards_from_my_favorite_cleaners_',
    'LG1_first_Where_did_you_get_that_coffee_question_',
)
_HISTORY_TEXT_EVENT_SUFFIXES = ('_time', '_index', '_duration')

# Prefix-major order: all three slots of one event come before the next event.
HISTORY_LST = [
    prefix + suffix
    for prefix in _HISTORY_TEXT_EVENT_PREFIXES
    for suffix in _HISTORY_TEXT_EVENT_SUFFIXES
]

# Remaining history slots: individually named values that the generated
# functions read and write as `history[{HIST.<name>}]`.
# NOTE(review): as with the text-event slots, the position in HISTORY_LST
# presumably becomes the slot index exposed through HIST -- confirm where HIST
# is built before reordering entries.
HISTORY_LST.extend([
    # Level-group boundaries and whole-level-group counters.
    'LG0_last_event_time',
    'LG0_last_event_index',
    'LG1_first_event_time',
    'LG1_first_event_index',
    'LG1_last_event_time',
    'LG1_last_event_index',
    'LG1_microfiche_page_2_duration',
    'LG1_microfiche_total_clicks',
    'LG0_plaque_event_click_count',
    'LG2_first_event_time',
    'LG2_first_event_index',
    'LG2_last_event_time',
    'LG2_last_event_index',
    
    'LG1_max_fqid_magnify_duration',
    'LG1_first_fqid_businesscards_card_1_next_duration',
    'LG1_max_fqid_journals_duration',
    
    # Per-level slots (L<n> in the name is the level).
    'LG0_L1_first_groupconvo_room_coor_x',
    
    'LG0_L1_first_report_open_time',
    'LG0_L1_first_report_close_time',
    
    'LG0_L2_last_tunic_close_time',
    'LG0_L2_first_gramps_time',
    'LG0_L4_first_chap1_finale_nagivation_click_room_coords_x',
    
    'LG1_L6_Ah__that_s_better_exclamation__previous_time',
    'LG1_L6_last_magnify_time',
    
    'LG1_L6_first_finished_talking_to_gramps',
    'LG1_L6_first_time_leave_closet_dirty',
    
    'LG1_L9_first_reader_index',
    'LG1_L9_first_click_reader_paper2_bingo_index',
    
    'LG0_L0_start_elapsed_time',
    'LG0_L0_first_navigate_click_time',
    
    'LG1_L10_starts_speaking_to_librarian_time',
    'LG1_L10_first_Wells_exclamation__What_was_he_doing_here_question__I_should_ask_the_librarian_time',
    
    'LG0_last_chap1_finale_time',
    
    'LG1_L6_last_trigger_scarf_time',
    'LG1_L6_first_trigger_scarf_prev_time',
    
    'LG1_L6_last_trigger_scarf_next_room_coor_x',
    
    'LG1_L6_first_toentry_time',
    
    'LG1_L8_first_time_told_about_logbook',
    'LG1_L8_first_logbook_time',
    
    'LG1_L11_first_Hey__this_is_Youmans_exclamation__time',
    'LG1_L11_first_Hey__this_is_Youmans_exclamation__prev_time',
    'LG1_L11_first_I_should_go_to_the_Capitol_and_tell_everyone_exclamation__time',
    
    'LG1_L7_first_tostacks_time',
    'LG1_L7_first_toentry_time',
    
    'LG1_L7_first_Hello_there_exclamation_prev_time',
    'LG1_L7_first_Why_don_t_you_take_a_look_question__time',
    
    'LG1_L7_first_businesscards_time',
    
    'LG2_L18_first_reach_wildlife_center_time',
    'LG2_L18_first_coffee_time',
    
    'LG2_L21_first_journals_close_time',
    'LG2_L22_first_reach_capitol_2_hall_time',
    
    'LG1_L12_first_reach_capitol_1_hall_time',
    'LG1_L12_start_time',
    
    'LG1_L12_first_map_click_room_coord_x',
    
    'LG0_L3_opened_notebook',
    
    'LG1_L15_first_directory_y_coord_click',
    'LG1_L6_first_y_room_coords_of_next_click_after_finding_magnifying_glass',
    'LG1_L6_first_y_room_coords_of_next_click_after_archivist_mentions_difficulty_reading',
    'LG1_L7_first_y_room_coords_of_next_click_after_arriving_at_humanecology_frontdesk',
    
    # Sum and count are stored separately for the `cs` event durations.
    'LG0_L2_fqid_cs_event_duration_sum',
    'LG0_L2_fqid_cs_event_duration_count',
    
    'LG0_L3_fqid_plaque_event_duration_sum',
    
    'LG0_L2_first_tunic_navigate_click_time',
    'LG0_L2_last_text_fqid_cs_time',
    'LG0_L2_first_text_fqid_cs_time',
    
    'LG0_L3_nunique_room',
    'LG0_L3_first_room_coord_x_click',
    
    'LG1_L6_first_y_room_coords_for_non_nan_fqid',
    
    'LG1_L12_first_x_room_coords',
    
    'LG0_L0_doorblock_message_count',
    
    'LG0_name_num_unique_count',
    
    'LG1_logbook_page_bingo_start_time',
    'LG1_logbook_page_bingo_end_time',
    'LG1_reader_paper2_bingo_start_time',
    'LG1_reader_paper2_bingo_end_time',
])

In [34]:
# Collects the source-code templates defined in the following cells.
SRC_LIST = []

# Template for `get_microfiche_page_times(x_en, x_n, x_fqid, x_t)`.
# The `{FQIDS.*}`, `{EVENT_NAMES.*}` and `{NAMES.*}` placeholders are left
# unformatted here; presumably they are replaced with integer codes when the
# entries of SRC_LIST are rendered and compiled elsewhere -- TODO confirm.
#
# The generated function makes a single pass over the events and tracks two
# flags: whether the `tomicrofiche` fqid has been seen (cleared again by a
# navigate_click on `tofrontdesk`) and whether the reader is open (set by a
# navigate_click on `reader`, cleared by an object_click/close on `reader`).
# While the reader is open it follows the current page (0..2, wrapping via the
# reader_paper<N>_next/_prev fqids), accumulates the time spent on each page,
# and counts all other object clicks per page.
#
# Returns (page_durations, num_clicks_on_page, num_clicks_on_page_first_look,
# time_in_reader). `page_durations_first_look` is accumulated but not returned.
src = """
@numba.jit(nopython=True, nogil=True, error_model='numpy')
def get_microfiche_page_times(
    x_en,
    x_n,
    x_fqid,
    x_t,
):
    
    n = x_en.shape[0]
    
    in_microfiche_room = False
    in_reader = False
    
    current_page_number = 0
    current_page_start_time = 0
    page_durations = np.zeros(3)
    page_durations_first_look = np.zeros(3)
    looked_at_page = np.zeros(3, dtype=np.bool_)
    num_clicks_on_page = np.zeros(3)
    num_clicks_on_page_first_look = np.zeros(3)
    
    time_in_reader = 0
    time_entered_reader = 0
    
    for i in range(n):
        
        if not in_microfiche_room:
            # {FQIDS.tomicrofiche} => FQIDS.tomicrofiche
            if x_fqid[i] == {FQIDS.tomicrofiche}:
                in_microfiche_room = True
        else:
            if in_reader:
                # {EVENT_NAMES.object_click}, {NAMES.close}, {FQIDS.reader} => EVENT_NAMES.object_click, NAMES.close, FQIDS.reader
                if x_en[i] == {EVENT_NAMES.object_click} and x_n[i] == {NAMES.close} and x_fqid[i] == {FQIDS.reader}:
                    in_reader = False
                    
                    prev_page_duration = x_t[i] - current_page_start_time
                    page_durations[current_page_number] += prev_page_duration
                    
                    if not looked_at_page[current_page_number]:
                        page_durations_first_look[current_page_number] += prev_page_duration
                        looked_at_page[current_page_number] = True
                        
                    time_in_reader += x_t[i] - time_entered_reader
                
                # {EVENT_NAMES.object_click} => EVENT_NAMES.object_click
                elif x_en[i] == {EVENT_NAMES.object_click}:
                
                    # {FQIDS.reader_paper0_next} => FQIDS.reader_paper0_next
                    if x_fqid[i] == {FQIDS.reader_paper0_next}:
                        next_page = 1                    
                    elif x_fqid[i] == {FQIDS.reader_paper0_prev}: # {FQIDS.reader_paper0_prev} => FQIDS.reader_paper0_prev
                        next_page = 2
                    elif x_fqid[i] == {FQIDS.reader_paper1_next}: # {FQIDS.reader_paper1_next} => FQIDS.reader_paper1_next
                        next_page = 2
                    elif x_fqid[i] == {FQIDS.reader_paper1_prev}: # {FQIDS.reader_paper1_prev} => FQIDS.reader_paper1_prev
                        next_page = 0
                    elif x_fqid[i] == {FQIDS.reader_paper2_next}: # {FQIDS.reader_paper2_next} => FQIDS.reader_paper2_next
                        next_page = 0
                    elif x_fqid[i] == {FQIDS.reader_paper2_prev}: # {FQIDS.reader_paper2_prev} => FQIDS.reader_paper2_prev
                        next_page = 1
                    else:
                        # next_page = -1
                        # print(x_fqid[i])
                        num_clicks_on_page[current_page_number] += 1
                        if not looked_at_page[current_page_number]:
                            num_clicks_on_page_first_look[current_page_number] += 1
                        continue
                    
                    prev_page_duration = x_t[i] - current_page_start_time
                    page_durations[current_page_number] += prev_page_duration
                    
                    if not looked_at_page[current_page_number]:
                        page_durations_first_look[current_page_number] += prev_page_duration
                        looked_at_page[current_page_number] = True
                    
                    current_page_number = next_page
                    current_page_start_time = x_t[i]
            else:
                # {EVENT_NAMES.navigate_click}, {FQIDS.reader} => EVENT_NAMES.navigate_click, FQIDS.reader
                if x_en[i] == {EVENT_NAMES.navigate_click} and x_fqid[i] == {FQIDS.reader}:
                    in_reader = True
                    current_page_number = 0
                    current_page_start_time = x_t[i]
                    time_entered_reader = x_t[i]
                
                # {EVENT_NAMES.navigate_click}, {FQIDS.tofrontdesk} => EVENT_NAMES.navigate_click, FQIDS.tofrontdesk
                elif x_en[i] == {EVENT_NAMES.navigate_click} and x_fqid[i] == {FQIDS.tofrontdesk}:
                    in_microfiche_room = False
            
    
    return page_durations, num_clicks_on_page, num_clicks_on_page_first_look, time_in_reader
"""
                                            
SRC_LIST.append(src)

In [35]:
# Template for `lg0_counts(lg, history, x_en, x_n, x_fqid)`.
# The generated function asserts `lg == 0`, resets the
# `LG0_plaque_event_click_count` history slot to 0 and then adds one for every
# event that is an object_click on the `plaque` fqid. It mutates `history` in
# place and has no return statement; `x_n` is accepted but not read.
# Placeholders in braces are presumably substituted with integer codes when
# SRC_LIST is rendered elsewhere -- TODO confirm.
src = """
@numba.jit(nopython=True, nogil=True, error_model='numpy')
def lg0_counts(
    lg,
    history,
    x_en,
    x_n,
    x_fqid,
):
    
    assert lg == 0
    
    n = x_en.shape[0]
    
    # {HIST.LG0_plaque_event_click_count} => HIST.LG0_plaque_event_click_count
    history[{HIST.LG0_plaque_event_click_count}] = 0
    
    for i in range(n):                
        # {EVENT_NAMES.object_click}, {FQIDS.plaque} => EVENT_NAMES.object_click, FQIDS.plaque
        if x_en[i] == {EVENT_NAMES.object_click} and x_fqid[i] == {FQIDS.plaque}:
            history[{HIST.LG0_plaque_event_click_count}] += 1
            
"""

SRC_LIST.append(src)

In [36]:
# Template for `generate_room_fqids_counts(room_fqids)`.
# The generated function returns an int32 array with `{ROOMS.__len__}` slots
# holding how often each value occurs in `room_fqids`; values that are not
# below `{ROOMS.__len__}` are ignored. The placeholder is presumably replaced
# with the number of rooms when SRC_LIST is rendered elsewhere -- TODO confirm.
src = """
@numba.jit(nopython=True, nogil=True, error_model='numpy')
def generate_room_fqids_counts(room_fqids):
    
    # {ROOMS.__len__} => ROOMS.__len__
    counts = np.zeros({ROOMS.__len__}, dtype=np.int32)
    n = room_fqids.shape[0]
    
    for i in range(n):
        slot_i = room_fqids[i]
        # {ROOMS.__len__} => ROOMS.__len__
        if slot_i < {ROOMS.__len__}:
            counts[slot_i] += 1
               
    return counts
"""

SRC_LIST.append(src)

In [37]:
# Template for `lg0_per_level_fill_history(...)`.
# The generated function asserts `lg == 0` and fills level-specific history
# slots in one pass over the events, branching on `level[i]` (0 to 4):
#   * "first ..." slots are written only while the slot is still NaN, so NaN
#     acts as the "not recorded yet" marker for them;
#   * "last ..." slots are overwritten on every match;
#   * counter/sum slots are explicitly reset to 0 before the loop;
#   * durations use the time to the next event and are skipped for the final
#     event.
# After the loop it stores the number of distinct rooms seen on level 3.
# It mutates `history` in place and has no return statement; `x_index` and
# `x_b` are accepted but not read.
# Placeholders in braces are presumably substituted with integer codes when
# SRC_LIST is rendered elsewhere -- TODO confirm.
src = """
@numba.jit(nopython=True, nogil=True, error_model='numpy', parallel=False)
def lg0_per_level_fill_history(
    lg,
    level,
    history,
    x_et,
    x_index,
    x_text_numerical,
    x_en,
    x_n,
    x_fqid,
    x_room_coor_x,
    x_b,
    x_r,
    x_text_fqid,
):

    assert lg == 0
    
    # {HIST.LG0_L0_start_elapsed_time} => HIST.LG0_L0_start_elapsed_time
    history[{HIST.LG0_L0_start_elapsed_time}] = x_et[0]
    
    # {HIST.LG0_L3_opened_notebook} => HIST.LG0_L3_opened_notebook
    history[{HIST.LG0_L3_opened_notebook}] = 0
    
    # {HIST.LG0_L2_fqid_cs_event_duration_count} => HIST.LG0_L2_fqid_cs_event_duration_count
    history[{HIST.LG0_L2_fqid_cs_event_duration_count}] = 0
    
    # {HIST.LG0_L2_fqid_cs_event_duration_sum} => HIST.LG0_L2_fqid_cs_event_duration_sum
    history[{HIST.LG0_L2_fqid_cs_event_duration_sum}] = 0
    
    # {HIST.LG0_L3_fqid_plaque_event_duration_sum} => HIST.LG0_L3_fqid_plaque_event_duration_sum
    history[{HIST.LG0_L3_fqid_plaque_event_duration_sum}] = 0
    
    # {HIST.LG0_L0_doorblock_message_count} => HIST.LG0_L0_doorblock_message_count
    history[{HIST.LG0_L0_doorblock_message_count}] = 0
    
    # {ROOMS.__len__} => ROOMS.__len__
    L3_room_counts = np.zeros({ROOMS.__len__}, dtype=np.int32)

    num_events = x_et.shape[0]
    for i in range(num_events):

        if level[i] == 0:
            # {EVENT_NAMES.navigate_click} => EVENT_NAMES.navigate_click
            if x_en[i] == {EVENT_NAMES.navigate_click}:
                # {HIST.LG0_L0_first_navigate_click_time} => HIST.LG0_L0_first_navigate_click_time
                if np.isnan(history[{HIST.LG0_L0_first_navigate_click_time}]):
                    history[{HIST.LG0_L0_first_navigate_click_time}] = x_et[i]
            
            # {TEXT_FQIDS.tunic_historicalsociety_closet_doorblock} => TEXT_FQIDS.tunic_historicalsociety_closet_doorblock
            if x_text_fqid[i] == {TEXT_FQIDS.tunic_historicalsociety_closet_doorblock}:
                # {HIST.LG0_L0_doorblock_message_count} => HIST.LG0_L0_doorblock_message_count
                history[{HIST.LG0_L0_doorblock_message_count}] += 1
        
        elif level[i] == 1:
            # {FQIDS.groupconvo} => FQIDS.groupconvo
            if x_fqid[i] == {FQIDS.groupconvo}:
                # {HIST.LG0_L1_first_groupconvo_room_coor_x} => HIST.LG0_L1_first_groupconvo_room_coor_x
                if np.isnan(history[{HIST.LG0_L1_first_groupconvo_room_coor_x}]):                
                    history[{HIST.LG0_L1_first_groupconvo_room_coor_x}] = x_room_coor_x[i]
            
            # {TEXT.It_s_a_women_s_basketball_jersey_exclamation_} => TEXT.It_s_a_women_s_basketball_jersey_exclamation_
            if x_text_numerical[i] == {TEXT.It_s_a_women_s_basketball_jersey_exclamation_}:
                # {HIST.LG0_L1_first_report_open_time} => HIST.LG0_L1_first_report_open_time
                if np.isnan(history[{HIST.LG0_L1_first_report_open_time}]):
                    history[{HIST.LG0_L1_first_report_open_time}] = x_et[i]
                    
            # {FQIDS.report}, {EVENT_NAMES.object_click}, {NAMES.close} => FQIDS.report, EVENT_NAMES.object_click, NAMES.close
            if x_fqid[i] == {FQIDS.report} and x_en[i] == {EVENT_NAMES.object_click} and x_n[i] == {NAMES.close}:
                # {HIST.LG0_L1_first_report_close_time} => HIST.LG0_L1_first_report_close_time
                if np.isnan(history[{HIST.LG0_L1_first_report_close_time}]):
                    history[{HIST.LG0_L1_first_report_close_time}] = x_et[i]
                    
        elif level[i] == 2:
            # {FQIDS.tunic}, {NAMES.close} => FQIDS.tunic, NAMES.close
            if x_fqid[i] == {FQIDS.tunic} and x_n[i] == {NAMES.close}:
                history[{HIST.LG0_L2_last_tunic_close_time}] = x_et[i] # {HIST.LG0_L2_last_tunic_close_time} => HIST.LG0_L2_last_tunic_close_time
                
            elif x_fqid[i] == {FQIDS.gramps}: # {FQIDS.gramps} => FQIDS.gramps
                if np.isnan(history[{HIST.LG0_L2_first_gramps_time}]): # {HIST.LG0_L2_first_gramps_time} => HIST.LG0_L2_first_gramps_time
                    history[{HIST.LG0_L2_first_gramps_time}] = x_et[i]  
                    
            elif x_fqid[i] == {FQIDS.cs}: # {FQIDS.cs} => FQIDS.cs
                if (i + 1) < num_events:
                    history[{HIST.LG0_L2_fqid_cs_event_duration_sum}] += (x_et[i+1] - x_et[i]) # {HIST.LG0_L2_fqid_cs_event_duration_sum} => HIST.LG0_L2_fqid_cs_event_duration_sum
                    history[{HIST.LG0_L2_fqid_cs_event_duration_count}] += 1 # {HIST.LG0_L2_fqid_cs_event_duration_count} => HIST.LG0_L2_fqid_cs_event_duration_count
            
            # {FQIDS.tunic}, {EVENT_NAMES.navigate_click} => FQIDS.tunic, EVENT_NAMES.navigate_click
            if x_fqid[i] == {FQIDS.tunic} and x_en[i] == {EVENT_NAMES.navigate_click}:
                if np.isnan(history[{HIST.LG0_L2_first_tunic_navigate_click_time}]): # {HIST.LG0_L2_first_tunic_navigate_click_time} => HIST.LG0_L2_first_tunic_navigate_click_time
                    history[{HIST.LG0_L2_first_tunic_navigate_click_time}] = x_et[i]
            
            # {TEXT_FQIDS.tunic_historicalsociety_collection_cs} => TEXT_FQIDS.tunic_historicalsociety_collection_cs
            if x_text_fqid[i] == {TEXT_FQIDS.tunic_historicalsociety_collection_cs}:
                history[{HIST.LG0_L2_last_text_fqid_cs_time}] = x_et[i] # {HIST.LG0_L2_last_text_fqid_cs_time} => HIST.LG0_L2_last_text_fqid_cs_time
                if np.isnan(history[{HIST.LG0_L2_first_text_fqid_cs_time}]):
                    history[{HIST.LG0_L2_first_text_fqid_cs_time}] = x_et[i] # {HIST.LG0_L2_first_text_fqid_cs_time} => HIST.LG0_L2_first_text_fqid_cs_time
                    
        elif level[i] == 3:
            # {HIST.LG0_L3_first_room_coord_x_click} => HIST.LG0_L3_first_room_coord_x_click
            if np.isnan(history[{HIST.LG0_L3_first_room_coord_x_click}]):
                history[{HIST.LG0_L3_first_room_coord_x_click}] = x_room_coor_x[i]
            
            # {EVENT_NAMES.notebook_click}, {NAMES.open} => EVENT_NAMES.notebook_click, NAMES.open
            if x_en[i] == {EVENT_NAMES.notebook_click} and x_n[i] == {NAMES.open}:
                # {HIST.LG0_L3_opened_notebook} => HIST.LG0_L3_opened_notebook
                if history[{HIST.LG0_L3_opened_notebook}] == 0:
                    history[{HIST.LG0_L3_opened_notebook}] = 1
            
            # {FQIDS.plaque} => FQIDS.plaque
            if x_fqid[i] == {FQIDS.plaque}:
                if (i + 1) < num_events:
                    # {HIST.LG0_L3_fqid_plaque_event_duration_sum} => HIST.LG0_L3_fqid_plaque_event_duration_sum
                    history[{HIST.LG0_L3_fqid_plaque_event_duration_sum}] += (x_et[i+1] - x_et[i])
                    
            slot_i = x_r[i]
            if slot_i < {ROOMS.__len__}: # {ROOMS.__len__} => ROOMS.__len__
                L3_room_counts[slot_i] += 1
                    
        elif level[i] == 4:
            # {FQIDS.chap1_finale}, {EVENT_NAMES.navigate_click} => FQIDS.chap1_finale, EVENT_NAMES.navigate_click
            if x_fqid[i] == {FQIDS.chap1_finale} and x_en[i] == {EVENT_NAMES.navigate_click}:
                # {HIST.LG0_L4_first_chap1_finale_nagivation_click_room_coords_x} => HIST.LG0_L4_first_chap1_finale_nagivation_click_room_coords_x
                if np.isnan(history[{HIST.LG0_L4_first_chap1_finale_nagivation_click_room_coords_x}]):
                    history[{HIST.LG0_L4_first_chap1_finale_nagivation_click_room_coords_x}] = x_room_coor_x[i]
            
            
    L3_nunique_room = (L3_room_counts != 0).sum()
    history[{HIST.LG0_L3_nunique_room}] = L3_nunique_room
                
"""
                            
SRC_LIST.append(src)

In [38]:
# Template for `lg0_fill_history(...)`.
# The generated function asserts `lg == 0` and, for each tracked text code,
# records the FIRST matching event only (the `_time` slot is written while it
# is still NaN):
#   * `_time`     -> x_et[i]
#   * `_duration` -> x_et[i+1] - x_et[i], skipped when the match is the final event
#   * `_index`    -> x_index[i]
# The "Why_don_t_you_head_to_the_Basketball_Center..." branch is the exception:
# it records the event AFTER the matching text (i+1, with the duration taken
# from i+1 to i+2) and writes nothing if the text is the final event.
# Independently of the text chain, every `chap1_finale` fqid overwrites
# `LG0_last_chap1_finale_time`, and the last event's time and index are stored
# at the end via `x_et[-1]` / `x_index[-1]`, so at least one event is assumed.
# It mutates `history` in place and has no return statement; `x_en`, `x_n` and
# `x_text_fqid` are accepted but not read.
# Placeholders in braces are presumably substituted with integer codes when
# SRC_LIST is rendered elsewhere -- TODO confirm.
src = """
@numba.jit(nopython=True, nogil=True, error_model='numpy', parallel=False)
def lg0_fill_history(
    lg,
    history,
    x_et,
    x_index,
    x_text_numerical,
    x_en,
    x_n,
    x_fqid,
    x_text_fqid,
):
    
    assert lg == 0
    
    num_events = x_et.shape[0]
    
    for i in range(num_events):
        # {TEXT.We_need_to_talk_about_that_missing_paperwork_} => TEXT.We_need_to_talk_about_that_missing_paperwork_
        if x_text_numerical[i] == {TEXT.We_need_to_talk_about_that_missing_paperwork_}:
            # {HIST.LG0_first_We_need_to_talk_about_that_missing_paperwork_time} => HIST.LG0_first_We_need_to_talk_about_that_missing_paperwork_time
            if np.isnan(history[{HIST.LG0_first_We_need_to_talk_about_that_missing_paperwork_time}]):
                # {HIST.LG0_first_We_need_to_talk_about_that_missing_paperwork_time} => HIST.LG0_first_We_need_to_talk_about_that_missing_paperwork_time
                history[{HIST.LG0_first_We_need_to_talk_about_that_missing_paperwork_time}] = x_et[i]
                if i != (num_events - 1):
                    # {HIST.LG0_first_We_need_to_talk_about_that_missing_paperwork_duration} => HIST.LG0_first_We_need_to_talk_about_that_missing_paperwork_duration
                    history[{HIST.LG0_first_We_need_to_talk_about_that_missing_paperwork_duration}] = x_et[i+1] - x_et[i]
                # {HIST.LG0_first_We_need_to_talk_about_that_missing_paperwork_index} => HIST.LG0_first_We_need_to_talk_about_that_missing_paperwork_index
                history[{HIST.LG0_first_We_need_to_talk_about_that_missing_paperwork_index}] = x_index[i]
            
        elif x_text_numerical[i] == {TEXT.Leopold__why_don_t_you_help_me_set_up_in_the_Capitol_question_}:
            # {TEXT.Leopold__why_don_t_you_help_me_set_up_in_the_Capitol_question_} => TEXT.Leopold__why_don_t_you_help_me_set_up_in_the_Capitol_question_
            # {HIST.LG0_first_Leopold__why_don_t_you_help_me_set_up_in_the_Capitol_time} => HIST.LG0_first_Leopold__why_don_t_you_help_me_set_up_in_the_Capitol_time
            if np.isnan(history[{HIST.LG0_first_Leopold__why_don_t_you_help_me_set_up_in_the_Capitol_time}]):
                history[{HIST.LG0_first_Leopold__why_don_t_you_help_me_set_up_in_the_Capitol_time}] = x_et[i]
                if i != (num_events - 1):
                    # {HIST.LG0_first_Leopold__why_don_t_you_help_me_set_up_in_the_Capitol_duration} => HIST.LG0_first_Leopold__why_don_t_you_help_me_set_up_in_the_Capitol_duration
                    history[{HIST.LG0_first_Leopold__why_don_t_you_help_me_set_up_in_the_Capitol_duration}] = x_et[i+1] - x_et[i]
                # {HIST.LG0_first_Leopold__why_don_t_you_help_me_set_up_in_the_Capitol_index} => HIST.LG0_first_Leopold__why_don_t_you_help_me_set_up_in_the_Capitol_index
                history[{HIST.LG0_first_Leopold__why_don_t_you_help_me_set_up_in_the_Capitol_index}] = x_index[i]
                
        elif x_text_numerical[i] == {TEXT.It_s_a_women_s_basketball_jersey_exclamation_}:
            # {TEXT.It_s_a_women_s_basketball_jersey_exclamation_} => TEXT.It_s_a_women_s_basketball_jersey_exclamation_
            # {HIST.LG0_first_It_s_a_women_s_basketball_jersey_time} => HIST.LG0_first_It_s_a_women_s_basketball_jersey_time
            if np.isnan(history[{HIST.LG0_first_It_s_a_women_s_basketball_jersey_time}]):
                history[{HIST.LG0_first_It_s_a_women_s_basketball_jersey_time}] = x_et[i]
                if i != (num_events - 1):
                    history[{HIST.LG0_first_It_s_a_women_s_basketball_jersey_duration}] = x_et[i+1] - x_et[i]
                history[{HIST.LG0_first_It_s_a_women_s_basketball_jersey_index}] = x_index[i]
                
        elif x_text_numerical[i] == {TEXT.Why_don_t_you_head_to_the_Basketball_Center_and_rustle_up_some_clues_question_}:
            # {TEXT.Why_don_t_you_head_to_the_Basketball_Center_and_rustle_up_some_clues_question_} => TEXT.Why_don_t_you_head_to_the_Basketball_Center_and_rustle_up_some_clues_question_
            # {HIST.LG0_first_event_after_first_Why_don_t_you_head_to_the_Basketball_Center_and_rustle_up_some_clues_question__time} => HIST.LG0_first_event_after_first_Why_don_t_you_head_to_the_Basketball_Center_and_rustle_up_some_clues_question__time
            if np.isnan(history[{HIST.LG0_first_event_after_first_Why_don_t_you_head_to_the_Basketball_Center_and_rustle_up_some_clues_question__time}]):
                if i != (num_events - 1):
                    history[{HIST.LG0_first_event_after_first_Why_don_t_you_head_to_the_Basketball_Center_and_rustle_up_some_clues_question__time}] = x_et[i+1]
                    if i != (num_events - 2):
                        history[{HIST.LG0_first_event_after_first_Why_don_t_you_head_to_the_Basketball_Center_and_rustle_up_some_clues_question__duration}] = x_et[i+2] - x_et[i+1]
                    history[{HIST.LG0_first_event_after_first_Why_don_t_you_head_to_the_Basketball_Center_and_rustle_up_some_clues_question__index}] = x_index[i+1]
                    
        elif x_text_numerical[i] == {TEXT.I_need_to_get_to_the_Capitol_and_tell_Gramps_exclamation_}:
            # {TEXT.I_need_to_get_to_the_Capitol_and_tell_Gramps_exclamation_} => TEXT.I_need_to_get_to_the_Capitol_and_tell_Gramps_exclamation_
            # {HIST.LG0_I_need_to_get_to_the_Capitol_and_tell_Gramps_exclamation__time} => HIST.LG0_I_need_to_get_to_the_Capitol_and_tell_Gramps_exclamation__time
            if np.isnan(history[{HIST.LG0_I_need_to_get_to_the_Capitol_and_tell_Gramps_exclamation__time}]):
                history[{HIST.LG0_I_need_to_get_to_the_Capitol_and_tell_Gramps_exclamation__time}] = x_et[i]
                if i != (num_events - 1):
                    history[{HIST.LG0_I_need_to_get_to_the_Capitol_and_tell_Gramps_exclamation__duration}] = x_et[i+1] - x_et[i]
                history[{HIST.LG0_I_need_to_get_to_the_Capitol_and_tell_Gramps_exclamation__index}] = x_index[i]
                
        elif x_text_numerical[i] == {TEXT.Just_talking_to_Teddy_}:
            # {TEXT.Just_talking_to_Teddy_} => TEXT.Just_talking_to_Teddy_
            # {HIST.LG0_first_Just_talking_to_Teddy__time} => HIST.LG0_first_Just_talking_to_Teddy__time
            if np.isnan(history[{HIST.LG0_first_Just_talking_to_Teddy__time}]):
                history[{HIST.LG0_first_Just_talking_to_Teddy__time}] = x_et[i]
                if i != (num_events - 1):
                    history[{HIST.LG0_first_Just_talking_to_Teddy__duration}] = x_et[i+1] - x_et[i]
                history[{HIST.LG0_first_Just_talking_to_Teddy__index}] = x_index[i]
                
        elif x_text_numerical[i] == {TEXT.Hot_Dog_exclamation__I_knew_it_exclamation_}:
            # {TEXT.Hot_Dog_exclamation__I_knew_it_exclamation_} => TEXT.Hot_Dog_exclamation__I_knew_it_exclamation_
            # {HIST.LG0_first_Hot_Dog_exclamation__I_knew_it_exclamation__time} => HIST.LG0_first_Hot_Dog_exclamation__I_knew_it_exclamation__time
            if np.isnan(history[{HIST.LG0_first_Hot_Dog_exclamation__I_knew_it_exclamation__time}]):
                history[{HIST.LG0_first_Hot_Dog_exclamation__I_knew_it_exclamation__time}] = x_et[i]
                if i != (num_events - 1):
                    history[{HIST.LG0_first_Hot_Dog_exclamation__I_knew_it_exclamation__duration}] = x_et[i+1] - x_et[i]
                history[{HIST.LG0_first_Hot_Dog_exclamation__I_knew_it_exclamation__index}] = x_index[i]
                
        elif x_text_numerical[i] == {TEXT.Could_be__But_we_need_evidence_exclamation_}:
            # {TEXT.Could_be__But_we_need_evidence_exclamation_} => TEXT.Could_be__But_we_need_evidence_exclamation_
            # {HIST.LG0_first_Could_be__But_we_need_evidence_exclamation__time} => HIST.LG0_first_Could_be__But_we_need_evidence_exclamation__time
            if np.isnan(history[{HIST.LG0_first_Could_be__But_we_need_evidence_exclamation__time}]):
                history[{HIST.LG0_first_Could_be__But_we_need_evidence_exclamation__time}] = x_et[i]
                if i != (num_events - 1):
                    history[{HIST.LG0_first_Could_be__But_we_need_evidence_exclamation__duration}] = x_et[i+1] - x_et[i]
                history[{HIST.LG0_first_Could_be__But_we_need_evidence_exclamation__index}] = x_index[i]
        
        # {FQIDS.chap1_finale} => FQIDS.chap1_finale
        if x_fqid[i] == {FQIDS.chap1_finale}:
            # {HIST.LG0_last_chap1_finale_time} => HIST.LG0_last_chap1_finale_time
            history[{HIST.LG0_last_chap1_finale_time}] = x_et[i]
    
    # {HIST.LG0_last_event_time} => HIST.LG0_last_event_time
    history[{HIST.LG0_last_event_time}] = x_et[-1]
    # {HIST.LG0_last_event_index} => HIST.LG0_last_event_index
    history[{HIST.LG0_last_event_index}] = x_index[-1]
"""
                                
SRC_LIST.append(src)

In [39]:
# Template (plain string) holding the source of `calculate_LG0_features`.
#
# The `{FN.*}` / `{HIST.*}` tokens are placeholders; how they are resolved is not
# shown in this cell -- presumably they are replaced by integer column/slot
# indices when the template is rendered (TODO confirm where SRC_LIST is consumed).
# The same tokens also appear inside the template's own `#` comments.
#
# Templated function, as written below:
#   * only when `lg == 0`: calls `lg0_fill_history`, `lg0_counts` and
#     `lg0_per_level_fill_history`, handing them `history` and the event arrays;
#   * for every `lg`: fills the LG0 feature columns of `out` (`out[:, FN.*]`)
#     from `history` -- most are a single history slot, several are the
#     difference of two slots, and `LG0_L2_fqid_cs_event_duration_mean` is
#     `..._sum / ..._count`.
# NOTE(review): the mean divides by a history count with no zero check; with
# `error_model='numpy'` this is presumably meant to yield nan/inf rather than
# raise -- confirm against the Numba docs.
src = """
@numba.jit(nopython=True, nogil=True, error_model='numpy', parallel=False)
def calculate_LG0_features(
    lg,
    level,
    out,
    history,
    x_et,
    x_index,
    x_text_numerical,
    x_en,
    x_n,
    x_fqid,
    x_room_coor_x,
    x_b,
    x_r,
    x_text_fqid,
):
    
    if lg == 0:
        
        lg0_fill_history(
            lg,
            history,
            x_et,
            x_index,
            x_text_numerical,
            x_en,
            x_n,
            x_fqid,
            x_text_fqid,
        )
        
        lg0_counts(
            lg,
            history,
            x_en,
            x_n,
            x_fqid,
        )
        
        lg0_per_level_fill_history(
            lg,
            level,
            history,
            x_et,
            x_index,
            x_text_numerical,
            x_en,
            x_n,
            x_fqid,
            x_room_coor_x,
            x_b,
            x_r,
            x_text_fqid,
        )
    
    # {FN.LG0_plaque_event_click_count} => FN.LG0_plaque_event_click_count
    out[:, {FN.LG0_plaque_event_click_count}] = (
        history[{HIST.LG0_plaque_event_click_count}] 
    )  # {HIST.LG0_plaque_event_click_count} => HIST.LG0_plaque_event_click_count
    
    # {FN.LG0_first_Leopold__why_don_t_you_help_me_set_up_in_the_Capitol_duration} => FN.LG0_first_Leopold__why_don_t_you_help_me_set_up_in_the_Capitol_duration
    out[:, {FN.LG0_first_Leopold__why_don_t_you_help_me_set_up_in_the_Capitol_duration}] = (
        history[{HIST.LG0_first_Leopold__why_don_t_you_help_me_set_up_in_the_Capitol_duration}]
    ) # {HIST.LG0_first_Leopold__why_don_t_you_help_me_set_up_in_the_Capitol_duration} => HIST.LG0_first_Leopold__why_don_t_you_help_me_set_up_in_the_Capitol_duration
    
    # {FN.LG0_first_It_s_a_women_s_basketball_jersey_duration} => FN.LG0_first_It_s_a_women_s_basketball_jersey_duration
    out[:, {FN.LG0_first_It_s_a_women_s_basketball_jersey_duration}] = (
        history[{HIST.LG0_first_It_s_a_women_s_basketball_jersey_duration}]
    ) # {HIST.LG0_first_It_s_a_women_s_basketball_jersey_duration} => HIST.LG0_first_It_s_a_women_s_basketball_jersey_duration
    
    # {FN.LG0_first_event_after_first_Why_don_t_you_head_to_the_Basketball_Center_and_rustle_up_some_clues_duration} => FN.LG0_first_event_after_first_Why_don_t_you_head_to_the_Basketball_Center_and_rustle_up_some_clues_duration
    out[:, {FN.LG0_first_event_after_first_Why_don_t_you_head_to_the_Basketball_Center_and_rustle_up_some_clues_duration}] = (
        history[{HIST.LG0_first_event_after_first_Why_don_t_you_head_to_the_Basketball_Center_and_rustle_up_some_clues_question__duration}]
    ) 
    
    # {FN.LG0_time_before_end_of_LG_first_read_I_need_to_get_to_the_Capitol_and_tell_Gramps_exclamation_} => FN.LG0_time_before_end_of_LG_first_read_I_need_to_get_to_the_Capitol_and_tell_Gramps_exclamation_
    out[:, {FN.LG0_time_before_end_of_LG_first_read_I_need_to_get_to_the_Capitol_and_tell_Gramps_exclamation_}] = (
        history[{HIST.LG0_last_event_time}] - history[{HIST.LG0_I_need_to_get_to_the_Capitol_and_tell_Gramps_exclamation__time}]
    ) # {HIST.LG0_last_event_time}, {HIST.LG0_I_need_to_get_to_the_Capitol_and_tell_Gramps_exclamation__time} => HIST.LG0_last_event_time, HIST.LG0_I_need_to_get_to_the_Capitol_and_tell_Gramps_exclamation__time
    
    # {FN.LG0_first_Just_talking_to_Teddy_duration} => FN.LG0_first_Just_talking_to_Teddy_duration
    out[:, {FN.LG0_first_Just_talking_to_Teddy_duration}] = (
        history[{HIST.LG0_first_Just_talking_to_Teddy__duration}]
    ) # {HIST.LG0_first_Just_talking_to_Teddy__duration} => HIST.LG0_first_Just_talking_to_Teddy__duration
    
    # {FN.LG0_first_Hot_Dog_exclamation__I_knew_it_exclamation_duration} => FN.LG0_first_Hot_Dog_exclamation__I_knew_it_exclamation_duration
    out[:, {FN.LG0_first_Hot_Dog_exclamation__I_knew_it_exclamation_duration}] = (
        history[{HIST.LG0_first_Hot_Dog_exclamation__I_knew_it_exclamation__duration}]
    )
    
    # {FN.LG0_L1_first_groupconvo_room_coor_x} => FN.LG0_L1_first_groupconvo_room_coor_x
    out[:, {FN.LG0_L1_first_groupconvo_room_coor_x}] = (
        history[{HIST.LG0_L1_first_groupconvo_room_coor_x}]
    )
    
    # {FN.LG0_L1_first_report_open_duration} => FN.LG0_L1_first_report_open_duration
    out[:, {FN.LG0_L1_first_report_open_duration}] = (
        history[{HIST.LG0_L1_first_report_close_time}] - history[{HIST.LG0_L1_first_report_open_time}]
    ) # {HIST.LG0_L1_first_report_close_time}, {HIST.LG0_L1_first_report_open_time} => HIST.LG0_L1_first_report_close_time, HIST.LG0_L1_first_report_open_time
    
    # {FN.LG0_L2_first_time_between_closing_tunic_and_gramps} => FN.LG0_L2_first_time_between_closing_tunic_and_gramps
    out[:, {FN.LG0_L2_first_time_between_closing_tunic_and_gramps}] = (
        history[{HIST.LG0_L2_first_gramps_time}] - history[{HIST.LG0_L2_last_tunic_close_time}]
    ) # {HIST.LG0_L2_first_gramps_time}, {HIST.LG0_L2_last_tunic_close_time} => HIST.LG0_L2_first_gramps_time, HIST.LG0_L2_last_tunic_close_time
    
    # {FN.LG0_L4_first_chap1_finale_nagivation_click_room_coords_x} = FN.LG0_L4_first_chap1_finale_nagivation_click_room_coords_x
    out[:, {FN.LG0_L4_first_chap1_finale_nagivation_click_room_coords_x}] = history[{HIST.LG0_L4_first_chap1_finale_nagivation_click_room_coords_x}]
    
    # {FN.LG0_L0_elapsed_time_to_start_of_open_play} => FN.LG0_L0_elapsed_time_to_start_of_open_play
    out[:, {FN.LG0_L0_elapsed_time_to_start_of_open_play}] = (
        history[{HIST.LG0_L0_first_navigate_click_time}] - history[{HIST.LG0_L0_start_elapsed_time}]
    ) # {HIST.LG0_L0_first_navigate_click_time}, {HIST.LG0_L0_start_elapsed_time} => HIST.LG0_L0_first_navigate_click_time, HIST.LG0_L0_start_elapsed_time
    
    # {FN.LG0_first_Could_be__But_we_need_evidence_exclamation_duration} => FN.LG0_first_Could_be__But_we_need_evidence_exclamation_duration
    out[:, {FN.LG0_first_Could_be__But_we_need_evidence_exclamation_duration}] = (
        history[{HIST.LG0_first_Could_be__But_we_need_evidence_exclamation__duration}]
    )
    
    # {FN.LG0_fqid_chap1_finale_last_occurrence_duration_from_end} => FN.LG0_fqid_chap1_finale_last_occurrence_duration_from_end
    out[:, {FN.LG0_fqid_chap1_finale_last_occurrence_duration_from_end}] = (
        history[{HIST.LG0_last_event_time}] - history[{HIST.LG0_last_chap1_finale_time}]
    ) # {HIST.LG0_last_event_time}, {HIST.LG0_last_chap1_finale_time} => HIST.LG0_last_event_time, HIST.LG0_last_chap1_finale_time
    
    # {FN.LG0_L3_opened_notebook} => FN.LG0_L3_opened_notebook
    out[:, {FN.LG0_L3_opened_notebook}] = history[{HIST.LG0_L3_opened_notebook}]
    
    # {FN.LG0_L2_fqid_cs_event_duration_mean} => FN.LG0_L2_fqid_cs_event_duration_mean
    out[:, {FN.LG0_L2_fqid_cs_event_duration_mean}] = history[{HIST.LG0_L2_fqid_cs_event_duration_sum}]/history[{HIST.LG0_L2_fqid_cs_event_duration_count}]
    # {HIST.LG0_L2_fqid_cs_event_duration_sum}, {HIST.LG0_L2_fqid_cs_event_duration_count} => HIST.LG0_L2_fqid_cs_event_duration_sum, HIST.LG0_L2_fqid_cs_event_duration_count
    
    # {FN.LG0_L3_fqid_plaque_event_duration_sum} => FN.LG0_L3_fqid_plaque_event_duration_sum
    out[:, {FN.LG0_L3_fqid_plaque_event_duration_sum}] = history[{HIST.LG0_L3_fqid_plaque_event_duration_sum}]
    
    # {FN.LG0_L2_time_between_reading_cs_text_and_first_tunic_navigate_click_time} => FN.LG0_L2_time_between_reading_cs_text_and_first_tunic_navigate_click_time
    out[:, {FN.LG0_L2_time_between_reading_cs_text_and_first_tunic_navigate_click_time}] = (
        history[{HIST.LG0_L2_first_tunic_navigate_click_time}] - history[{HIST.LG0_L2_last_text_fqid_cs_time}]
    ) # {HIST.LG0_L2_first_tunic_navigate_click_time}, HIST.LG0_L2_last_text_fqid_cs_time => HIST.LG0_L2_first_tunic_navigate_click_time, HIST.LG0_L2_last_text_fqid_cs_time
    
    # {FN.LG0_L3_first_room_coord_x_click} => FN.LG0_L3_first_room_coord_x_click
    out[:, {FN.LG0_L3_first_room_coord_x_click}] = history[{HIST.LG0_L3_first_room_coord_x_click}]
    
    # {FN.LG0_L3_nunique_room} => FN.LG0_L3_nunique_room
    out[:, {FN.LG0_L3_nunique_room}] = history[{HIST.LG0_L3_nunique_room}]
    
    # {FN.LG0_L0_doorblock_message_count} => FN.LG0_L0_doorblock_message_count
    out[:, {FN.LG0_L0_doorblock_message_count}] = history[{HIST.LG0_L0_doorblock_message_count}]
    
"""

SRC_LIST.append(src)

In [40]:
# Template (plain string) holding the source of `lg1_fill_history`.
#
# The `{TEXT.*}` / `{FQIDS.*}` / `{EVENT_NAMES.*}` / `{HIST.*}` tokens are
# placeholders; how they are resolved is not shown in this cell -- presumably
# they become integer codes/slot indices when the template is rendered
# (TODO confirm where SRC_LIST is consumed).
#
# Templated function, as written below: a single pass over the events
# (i in range(x_et.shape[0])) that records into `history`:
#   * for five dialogue texts matched on `x_text_numerical`: time, index and
#     duration (`x_et[i+1] - x_et[i]`, skipped on the last event) of the first
#     occurrence; "first" relies on the `_time` slot still being NaN;
#   * for fqid `magnify` and `journals`: the largest gap to the following event;
#   * for fqid `businesscards_card_1_next`: the gap to the following event the
#     first time it is seen with a following event;
#   * first/last matching event time for the `logbook_page_bingo` and
#     `reader_paper2_bingo` windows;
#   * first/last event time and index of the whole array.
# `lg` is only asserted to equal 1; `x_text_fqid_numerical` is accepted but unused.
# NOTE(review): `x_et[0]` / `x_et[-1]` are indexed unconditionally, so at least
# one event is assumed -- confirm callers never pass empty arrays.
src = """
@numba.jit(nopython=True, nogil=True, error_model='numpy', parallel=False)
def lg1_fill_history(
    lg,
    history,
    x_et,
    x_index,
    x_text_numerical,
    x_fqid,
    x_text_fqid_numerical,
    x_en,
):
    
    assert lg == 1
    
    num_events = x_et.shape[0]
    
    for i in range(num_events):
        # {TEXT.Youmans_was_a_suffragist_here_in_Wisconsin_} => TEXT.Youmans_was_a_suffragist_here_in_Wisconsin_
        if x_text_numerical[i] == {TEXT.Youmans_was_a_suffragist_here_in_Wisconsin_}:
            # {HIST.LG1_first_Youmans_was_a_suffragist_here_in_Wisconsin_time} => HIST.LG1_first_Youmans_was_a_suffragist_here_in_Wisconsin_time
            if np.isnan(history[{HIST.LG1_first_Youmans_was_a_suffragist_here_in_Wisconsin_time}]):
                history[{HIST.LG1_first_Youmans_was_a_suffragist_here_in_Wisconsin_time}] = x_et[i]
                if i != (num_events - 1):
                    # {HIST.LG1_first_Youmans_was_a_suffragist_here_in_Wisconsin_duration} => HIST.LG1_first_Youmans_was_a_suffragist_here_in_Wisconsin_duration
                    history[{HIST.LG1_first_Youmans_was_a_suffragist_here_in_Wisconsin_duration}] = x_et[i+1] - x_et[i]
                # {HIST.LG1_first_Youmans_was_a_suffragist_here_in_Wisconsin_index} => HIST.LG1_first_Youmans_was_a_suffragist_here_in_Wisconsin_index
                history[{HIST.LG1_first_Youmans_was_a_suffragist_here_in_Wisconsin_index}] = x_index[i]
                
        elif x_text_numerical[i] == {TEXT.Who_could_ve_done_this_question_}:
            # {TEXT.Who_could_ve_done_this_question_} => TEXT.Who_could_ve_done_this_question_
            # {HIST.LG1_first_Who_could_ve_done_this_question__time} => HIST.LG1_first_Who_could_ve_done_this_question__time
            if np.isnan(history[{HIST.LG1_first_Who_could_ve_done_this_question__time}]):
                history[{HIST.LG1_first_Who_could_ve_done_this_question__time}] = x_et[i]
                if i != (num_events - 1):
                    history[{HIST.LG1_first_Who_could_ve_done_this_question__duration}] = x_et[i+1] - x_et[i]
                history[{HIST.LG1_first_Who_could_ve_done_this_question__index}] = x_index[i]
                
        elif x_text_numerical[i] == {TEXT.Nope__But_Youmans_and_other_suffragists_worked_hard_to_change_that_}:
            # {TEXT.Nope__But_Youmans_and_other_suffragists_worked_hard_to_change_that_} => TEXT.Nope__But_Youmans_and_other_suffragists_worked_hard_to_change_that
            # {HIST.LG1_first_Nope__But_Youmans_and_other_suffragists_worked_hard_to_change_that__time} => HIST.LG1_first_Nope__But_Youmans_and_other_suffragists_worked_hard_to_change_that__time
            if np.isnan(history[{HIST.LG1_first_Nope__But_Youmans_and_other_suffragists_worked_hard_to_change_that__time}]):
                history[{HIST.LG1_first_Nope__But_Youmans_and_other_suffragists_worked_hard_to_change_that__time}] = x_et[i]
                if i != (num_events - 1):
                    history[{HIST.LG1_first_Nope__But_Youmans_and_other_suffragists_worked_hard_to_change_that__duration}] = x_et[i+1] - x_et[i]
                history[{HIST.LG1_first_Nope__But_Youmans_and_other_suffragists_worked_hard_to_change_that__index}] = x_index[i]
                
        elif x_text_numerical[i] == {TEXT.I_ve_got_a_stack_of_business_cards_from_my_favorite_cleaners_}:
            # {TEXT.I_ve_got_a_stack_of_business_cards_from_my_favorite_cleaners_} => TEXT.I_ve_got_a_stack_of_business_cards_from_my_favorite_cleaners_
            # {HIST.LG1_first_I_ve_got_a_stack_of_business_cards_from_my_favorite_cleaners__time} => HIST.LG1_first_I_ve_got_a_stack_of_business_cards_from_my_favorite_cleaners__time
            if np.isnan(history[{HIST.LG1_first_I_ve_got_a_stack_of_business_cards_from_my_favorite_cleaners__time}]):
                history[{HIST.LG1_first_I_ve_got_a_stack_of_business_cards_from_my_favorite_cleaners__time}] = x_et[i]
                if i != (num_events - 1):
                    history[{HIST.LG1_first_I_ve_got_a_stack_of_business_cards_from_my_favorite_cleaners__duration}] = x_et[i+1] - x_et[i]
                history[{HIST.LG1_first_I_ve_got_a_stack_of_business_cards_from_my_favorite_cleaners__index}] = x_index[i]
                
        elif x_text_numerical[i] == {TEXT.Where_did_you_get_that_coffee_question_}:
            # {TEXT.Where_did_you_get_that_coffee_question_} => TEXT.Where_did_you_get_that_coffee_question_
            # {HIST.LG1_first_Where_did_you_get_that_coffee_question__time} => HIST.LG1_first_Where_did_you_get_that_coffee_question__time
            if np.isnan(history[{HIST.LG1_first_Where_did_you_get_that_coffee_question__time}]):
                history[{HIST.LG1_first_Where_did_you_get_that_coffee_question__time}] = x_et[i]
                if i != (num_events - 1):
                    history[{HIST.LG1_first_Where_did_you_get_that_coffee_question__duration}] = x_et[i+1] - x_et[i]
                history[{HIST.LG1_first_Where_did_you_get_that_coffee_question__index}] = x_index[i]
        
        # {FQIDS.magnify} => FQIDS.magnify
        if x_fqid[i] == {FQIDS.magnify}:
            if i != (num_events - 1):
                event_duration = x_et[i+1] - x_et[i]
                # {HIST.LG1_max_fqid_magnify_duration} => HIST.LG1_max_fqid_magnify_duration
                if np.isnan(history[{HIST.LG1_max_fqid_magnify_duration}]) or event_duration > history[{HIST.LG1_max_fqid_magnify_duration}]:
                    history[{HIST.LG1_max_fqid_magnify_duration}] = event_duration
                   
        elif x_fqid[i] == {FQIDS.businesscards_card_1_next}: # {FQIDS.businesscards_card_1_next} => FQIDS.businesscards_card_1_next
            if i != (num_events - 1):
                # {HIST.LG1_first_fqid_businesscards_card_1_next_duration} => HIST.LG1_first_fqid_businesscards_card_1_next_duration
                if np.isnan(history[{HIST.LG1_first_fqid_businesscards_card_1_next_duration}]):
                    history[{HIST.LG1_first_fqid_businesscards_card_1_next_duration}] = x_et[i+1] - x_et[i]
                   
        elif x_fqid[i] == {FQIDS.journals}: # {FQIDS.journals} => FQIDS.journals
            if i != (num_events - 1):
                event_duration = x_et[i+1] - x_et[i]
                # {HIST.LG1_max_fqid_journals_duration} => HIST.LG1_max_fqid_journals_duration
                if np.isnan(history[{HIST.LG1_max_fqid_journals_duration}]) or event_duration > history[{HIST.LG1_max_fqid_journals_duration}]:
                    history[{HIST.LG1_max_fqid_journals_duration}] = event_duration
        
        # {TEXT.Here_s_the_log_book_}, {FQIDS.logbook_page_bingo} => TEXT.Here_s_the_log_book_, FQIDS.logbook_page_bingo
        if (x_text_numerical[i] == {TEXT.Here_s_the_log_book_}) or (x_fqid[i] == {FQIDS.logbook_page_bingo}):
            # {HIST.LG1_logbook_page_bingo_start_time} => HIST.LG1_logbook_page_bingo_start_time
            if np.isnan(history[{HIST.LG1_logbook_page_bingo_start_time}]):
                history[{HIST.LG1_logbook_page_bingo_start_time}] = x_et[i]                
            # {HIST.LG1_logbook_page_bingo_end_time} => HIST.LG1_logbook_page_bingo_end_time
            history[{HIST.LG1_logbook_page_bingo_end_time}] = x_et[i]
        
        # {EVENT_NAMES.navigate_click}, {FQIDS.reader}, {FQIDS.reader_paper2_bingo} => EVENT_NAMES.navigate_click, FQIDS.reader, FQIDS.reader_paper2_bingo
        if ((x_en[i] == {EVENT_NAMES.navigate_click}) and (x_fqid[i] == {FQIDS.reader})) or (x_fqid[i] == {FQIDS.reader_paper2_bingo}):
            # {HIST.LG1_reader_paper2_bingo_start_time} => HIST.LG1_reader_paper2_bingo_start_time
            if np.isnan(history[{HIST.LG1_reader_paper2_bingo_start_time}]):
                history[{HIST.LG1_reader_paper2_bingo_start_time}] = x_et[i]
            # {HIST.LG1_reader_paper2_bingo_end_time} => HIST.LG1_reader_paper2_bingo_end_time
            history[{HIST.LG1_reader_paper2_bingo_end_time}] = x_et[i]
    
    history[{HIST.LG1_first_event_time}] = x_et[0] # {HIST.LG1_first_event_time} => HIST.LG1_first_event_time
    history[{HIST.LG1_first_event_index}] = x_index[0] # {HIST.LG1_first_event_index} => HIST.LG1_first_event_index
    history[{HIST.LG1_last_event_time}] = x_et[-1] # {HIST.LG1_last_event_time} => HIST.LG1_last_event_time
    history[{HIST.LG1_last_event_index}] = x_index[-1] # {HIST.LG1_last_event_index} => HIST.LG1_last_event_index
"""

SRC_LIST.append(src)

In [41]:
# Template (plain string) holding the source of `lg1_per_level_fill_history`.
#
# The `{TEXT.*}` / `{FQIDS.*}` / `{EVENT_NAMES.*}` / `{BUILDINGS.*}` / `{ROOMS.*}`
# / `{HIST.*}` tokens are placeholders; how they are resolved is not shown in
# this cell -- presumably they become integer codes/slot indices when the
# template is rendered (TODO confirm where SRC_LIST is consumed).
#
# Templated function, as written below: a single pass over the events that
# branches on `level[i]` (6 through 12) and stores first/last event times,
# event indices and room coordinates into `history`. A "first ..." slot is only
# written while it is still NaN; "last ..." slots are overwritten on every match.
# `lg` is only asserted to equal 1; `x_n` is accepted but unused.
#
# The level-7 lookup of the event preceding the first "Hello there!" text is
# guarded with `(i - 1) >= 0`, matching the other previous-event lookups in
# this template, so that i == 0 never reads `x_et[-1]` (the last event).
src = """
@numba.jit(nopython=True, nogil=True, error_model='numpy', parallel=False)
def lg1_per_level_fill_history(
    lg,
    level,
    history,
    x_et,
    x_index,
    x_text_numerical,
    x_en,
    x_n,
    x_fqid,
    x_room_coor_x,
    x_room_coor_y,
    x_b,
    x_r,
):

    assert lg == 1

    num_events = x_et.shape[0]
    for i in range(num_events):

        if level[i] == 6:
            # {FQIDS.magnify} => FQIDS.magnify
            if x_fqid[i] == {FQIDS.magnify}:
                # {HIST.LG1_L6_last_magnify_time} => HIST.LG1_L6_last_magnify_time
                history[{HIST.LG1_L6_last_magnify_time}] = x_et[i]
                
            elif x_fqid[i] == {FQIDS.tobasement}: # {FQIDS.tobasement} => FQIDS.tobasement
                # {HIST.LG1_L6_first_time_leave_closet_dirty} => HIST.LG1_L6_first_time_leave_closet_dirty
                if np.isnan(history[{HIST.LG1_L6_first_time_leave_closet_dirty}]):
                    history[{HIST.LG1_L6_first_time_leave_closet_dirty}] = x_et[i]
            
            # {TEXT.Ah__that_s_better_exclamation_} => TEXT.Ah__that_s_better_exclamation_
            if x_text_numerical[i] == {TEXT.Ah__that_s_better_exclamation_}:
                # {HIST.LG1_L6_Ah__that_s_better_exclamation__previous_time} => HIST.LG1_L6_Ah__that_s_better_exclamation__previous_time
                if np.isnan(history[{HIST.LG1_L6_Ah__that_s_better_exclamation__previous_time}]):
                    if i > 0:
                        history[{HIST.LG1_L6_Ah__that_s_better_exclamation__previous_time}] = x_et[i - 1]    
                        
            elif x_text_numerical[i] == {TEXT.He_s_our_expert_record_keeper_}:
                # {TEXT.He_s_our_expert_record_keeper_} => TEXT.He_s_our_expert_record_keeper_
                if np.isnan(history[{HIST.LG1_L6_first_finished_talking_to_gramps}]):
                    # {HIST.LG1_L6_first_finished_talking_to_gramps} => HIST.LG1_L6_first_finished_talking_to_gramps
                    history[{HIST.LG1_L6_first_finished_talking_to_gramps}] = x_et[i]
                        
            if x_fqid[i] == {FQIDS.trigger_scarf}: # {FQIDS.trigger_scarf} => FQIDS.trigger_scarf
                # {HIST.LG1_L6_last_trigger_scarf_time} => HIST.LG1_L6_last_trigger_scarf_time
                history[{HIST.LG1_L6_last_trigger_scarf_time}] = x_et[i]            
                if np.isnan(history[{HIST.LG1_L6_first_trigger_scarf_prev_time}]):
                    if (i - 1) >= 0:
                        history[{HIST.LG1_L6_first_trigger_scarf_prev_time}] = x_et[i - 1]
                        
                if (i + 1) < num_events:
                    # {HIST.LG1_L6_last_trigger_scarf_next_room_coor_x} => HIST.LG1_L6_last_trigger_scarf_next_room_coor_x
                    history[{HIST.LG1_L6_last_trigger_scarf_next_room_coor_x}] = x_room_coor_x[i + 1]
                   
            if x_fqid[i] == {FQIDS.toentry}: # {FQIDS.toentry} => FQIDS.toentry
                # {HIST.LG1_L6_first_toentry_time} => HIST.LG1_L6_first_toentry_time
                if np.isnan(history[{HIST.LG1_L6_first_toentry_time}]):
                    history[{HIST.LG1_L6_first_toentry_time}] = x_et[i]
            
            # {TEXT.I_bet_the_archivist_could_use_this_exclamation_} => TEXT.I_bet_the_archivist_could_use_this_exclamation_
            if x_text_numerical[i] == {TEXT.I_bet_the_archivist_could_use_this_exclamation_}:
                if i + 1 < num_events:
                    # {HIST.LG1_L6_first_y_room_coords_of_next_click_after_finding_magnifying_glass} => HIST.LG1_L6_first_y_room_coords_of_next_click_after_finding_magnifying_glass
                    if np.isnan(history[{HIST.LG1_L6_first_y_room_coords_of_next_click_after_finding_magnifying_glass}]):
                        history[{HIST.LG1_L6_first_y_room_coords_of_next_click_after_finding_magnifying_glass}] = x_room_coor_y[i + 1]
            
            # {TEXT.Now_if_only_I_could_read_this_thing_} => TEXT.Now_if_only_I_could_read_this_thing_
            # {TEXT.Now_if_only_I_could_read_this_thing__Blasted_tiny_letters___} => TEXT.Now_if_only_I_could_read_this_thing__Blasted_tiny_letters___
            if (
                x_text_numerical[i] == {TEXT.Now_if_only_I_could_read_this_thing_}
                or x_text_numerical[i] == {TEXT.Now_if_only_I_could_read_this_thing__Blasted_tiny_letters___}
            ):
                if i + 1 < num_events:
                    # {HIST.LG1_L6_first_y_room_coords_of_next_click_after_archivist_mentions_difficulty_reading} => HIST.LG1_L6_first_y_room_coords_of_next_click_after_archivist_mentions_difficulty_reading
                    if np.isnan(history[{HIST.LG1_L6_first_y_room_coords_of_next_click_after_archivist_mentions_difficulty_reading}]):
                        history[{HIST.LG1_L6_first_y_room_coords_of_next_click_after_archivist_mentions_difficulty_reading}] = x_room_coor_y[i + 1]
                    
            # NOTE(review): the slot name says non_nan_fqid, yet this branch matches FQIDS.NaN - confirm which is intended
            if x_fqid[i] == {FQIDS.NaN}: # {FQIDS.NaN} => FQIDS.NaN
                # {HIST.LG1_L6_first_y_room_coords_for_non_nan_fqid} => HIST.LG1_L6_first_y_room_coords_for_non_nan_fqid
                if np.isnan(history[{HIST.LG1_L6_first_y_room_coords_for_non_nan_fqid}]):
                    history[{HIST.LG1_L6_first_y_room_coords_for_non_nan_fqid}] = x_room_coor_y[i]
                
            
        elif level[i] == 7:
            # {FQIDS.tostacks} => FQIDS.tostacks
            if x_fqid[i] == {FQIDS.tostacks}:
                # {HIST.LG1_L7_first_tostacks_time} => HIST.LG1_L7_first_tostacks_time
                if np.isnan(history[{HIST.LG1_L7_first_tostacks_time}]):
                    history[{HIST.LG1_L7_first_tostacks_time}] = x_et[i]
            elif x_fqid[i] == {FQIDS.toentry}: # {FQIDS.toentry} => FQIDS.toentry
                # {HIST.LG1_L7_first_toentry_time} => HIST.LG1_L7_first_toentry_time
                if np.isnan(history[{HIST.LG1_L7_first_toentry_time}]):
                    history[{HIST.LG1_L7_first_toentry_time}] = x_et[i]
            elif x_fqid[i] == {FQIDS.businesscards}: # {FQIDS.businesscards} => FQIDS.businesscards
                # {HIST.LG1_L7_first_businesscards_time} => HIST.LG1_L7_first_businesscards_time
                if np.isnan(history[{HIST.LG1_L7_first_businesscards_time}]):
                    history[{HIST.LG1_L7_first_businesscards_time}] = x_et[i]
                    
            
            # {TEXT.Hello_there_exclamation_} => TEXT.Hello_there_exclamation_
            if x_text_numerical[i] == {TEXT.Hello_there_exclamation_}:
                # {HIST.LG1_L7_first_Hello_there_exclamation_prev_time} => HIST.LG1_L7_first_Hello_there_exclamation_prev_time
                if np.isnan(history[{HIST.LG1_L7_first_Hello_there_exclamation_prev_time}]):
                    # the first event has no predecessor; unguarded, x_et[i - 1] would wrap around to the last event
                    if (i - 1) >= 0:
                        history[{HIST.LG1_L7_first_Hello_there_exclamation_prev_time}] = x_et[i - 1]
            elif x_text_numerical[i] == {TEXT.Why_don_t_you_take_a_look_question_}:
                # {TEXT.Why_don_t_you_take_a_look_question_} => TEXT.Why_don_t_you_take_a_look_question_
                # {HIST.LG1_L7_first_Why_don_t_you_take_a_look_question__time} => HIST.LG1_L7_first_Why_don_t_you_take_a_look_question__time
                if np.isnan(history[{HIST.LG1_L7_first_Why_don_t_you_take_a_look_question__time}]):
                    history[{HIST.LG1_L7_first_Why_don_t_you_take_a_look_question__time}] = x_et[i]
            
            # {BUILDINGS.humanecology}, {ROOMS.frontdesk} => BUILDINGS.humanecology, ROOMS.frontdesk
            if x_b[i] == {BUILDINGS.humanecology} and x_r[i] == {ROOMS.frontdesk}:
                if i + 1 < num_events:
                    # {HIST.LG1_L7_first_y_room_coords_of_next_click_after_arriving_at_humanecology_frontdesk} => HIST.LG1_L7_first_y_room_coords_of_next_click_after_arriving_at_humanecology_frontdesk
                    if np.isnan(history[{HIST.LG1_L7_first_y_room_coords_of_next_click_after_arriving_at_humanecology_frontdesk}]):
                        history[{HIST.LG1_L7_first_y_room_coords_of_next_click_after_arriving_at_humanecology_frontdesk}] = x_room_coor_y[i + 1]
            
            
        elif level[i] == 8:
            # {TEXT.Here_s_the_log_book_} => TEXT.Here_s_the_log_book_
            if x_text_numerical[i] == {TEXT.Here_s_the_log_book_}:
                # {HIST.LG1_L8_first_time_told_about_logbook} => HIST.LG1_L8_first_time_told_about_logbook
                if np.isnan(history[{HIST.LG1_L8_first_time_told_about_logbook}]):
                    history[{HIST.LG1_L8_first_time_told_about_logbook}] = x_et[i]
                    
            if x_fqid[i] == {FQIDS.logbook}: # {FQIDS.logbook} => FQIDS.logbook
                # {HIST.LG1_L8_first_logbook_time} => HIST.LG1_L8_first_logbook_time
                if np.isnan(history[{HIST.LG1_L8_first_logbook_time}]):
                    history[{HIST.LG1_L8_first_logbook_time}] = x_et[i]
                    
        elif level[i] == 9:
            
            if x_fqid[i] == {FQIDS.reader}: # {FQIDS.reader} => FQIDS.reader
                # {HIST.LG1_L9_first_reader_index} => HIST.LG1_L9_first_reader_index
                if np.isnan(history[{HIST.LG1_L9_first_reader_index}]):
                    history[{HIST.LG1_L9_first_reader_index}] = x_index[i]    
            elif x_fqid[i] == {FQIDS.reader_paper2_bingo} and x_en[i] == {EVENT_NAMES.object_click}:
                # {FQIDS.reader_paper2_bingo}, {EVENT_NAMES.object_click} => FQIDS.reader_paper2_bingo, EVENT_NAMES.object_click
                # {HIST.LG1_L9_first_click_reader_paper2_bingo_index} => HIST.LG1_L9_first_click_reader_paper2_bingo_index
                if np.isnan(history[{HIST.LG1_L9_first_click_reader_paper2_bingo_index}]):
                    history[{HIST.LG1_L9_first_click_reader_paper2_bingo_index}] = x_index[i]
                    
                    
                    
        elif level[i] == 10:
            # {TEXT.Wells_exclamation__What_was_he_doing_here_question__I_should_ask_the_librarian_} => TEXT.Wells_exclamation__What_was_he_doing_here_question__I_should_ask_the_librarian_
            if x_text_numerical[i] == {TEXT.Wells_exclamation__What_was_he_doing_here_question__I_should_ask_the_librarian_}:
                # {HIST.LG1_L10_first_Wells_exclamation__What_was_he_doing_here_question__I_should_ask_the_librarian_time} => HIST.LG1_L10_first_Wells_exclamation__What_was_he_doing_here_question__I_should_ask_the_librarian_time
                if np.isnan(history[{HIST.LG1_L10_first_Wells_exclamation__What_was_he_doing_here_question__I_should_ask_the_librarian_time}]):
                    history[{HIST.LG1_L10_first_Wells_exclamation__What_was_he_doing_here_question__I_should_ask_the_librarian_time}] = x_et[i]
            
            # {FQIDS.worker}, {EVENT_NAMES.navigate_click} => FQIDS.worker, EVENT_NAMES.navigate_click
            if x_fqid[i] == {FQIDS.worker} and x_en[i] == {EVENT_NAMES.navigate_click}:
                # {HIST.LG1_L10_starts_speaking_to_librarian_time} => HIST.LG1_L10_starts_speaking_to_librarian_time
                if np.isnan(history[{HIST.LG1_L10_starts_speaking_to_librarian_time}]):
                    history[{HIST.LG1_L10_starts_speaking_to_librarian_time}] = x_et[i]
                    
        elif level[i] == 11:
            # {TEXT.Hey__this_is_Youmans_exclamation_} => TEXT.Hey__this_is_Youmans_exclamation_
            if x_text_numerical[i] == {TEXT.Hey__this_is_Youmans_exclamation_}:
                # {HIST.LG1_L11_first_Hey__this_is_Youmans_exclamation__time} => HIST.LG1_L11_first_Hey__this_is_Youmans_exclamation__time
                if np.isnan(history[{HIST.LG1_L11_first_Hey__this_is_Youmans_exclamation__time}]):
                    history[{HIST.LG1_L11_first_Hey__this_is_Youmans_exclamation__time}] = x_et[i]
                    
                if (i - 1) >= 0:
                    # {HIST.LG1_L11_first_Hey__this_is_Youmans_exclamation__prev_time} => HIST.LG1_L11_first_Hey__this_is_Youmans_exclamation__prev_time
                    if np.isnan(history[{HIST.LG1_L11_first_Hey__this_is_Youmans_exclamation__prev_time}]):
                        history[{HIST.LG1_L11_first_Hey__this_is_Youmans_exclamation__prev_time}] = x_et[i - 1]
            
            # {TEXT.I_should_go_to_the_Capitol_and_tell_everyone_exclamation_} => TEXT.I_should_go_to_the_Capitol_and_tell_everyone_exclamation_
            if x_text_numerical[i] == {TEXT.I_should_go_to_the_Capitol_and_tell_everyone_exclamation_}:
                # {HIST.LG1_L11_first_I_should_go_to_the_Capitol_and_tell_everyone_exclamation__time} => HIST.LG1_L11_first_I_should_go_to_the_Capitol_and_tell_everyone_exclamation__time
                if np.isnan(history[{HIST.LG1_L11_first_I_should_go_to_the_Capitol_and_tell_everyone_exclamation__time}]):
                    history[{HIST.LG1_L11_first_I_should_go_to_the_Capitol_and_tell_everyone_exclamation__time}] = x_et[i]
                    
                    
        elif level[i] == 12:
            # {HIST.LG1_L12_start_time} => HIST.LG1_L12_start_time
            if np.isnan(history[{HIST.LG1_L12_start_time}]):
                history[{HIST.LG1_L12_start_time}] = x_et[i]
            
            # {BUILDINGS.capitol_1}, {ROOMS.hall} => BUILDINGS.capitol_1, ROOMS.hall
            if x_b[i] == {BUILDINGS.capitol_1} and x_r[i] == {ROOMS.hall}:
                # {HIST.LG1_L12_first_reach_capitol_1_hall_time} => HIST.LG1_L12_first_reach_capitol_1_hall_time
                if np.isnan(history[{HIST.LG1_L12_first_reach_capitol_1_hall_time}]):
                    history[{HIST.LG1_L12_first_reach_capitol_1_hall_time}] = x_et[i]
        
            if x_en[i] == {EVENT_NAMES.map_click}:
                # {HIST.LG1_L12_first_map_click_room_coord_x} => HIST.LG1_L12_first_map_click_room_coord_x
                if np.isnan(history[{HIST.LG1_L12_first_map_click_room_coord_x}]):
                    history[{HIST.LG1_L12_first_map_click_room_coord_x}] = x_room_coor_x[i]
                    
            if np.isnan(history[{HIST.LG1_L12_first_x_room_coords}]):
                # {HIST.LG1_L12_first_x_room_coords} => HIST.LG1_L12_first_x_room_coords
                history[{HIST.LG1_L12_first_x_room_coords}] = x_room_coor_x[i]
                
"""
                            
# Add the lg1_per_level_fill_history template to SRC_LIST, like the other source strings in this notebook.
SRC_LIST.append(src)

In [42]:
# Template for `calculate_LG1_features`. It is appended to SRC_LIST and only
# becomes real code later, when the compile cell runs `exec(src.format(...))`.
# Every `{FN.x}` / `{HIST.x}` field -- including the ones that sit inside the
# `#` comments of the template -- is substituted by str.format, so each
# referenced attribute has to exist on FN / HIST at that point.
#
# Generated function:
#   * lg == 1: stores the microfiche page-2 duration and the total microfiche
#     click count in `history` (both taken from get_microfiche_page_times),
#     then calls lg1_fill_history and lg1_per_level_fill_history.
#   * lg >= 1: writes LG1 history values, or the difference of two history
#     values, into the matching FN column of every row of `out`.
#
# NOTE(review): a few feature names carry a different level tag than the
# history entries they read (LG1_L6_number_of_events_between_opening_reader_...
# reads LG1_L9_* entries; LG1_L6_time_between_first_being_told_about_logbook_...
# reads LG1_L8_* entries), and LG1_L6_trigger_scarf_duration is computed as
# first_trigger_scarf_prev_time minus last_trigger_scarf_time. Names and
# formulas are left untouched because they are presumably consumed downstream
# -- confirm intent before changing them.
src = """
@numba.jit(nopython=True, nogil=True, error_model='numpy', parallel=False)
def calculate_LG1_features(
    lg,
    level,
    out,
    history,
    x_et,
    x_index,
    x_text_numerical,
    x_en,
    x_n,
    x_fqid,
    x_t,
    x_room_coor_x,
    x_room_coor_y,
    x_b,
    x_r,
    x_text_fqid_numerical,
):
    
    if lg == 1:
        
        microfiche_page_durations, microfiche_num_clicks_on_page, _, _ = get_microfiche_page_times(
            x_en,
            x_n,
            x_fqid,
            x_t,
        )
        
        # {HIST.LG1_microfiche_page_2_duration} => HIST.LG1_microfiche_page_2_duration
        history[{HIST.LG1_microfiche_page_2_duration}] = microfiche_page_durations[2]
        # {HIST.LG1_microfiche_total_clicks} => HIST.LG1_microfiche_total_clicks
        history[{HIST.LG1_microfiche_total_clicks}] = microfiche_num_clicks_on_page.sum()
        
        lg1_fill_history(
            lg,
            history,
            x_et,
            x_index,
            x_text_numerical,
            x_fqid,
            x_text_fqid_numerical,
            x_en,
        )
        
        lg1_per_level_fill_history(
            lg,
            level,
            history,
            x_et,
            x_index,
            x_text_numerical,
            x_en,
            x_n,
            x_fqid,
            x_room_coor_x,
            x_room_coor_y,
            x_b,
            x_r,
        )
        

    if lg >= 1:
        # {FN.LG0_to_LG1_gap_duration}, {HIST.LG1_first_event_time} => FN.LG0_to_LG1_gap_duration, HIST.LG1_first_event_time
        # {HIST.LG0_last_event_time} => HIST.LG0_last_event_time
        out[:, {FN.LG0_to_LG1_gap_duration}] = history[{HIST.LG1_first_event_time}] - history[{HIST.LG0_last_event_time}]
        
        # {FN.LG1_first_Who_could_ve_done_this_question__duration} => FN.LG1_first_Who_could_ve_done_this_question__duration
        out[:, {FN.LG1_first_Who_could_ve_done_this_question__duration}] = history[{HIST.LG1_first_Who_could_ve_done_this_question__duration}]
        
        # {FN.LG1_first_I_ve_got_a_stack_of_business_cards_from_my_favorite_cleaners__duration} => FN.LG1_first_I_ve_got_a_stack_of_business_cards_from_my_favorite_cleaners__duration
        out[:, {FN.LG1_first_I_ve_got_a_stack_of_business_cards_from_my_favorite_cleaners__duration}] = (
            history[{HIST.LG1_first_I_ve_got_a_stack_of_business_cards_from_my_favorite_cleaners__duration}]
        )
        
        # {FN.LG1_first_Where_did_you_get_that_coffee_question__duration} => FN.LG1_first_Where_did_you_get_that_coffee_question__duration
        out[:, {FN.LG1_first_Where_did_you_get_that_coffee_question__duration}] = (
            history[{HIST.LG1_first_Where_did_you_get_that_coffee_question__duration}]
        )
        
        # {FN.LG1_microfiche_total_clicks} => FN.LG1_microfiche_total_clicks
        out[:, {FN.LG1_microfiche_total_clicks}] = history[{HIST.LG1_microfiche_total_clicks}]
        
        # {FN.LG1_max_fqid_journals_duration} => FN.LG1_max_fqid_journals_duration
        out[:, {FN.LG1_max_fqid_journals_duration}] = history[{HIST.LG1_max_fqid_journals_duration}]
        
        # {FN.LG1_max_fqid_magnify_duration} => FN.LG1_max_fqid_magnify_duration
        out[:, {FN.LG1_max_fqid_magnify_duration}] = history[{HIST.LG1_max_fqid_magnify_duration}]
        
        # {FN.LG1_first_fqid_businesscards_card_1_next_duration} => FN.LG1_first_fqid_businesscards_card_1_next_duration
        out[:, {FN.LG1_first_fqid_businesscards_card_1_next_duration}] = history[{HIST.LG1_first_fqid_businesscards_card_1_next_duration}]
        
        # {FN.LG1_L6_time_between_clicking_on_magnifying_glass_and_then_clicking_on_archivist_again} => FN.LG1_L6_time_between_clicking_on_magnifying_glass_and_then_clicking_on_archivist_again
        out[:, {FN.LG1_L6_time_between_clicking_on_magnifying_glass_and_then_clicking_on_archivist_again}] = (
            history[{HIST.LG1_L6_Ah__that_s_better_exclamation__previous_time}] - history[{HIST.LG1_L6_last_magnify_time}]
        ) # {HIST.LG1_L6_Ah__that_s_better_exclamation__previous_time}, {HIST.LG1_L6_last_magnify_time} => HIST.LG1_L6_Ah__that_s_better_exclamation__previous_time, HIST.LG1_L6_last_magnify_time
        
        # {FN.LG1_L6_time_between_first_finish_talking_to_gramps_and_leaving_closet_dirty} => FN.LG1_L6_time_between_first_finish_talking_to_gramps_and_leaving_closet_dirty
        out[:, {FN.LG1_L6_time_between_first_finish_talking_to_gramps_and_leaving_closet_dirty}] = (
            history[{HIST.LG1_L6_first_time_leave_closet_dirty}] - history[{HIST.LG1_L6_first_finished_talking_to_gramps}]
        ) # {HIST.LG1_L6_first_time_leave_closet_dirty}, {HIST.LG1_L6_first_finished_talking_to_gramps} => HIST.LG1_L6_first_time_leave_closet_dirty, HIST.LG1_L6_first_finished_talking_to_gramps
        
        # {FN.LG1_L6_number_of_events_between_opening_reader_and_clicking_on_paper2_bingo} => FN.LG1_L6_number_of_events_between_opening_reader_and_clicking_on_paper2_bingo
        out[:, {FN.LG1_L6_number_of_events_between_opening_reader_and_clicking_on_paper2_bingo}] = (
            history[{HIST.LG1_L9_first_click_reader_paper2_bingo_index}] - history[{HIST.LG1_L9_first_reader_index}]
        ) # {HIST.LG1_L9_first_click_reader_paper2_bingo_index}, {HIST.LG1_L9_first_reader_index} => HIST.LG1_L9_first_click_reader_paper2_bingo_index, HIST.LG1_L9_first_reader_index
        
        # {FN.LG1_L10_time_between_being_told_to_talk_to_librarian_and_speaking_to_librarian} => FN.LG1_L10_time_between_being_told_to_talk_to_librarian_and_speaking_to_librarian
        out[:, {FN.LG1_L10_time_between_being_told_to_talk_to_librarian_and_speaking_to_librarian}] = (
            history[{HIST.LG1_L10_starts_speaking_to_librarian_time}]
            - history[{HIST.LG1_L10_first_Wells_exclamation__What_was_he_doing_here_question__I_should_ask_the_librarian_time}]
        ) # {HIST.LG1_L10_starts_speaking_to_librarian_time} => HIST.LG1_L10_starts_speaking_to_librarian_time
        # {HIST.LG1_L10_first_Wells_exclamation__What_was_he_doing_here_question__I_should_ask_the_librarian_time} => HIST.LG1_L10_first_Wells_exclamation__What_was_he_doing_here_question__I_should_ask_the_librarian_time
        
        # {FN.LG1_first_Nope__But_Youmans_and_other_suffragists_worked_hard_to_change_that__duration} => FN.LG1_first_Nope__But_Youmans_and_other_suffragists_worked_hard_to_change_that__duration
        out[:, {FN.LG1_first_Nope__But_Youmans_and_other_suffragists_worked_hard_to_change_that__duration}] = (
            history[{HIST.LG1_first_Nope__But_Youmans_and_other_suffragists_worked_hard_to_change_that__duration}]
        ) # {HIST.LG1_first_Nope__But_Youmans_and_other_suffragists_worked_hard_to_change_that__duration} => HIST.LG1_first_Nope__But_Youmans_and_other_suffragists_worked_hard_to_change_that__duration
        
        # {FN.LG1_first_Youmans_was_a_suffragist_here_in_Wisconsin_duration} => FN.LG1_first_Youmans_was_a_suffragist_here_in_Wisconsin_duration
        out[:, {FN.LG1_first_Youmans_was_a_suffragist_here_in_Wisconsin_duration}] = (
            history[{HIST.LG1_first_Youmans_was_a_suffragist_here_in_Wisconsin_duration}]
        ) # {HIST.LG1_first_Youmans_was_a_suffragist_here_in_Wisconsin_duration} => HIST.LG1_first_Youmans_was_a_suffragist_here_in_Wisconsin_duration
        
        # {HIST.LG1_microfiche_page_2_duration} => HIST.LG1_microfiche_page_2_duration
        # out[:, ] = history[{HIST.LG1_microfiche_page_2_duration}]
        
        # {FN.LG1_L6_trigger_scarf_duration} => FN.LG1_L6_trigger_scarf_duration
        out[:, {FN.LG1_L6_trigger_scarf_duration}] = (
            history[{HIST.LG1_L6_first_trigger_scarf_prev_time}] - history[{HIST.LG1_L6_last_trigger_scarf_time}]
        ) # {HIST.LG1_L6_first_trigger_scarf_prev_time} => HIST.LG1_L6_first_trigger_scarf_prev_time
        # {HIST.LG1_L6_last_trigger_scarf_time} => HIST.LG1_L6_last_trigger_scarf_time
        
        # {FN.LG1_L6_last_trigger_scarf_next_room_coor_x} => FN.LG1_L6_last_trigger_scarf_next_room_coor_x
        out[:, {FN.LG1_L6_last_trigger_scarf_next_room_coor_x}] = history[{HIST.LG1_L6_last_trigger_scarf_next_room_coor_x}]
        
        # {FN.LG1_L6_time_between_finish_talking_to_gramps_and_leaving_room} => FN.LG1_L6_time_between_finish_talking_to_gramps_and_leaving_room
        out[:, {FN.LG1_L6_time_between_finish_talking_to_gramps_and_leaving_room}] = (
            history[{HIST.LG1_L6_first_toentry_time}] - history[{HIST.LG1_L6_first_finished_talking_to_gramps}]
        ) # {HIST.LG1_L6_first_toentry_time} => HIST.LG1_L6_first_toentry_time
        # {HIST.LG1_L6_first_finished_talking_to_gramps} => HIST.LG1_L6_first_finished_talking_to_gramps
        
        # {FN.LG1_L6_time_between_first_being_told_about_logbook_and_getting_to_logbook} => FN.LG1_L6_time_between_first_being_told_about_logbook_and_getting_to_logbook
        out[:, {FN.LG1_L6_time_between_first_being_told_about_logbook_and_getting_to_logbook}] = (
            history[{HIST.LG1_L8_first_logbook_time}] - history[{HIST.LG1_L8_first_time_told_about_logbook}]
        ) # {HIST.LG1_L8_first_logbook_time} => HIST.LG1_L8_first_logbook_time
        # {HIST.LG1_L8_first_time_told_about_logbook} => HIST.LG1_L8_first_time_told_about_logbook
        
        # {FN.LG1_L11_time_spent_reading_bingo_text} => FN.LG1_L11_time_spent_reading_bingo_text
        out[:, {FN.LG1_L11_time_spent_reading_bingo_text}] = (
            history[{HIST.LG1_L11_first_I_should_go_to_the_Capitol_and_tell_everyone_exclamation__time}]
            - history[{HIST.LG1_L11_first_Hey__this_is_Youmans_exclamation__prev_time}]
        ) # {HIST.LG1_L11_first_I_should_go_to_the_Capitol_and_tell_everyone_exclamation__time} => HIST.LG1_L11_first_I_should_go_to_the_Capitol_and_tell_everyone_exclamation__time
        # {HIST.LG1_L11_first_Hey__this_is_Youmans_exclamation__prev_time} => HIST.LG1_L11_first_Hey__this_is_Youmans_exclamation__prev_time
        
        # {FN.LG1_L7_first_time_to_get_from_stacks_to_entry} => FN.LG1_L7_first_time_to_get_from_stacks_to_entry
        out[:, {FN.LG1_L7_first_time_to_get_from_stacks_to_entry}] = (
            history[{HIST.LG1_L7_first_toentry_time}] - history[{HIST.LG1_L7_first_tostacks_time}]
        ) # {HIST.LG1_L7_first_toentry_time}, {HIST.LG1_L7_first_tostacks_time} => HIST.LG1_L7_first_toentry_time, HIST.LG1_L7_first_tostacks_time
        
        # {FN.LG1_L7_first_time_to_read_workers_text} => FN.LG1_L7_first_time_to_read_workers_text
        out[:, {FN.LG1_L7_first_time_to_read_workers_text}] = (
            history[{HIST.LG1_L7_first_Why_don_t_you_take_a_look_question__time}] - history[{HIST.LG1_L7_first_Hello_there_exclamation_prev_time}]
        ) # {HIST.LG1_L7_first_Why_don_t_you_take_a_look_question__time} => HIST.LG1_L7_first_Why_don_t_you_take_a_look_question__time
        # {HIST.LG1_L7_first_Hello_there_exclamation_prev_time} => HIST.LG1_L7_first_Hello_there_exclamation_prev_time
        
        # {FN.LG1_L7_first_time_from_being_told_about_business_cards_to_clicking_on_business_cards} => FN.LG1_L7_first_time_from_being_told_about_business_cards_to_clicking_on_business_cards
        out[:, {FN.LG1_L7_first_time_from_being_told_about_business_cards_to_clicking_on_business_cards}] = (
            history[{HIST.LG1_L7_first_businesscards_time}] - history[{HIST.LG1_L7_first_Why_don_t_you_take_a_look_question__time}]
        ) # {HIST.LG1_L7_first_businesscards_time} => HIST.LG1_L7_first_businesscards_time
        # {HIST.LG1_L7_first_Why_don_t_you_take_a_look_question__time} => HIST.LG1_L7_first_Why_don_t_you_take_a_look_question__time
        
        # {FN.LG1_L12_time_to_reach_capitol_1_hall_from_start_of_level} => FN.LG1_L12_time_to_reach_capitol_1_hall_from_start_of_level
        out[:, {FN.LG1_L12_time_to_reach_capitol_1_hall_from_start_of_level}] = (
            history[{HIST.LG1_L12_first_reach_capitol_1_hall_time}] - history[{HIST.LG1_L12_start_time}]
        ) # {HIST.LG1_L12_first_reach_capitol_1_hall_time}, {HIST.LG1_L12_start_time} => HIST.LG1_L12_first_reach_capitol_1_hall_time, HIST.LG1_L12_start_time
        
        # {FN.LG1_L12_first_map_click_room_coord_x} => FN.LG1_L12_first_map_click_room_coord_x
        out[:, {FN.LG1_L12_first_map_click_room_coord_x}] = history[{HIST.LG1_L12_first_map_click_room_coord_x}]
        
        # {FN.LG1_L6_first_y_room_coords_of_next_click_after_finding_magnifying_glass} => FN.LG1_L6_first_y_room_coords_of_next_click_after_finding_magnifying_glass
        out[:, {FN.LG1_L6_first_y_room_coords_of_next_click_after_finding_magnifying_glass}] = (
            history[{HIST.LG1_L6_first_y_room_coords_of_next_click_after_finding_magnifying_glass}]
        ) # {HIST.LG1_L6_first_y_room_coords_of_next_click_after_finding_magnifying_glass} => HIST.LG1_L6_first_y_room_coords_of_next_click_after_finding_magnifying_glass
        
        # {FN.LG1_L6_first_y_room_coords_of_next_click_after_archivist_mentions_difficulty_reading} => FN.LG1_L6_first_y_room_coords_of_next_click_after_archivist_mentions_difficulty_reading
        out[:, {FN.LG1_L6_first_y_room_coords_of_next_click_after_archivist_mentions_difficulty_reading}] = (
            history[{HIST.LG1_L6_first_y_room_coords_of_next_click_after_archivist_mentions_difficulty_reading}]
        ) # {HIST.LG1_L6_first_y_room_coords_of_next_click_after_archivist_mentions_difficulty_reading} => HIST.LG1_L6_first_y_room_coords_of_next_click_after_archivist_mentions_difficulty_reading
        
        # {FN.LG1_L7_first_y_room_coords_of_next_click_after_arriving_at_humanecology_frontdesk} => FN.LG1_L7_first_y_room_coords_of_next_click_after_arriving_at_humanecology_frontdesk
        out[:, {FN.LG1_L7_first_y_room_coords_of_next_click_after_arriving_at_humanecology_frontdesk}] = (
            history[{HIST.LG1_L7_first_y_room_coords_of_next_click_after_arriving_at_humanecology_frontdesk}]
        ) # {HIST.LG1_L7_first_y_room_coords_of_next_click_after_arriving_at_humanecology_frontdesk} => HIST.LG1_L7_first_y_room_coords_of_next_click_after_arriving_at_humanecology_frontdesk
        
        # {FN.LG1_L6_first_y_room_coords_for_non_nan_fqid} => FN.LG1_L6_first_y_room_coords_for_non_nan_fqid
        out[:, {FN.LG1_L6_first_y_room_coords_for_non_nan_fqid}] =  history[{HIST.LG1_L6_first_y_room_coords_for_non_nan_fqid}]
        
        # {FN.LG1_L12_first_x_room_coords} => FN.LG1_L12_first_x_room_coords
        out[:, {FN.LG1_L12_first_x_room_coords}] =  history[{HIST.LG1_L12_first_x_room_coords}]
        
        # {FN.LG1_logbook_page_bingo_time_duration} => FN.LG1_logbook_page_bingo_time_duration
        out[:, {FN.LG1_logbook_page_bingo_time_duration}] = (
            history[{HIST.LG1_logbook_page_bingo_end_time}] - history[{HIST.LG1_logbook_page_bingo_start_time}]
        ) # {HIST.LG1_logbook_page_bingo_end_time}, {HIST.LG1_logbook_page_bingo_start_time} => HIST.LG1_logbook_page_bingo_end_time, HIST.LG1_logbook_page_bingo_start_time
        
        # {FN.LG1_reader_paper2_bingo_time_duration} => FN.LG1_reader_paper2_bingo_time_duration
        out[:, {FN.LG1_reader_paper2_bingo_time_duration}] = (
            history[{HIST.LG1_reader_paper2_bingo_end_time}] - history[{HIST.LG1_reader_paper2_bingo_start_time}]
        ) # {HIST.LG1_reader_paper2_bingo_end_time}, {HIST.LG1_reader_paper2_bingo_start_time} => HIST.LG1_reader_paper2_bingo_end_time, HIST.LG1_reader_paper2_bingo_start_time

"""

SRC_LIST.append(src)

In [43]:
# Template for `lg2_fill_history`; appended to SRC_LIST and turned into real
# code later by `exec(src.format(...))`. All `{TEXT.x}` / `{HIST.x}` fields --
# also the ones inside the template's `#` comments -- are substituted by
# str.format, so every referenced attribute must exist at that point.
#
# Generated function (asserts lg == 2):
#   * scans the events once; for the first event whose x_text_numerical equals
#     TEXT.Hmm__You_could_try_the_Aldo_Leopold_Wildlife_Center_ it records the
#     event's x_et (time) and x_index (index) and, unless it is the last event,
#     the gap to the next event (duration). The np.isnan guard makes later
#     matches no-ops (presumably history slots start out as NaN -- confirm).
#   * afterwards stores x_et / x_index of the first and last event of the group.
src = """
@numba.jit(nopython=True, nogil=True, error_model='numpy', parallel=False)
def lg2_fill_history(
    lg,
    history,
    x_et,
    x_index,
    x_text_numerical,
):
    
    assert lg == 2
    
    num_events = x_et.shape[0]
    
    for i in range(num_events):
        # {TEXT.Hmm__You_could_try_the_Aldo_Leopold_Wildlife_Center_} => TEXT.Hmm__You_could_try_the_Aldo_Leopold_Wildlife_Center_
        if x_text_numerical[i] == {TEXT.Hmm__You_could_try_the_Aldo_Leopold_Wildlife_Center_}:
            # {HIST.LG2_first_Hmm__You_could_try_the_Aldo_Leopold_Wildlife_Center__time} => HIST.LG2_first_Hmm__You_could_try_the_Aldo_Leopold_Wildlife_Center__time
            if np.isnan(history[{HIST.LG2_first_Hmm__You_could_try_the_Aldo_Leopold_Wildlife_Center__time}]):
                history[{HIST.LG2_first_Hmm__You_could_try_the_Aldo_Leopold_Wildlife_Center__time}] = x_et[i]
                if i != (num_events - 1):
                    # {HIST.LG2_first_Hmm__You_could_try_the_Aldo_Leopold_Wildlife_Center__duration} => HIST.LG2_first_Hmm__You_could_try_the_Aldo_Leopold_Wildlife_Center__duration
                    history[{HIST.LG2_first_Hmm__You_could_try_the_Aldo_Leopold_Wildlife_Center__duration}] = x_et[i+1] - x_et[i]
                # {HIST.LG2_first_Hmm__You_could_try_the_Aldo_Leopold_Wildlife_Center__index} => HIST.LG2_first_Hmm__You_could_try_the_Aldo_Leopold_Wildlife_Center__index
                history[{HIST.LG2_first_Hmm__You_could_try_the_Aldo_Leopold_Wildlife_Center__index}] = x_index[i]
    
    history[{HIST.LG2_first_event_time}] = x_et[0] # {HIST.LG2_first_event_time} => HIST.LG2_first_event_time
    history[{HIST.LG2_first_event_index}] = x_index[0] # {HIST.LG2_first_event_index} => HIST.LG2_first_event_index
    history[{HIST.LG2_last_event_time}] = x_et[-1] # {HIST.LG2_last_event_time} => HIST.LG2_last_event_time
    history[{HIST.LG2_last_event_index}] = x_index[-1] # {HIST.LG2_last_event_index} => HIST.LG2_last_event_index
"""
        
SRC_LIST.append(src)

In [44]:
# Template for `lg2_per_level_fill_history`; appended to SRC_LIST and turned
# into real code later by `exec(src.format(...))`. The `{...}` fields (also the
# ones inside the template's `#` comments) are substituted by str.format.
#
# Generated function (asserts lg == 2) walks the events once and, depending on
# level[i], records the FIRST occurrence of a few situations (each write is
# guarded by np.isnan on the target history slot):
#   * level 15: x_room_coor_y of an object_click on FQIDS.directory or
#     FQIDS.directory_closeup_archivist (stored in a slot named LG1_L15_...).
#   * level 18: x_et when building/room is wildlife/center, and x_et of the
#     first event with FQIDS.coffee.
#   * level 21: x_et of an object_click with NAMES.close on FQIDS.journals_flag.
#   * level 22: x_et when building/room is capitol_2/hall.
# x_index, x_text_numerical and x_room_coor_x are accepted but not read here.
src = """
@numba.jit(nopython=True, nogil=True, error_model='numpy', parallel=False)
def lg2_per_level_fill_history(
    lg,
    level,
    history,
    x_et,
    x_index,
    x_text_numerical,
    x_en,
    x_n,
    x_fqid,
    x_room_coor_x,
    x_room_coor_y,
    x_b,
    x_r,
):

    assert lg == 2

    num_events = x_et.shape[0]
    for i in range(num_events):

        if level[i] == 15:
            # {FQIDS.directory}, {FQIDS.directory_closeup_archivist}, {EVENT_NAMES.object_click} => FQIDS.directory, FQIDS.directory_closeup_archivist, EVENT_NAMES.object_click
            if (x_fqid[i] == {FQIDS.directory} or x_fqid[i] == {FQIDS.directory_closeup_archivist}) and x_en[i] == {EVENT_NAMES.object_click}:
                # {HIST.LG1_L15_first_directory_y_coord_click} => HIST.LG1_L15_first_directory_y_coord_click
                if np.isnan(history[{HIST.LG1_L15_first_directory_y_coord_click}]):
                    history[{HIST.LG1_L15_first_directory_y_coord_click}] = x_room_coor_y[i]

        elif level[i] == 18:
            # {BUILDINGS.wildlife},{ROOMS.center} => BUILDINGS.wildlife, ROOMS.center
            if x_b[i] == {BUILDINGS.wildlife} and x_r[i] == {ROOMS.center}:
                # {HIST.LG2_L18_first_reach_wildlife_center_time} => HIST.LG2_L18_first_reach_wildlife_center_time
                if np.isnan(history[{HIST.LG2_L18_first_reach_wildlife_center_time}]):
                    history[{HIST.LG2_L18_first_reach_wildlife_center_time}] = x_et[i]
                    
            if x_fqid[i] == {FQIDS.coffee}: # {FQIDS.coffee} => FQIDS.coffee
                # {HIST.LG2_L18_first_coffee_time} => HIST.LG2_L18_first_coffee_time
                if np.isnan(history[{HIST.LG2_L18_first_coffee_time}]):
                    history[{HIST.LG2_L18_first_coffee_time}] = x_et[i]
                    
        elif level[i] == 21:
            # {FQIDS.journals_flag}, {EVENT_NAMES.object_click}, {NAMES.close} => FQIDS.journals_flag, EVENT_NAMES.object_click, NAMES.close
            if x_fqid[i] == {FQIDS.journals_flag} and x_en[i] == {EVENT_NAMES.object_click} and x_n[i] == {NAMES.close}:
                # {HIST.LG2_L21_first_journals_close_time} => HIST.LG2_L21_first_journals_close_time
                if np.isnan(history[{HIST.LG2_L21_first_journals_close_time}]):
                    history[{HIST.LG2_L21_first_journals_close_time}] = x_et[i]
            
        elif level[i] == 22:
            
            # {BUILDINGS.capitol_2}, {ROOMS.hall} => BUILDINGS.capitol_2, ROOMS.hall
            if x_b[i] == {BUILDINGS.capitol_2} and x_r[i] == {ROOMS.hall}:
                # {HIST.LG2_L22_first_reach_capitol_2_hall_time} => HIST.LG2_L22_first_reach_capitol_2_hall_time
                if np.isnan(history[{HIST.LG2_L22_first_reach_capitol_2_hall_time}]):
                    history[{HIST.LG2_L22_first_reach_capitol_2_hall_time}] = x_et[i]
        
"""

SRC_LIST.append(src)

In [45]:
# Template for `calculate_LG2_features`; appended to SRC_LIST and turned into
# real code later by `exec(src.format(...))`. The `{...}` fields (also the ones
# inside the template's `#` comments) are substituted by str.format.
#
# Generated function does nothing unless lg == 2. In that case it:
#   1. calls lg2_fill_history and lg2_per_level_fill_history to populate
#      `history`, then
#   2. writes four features into every row of `out` (`out[:, col] = value`):
#      one history value copied as-is (the "Hmm..." text duration), two time
#      differences between history entries, and the level-15 directory click
#      y coordinate (stored under an LG1_L15_... name).
# x_t is part of the signature but is not used by the body.
src = """
@numba.jit(nopython=True, nogil=True, error_model='numpy', parallel=False)
def calculate_LG2_features(
    lg,
    level,
    out,
    history,
    x_et,
    x_index,
    x_text_numerical,
    x_en,
    x_n,
    x_fqid,
    x_t,
    x_room_coor_x,
    x_room_coor_y,
    x_b,
    x_r,
):
    
    if lg == 2:

        lg2_fill_history(
            lg,
            history,
            x_et,
            x_index,
            x_text_numerical,
        )
        
        lg2_per_level_fill_history(
            lg,
            level,
            history,
            x_et,
            x_index,
            x_text_numerical,
            x_en,
            x_n,
            x_fqid,
            x_room_coor_x,
            x_room_coor_y,
            x_b,
            x_r,
        )
        
        # {FN.LG2_first_Hmm__You_could_try_the_Aldo_Leopold_Wildlife_Center__duration} => FN.LG2_first_Hmm__You_could_try_the_Aldo_Leopold_Wildlife_Center__duration
        out[:, {FN.LG2_first_Hmm__You_could_try_the_Aldo_Leopold_Wildlife_Center__duration}] = (
            history[{HIST.LG2_first_Hmm__You_could_try_the_Aldo_Leopold_Wildlife_Center__duration}]
        )
        
        # {FN.LG2_L18_first_time_between_reaching_wildlife_center_and_clicking_on_coffee} => FN.LG2_L18_first_time_between_reaching_wildlife_center_and_clicking_on_coffee
        out[:, {FN.LG2_L18_first_time_between_reaching_wildlife_center_and_clicking_on_coffee}] = (
            history[{HIST.LG2_L18_first_coffee_time}] - history[{HIST.LG2_L18_first_reach_wildlife_center_time}]
        ) # {HIST.LG2_L18_first_coffee_time}, {HIST.LG2_L18_first_reach_wildlife_center_time} => HIST.LG2_L18_first_coffee_time, HIST.LG2_L18_first_reach_wildlife_center_time
        
        # {FN.LG2_L21_first_time_between_closing_journals_and_reaching_capitol_2_hall} => FN.LG2_L21_first_time_between_closing_journals_and_reaching_capitol_2_hall
        out[:, {FN.LG2_L21_first_time_between_closing_journals_and_reaching_capitol_2_hall}] = (
            history[{HIST.LG2_L22_first_reach_capitol_2_hall_time}] - history[{HIST.LG2_L21_first_journals_close_time}]
        ) # {HIST.LG2_L22_first_reach_capitol_2_hall_time}, {HIST.LG2_L21_first_journals_close_time} => HIST.LG2_L22_first_reach_capitol_2_hall_time, HIST.LG2_L21_first_journals_close_time
        
        # {FN.LG1_L15_first_directory_y_coord_click} => FN.LG1_L15_first_directory_y_coord_click
        out[:, {FN.LG1_L15_first_directory_y_coord_click}] = history[{HIST.LG1_L15_first_directory_y_coord_click}]
    
"""

SRC_LIST.append(src)

In [46]:
#print("\n".join(SRC_LIST))

### C Features

In [47]:
# utils.get_code() supplies three things used by the following cells:
#   c_src            - source template, filled in with str.format and compiled below
#   c_features_names - names appended to FEATURE_NAMES
#   c_history_names  - names appended to HISTORY_LST
c_src, c_features_names, c_history_names = utils.get_code()

# c_features_names are available in file utils/c_features_names.ipynb.
# c_history_names are available in file utils/c_history_names.ipynb.
# c_src is available in file utils/c_src.ipynb.

# Sanity check: number of C-side feature names and history names.
print(len(c_features_names), len(c_history_names))

923 3066


In [48]:
# Display-only: peek at the first five feature names.
FEATURE_NAMES[:5]

['number_of_events',
 'level_group',
 'question_number',
 'session_start_time',
 'session_end_time']

In [49]:
# Append the C-side feature names after the existing ones.
# NOTE: extend() mutates FEATURE_NAMES in place, so running this cell a second
# time without rebuilding FEATURE_NAMES appends the same names again.
FEATURE_NAMES.extend(c_features_names)

In [50]:
# Display-only: peek at the first five history slot names.
HISTORY_LST[:5]

['LG0_first_We_need_to_talk_about_that_missing_paperwork_time',
 'LG0_first_We_need_to_talk_about_that_missing_paperwork_index',
 'LG0_first_We_need_to_talk_about_that_missing_paperwork_duration',
 'LG0_first_Leopold__why_don_t_you_help_me_set_up_in_the_Capitol_time',
 'LG0_first_Leopold__why_don_t_you_help_me_set_up_in_the_Capitol_index']

In [51]:
# Append the C-side history names after the existing ones.
# NOTE: extend() mutates HISTORY_LST in place, so running this cell a second
# time without rebuilding HISTORY_LST appends the same names again.
HISTORY_LST.extend(c_history_names)

### Create Feature and History Lookups

In [52]:
class FeatureNameClass:
    """Plain attribute holder: one attribute per feature name, set below."""

    def __init__(self):
        pass


# Lookup object used by the code templates: FN.<feature name> is the position
# of that name in FEATURE_NAMES (if a name occurs twice, the later position wins).
FN = FeatureNameClass()
for position, feature_name in enumerate(FEATURE_NAMES):
    setattr(FN, feature_name, position)

NUM_FEATURES = len(FEATURE_NAMES)
print(NUM_FEATURES)

# 1590

1590


In [53]:
class HistoryIndexClass:
    """Plain attribute holder: one attribute per history slot name, set below."""

    def __init__(self):
        pass


# Lookup object used by the code templates: HIST.<slot name> is the position
# of that name in HISTORY_LST (if a name occurs twice, the later position wins).
HIST = HistoryIndexClass()
for position, history_name in enumerate(HISTORY_LST):
    setattr(HIST, history_name, position)

NUM_FEATURES_ = len(HISTORY_LST)
print(NUM_FEATURES_)

# 3183

3183


### Compile

In [54]:
# ff = []
# for i, src in enumerate(SRC_LIST):
#     if i==0: 
#         print(src.format(FN=FN, HIST=HIST, FQIDS=FQIDS, TEXT_FQIDS=TEXT_FQIDS, ROOM_FQIDS=ROOM_FQIDS,
#                          EVENT_NAMES=EVENT_NAMES, NAMES=NAMES, ROOMS=ROOMS, TEXT=TEXT, BUILDINGS=BUILDINGS,))
#         break

In [55]:
#print("\n".join(ff))

In [56]:
# Objects the templates may reference through `{NAME.attribute}` replacement
# fields; str.format swaps each field for the attribute's value.
template_fields = {
    "FN": FN,
    "HIST": HIST,
    "FQIDS": FQIDS,
    "TEXT_FQIDS": TEXT_FQIDS,
    "ROOM_FQIDS": ROOM_FQIDS,
    "EVENT_NAMES": EVENT_NAMES,
    "NAMES": NAMES,
    "ROOMS": ROOMS,
    "TEXT": TEXT,
    "BUILDINGS": BUILDINGS,
}

# Fill in every collected template and execute it, which defines the generated
# functions in the notebook namespace in the order they were appended.
for src in SRC_LIST:
    generated_code = src.format(**template_fields)
    exec(generated_code)

In [57]:
# Values substituted into the `{...}` replacement fields of the C source template.
c_template_fields = {
    "FN": FN,
    "HIST": HIST,
    "FQIDS": FQIDS,
    "TEXT_FQIDS": TEXT_FQIDS,
    "ROOM_FQIDS": ROOM_FQIDS,
    "EVENT_NAMES": EVENT_NAMES,
    "NAMES": NAMES,
    "NUM_FEATURES": NUM_FEATURES,
}
c_src_formatted = c_src.format(**c_template_fields)

# c_src_formatted is available in file utils/c_src_formatted.ipynb.

In [59]:
# print(c_src_formatted)

In [None]:
# for l in FEATURE_NAMES:
#     if 'LG0__LNone__fqid__chap1_finale__ETsinceprev__' in l:
#         print(l)
#         break

In [58]:
# Hand the formatted source to utils.compile_module under the module name
# 'JoWilder_C_features' with 'modules/' as the module directory; a later cell
# imports the result with `from modules import JoWilder_C_features`.
# NOTE(review): what compile_module does internally is not visible here; the
# cell output shows compiler diagnostics for JoWilder_C_features.cpp.
utils.compile_module(c_src_formatted, module_name='JoWilder_C_features', module_dir='modules/')# /kaggle/working

JoWilder_C_features.cpp: In function ‘void fill_history(long int, long int*, double*, long int*, long int*, long int*, long int*, long int*, long int*, double*, double*, double*, double*, long int*, long int*, long int*, long int*, double*, double*, long int)’:
 3687 |     float temp;
      |           ^~~~
 3688 |     double temp_mean;
      |            ^~~~~~~~~


In [60]:
#print(c_src_formatted)

In [61]:
from modules import JoWilder_C_features

In [97]:
# Each entry pairs a code template with exactly the objects its replacement
# fields are formatted with; the templates are executed in the listed order.
feature_templates = (
    (fqids_features_src, {"FN": FN, "FQIDS": FQIDS}),
    (building_features_src, {"FN": FN}),
    (room_features_src, {"FN": FN}),
    (fqid_features_src, {"FN": FN}),
    (level_based_features_src, {"FN": FN}),
)

for feature_template, fields in feature_templates:
    exec(feature_template.format(**fields))

In [95]:
# numba.pycc compiler object for an extension module named
# 'JoWilder_numba_features'; the next cell registers process_single on it
# through the @cc.export decorator.
cc = CC('JoWilder_numba_features')

In [98]:

src = f"""

QUESTIONS_SPLITS_PER_LEVEL = np.array([0, 3, 13, 18])

@cc.export(
    'process_single',
    '(i8, i8[::1], i8[::1], i8[::1], f8[::1], u1[::1], i8[::1], i8[::1], i8[::1], i8[::1], i8[::1], f8[::1], f8[::1], f8[::1], f8[::1], f8[::1], i8[::1], i8[::1], i8[::1], f8[::1], u1[::1], f4[:, ::1], f8[::1])'
)
@numba.jit(
    '(i8, i8[::1], i8[::1], i8[::1], f8[::1], u1[::1], i8[::1], i8[::1], i8[::1], i8[::1], i8[::1], f8[::1], f8[::1], f8[::1], f8[::1], f8[::1], i8[::1], i8[::1], i8[::1], f8[::1], u1[::1], f4[:, ::1], f8[::1])',
    nopython=True,nogil=True,error_model='numpy',parallel=False
)
def process_single(
    level_group_index,
    x_et,
    x_en,
    x_n,
    x_hover_duration,
    x_session_weekday,
    x_b,
    x_r,
    x_fqids,
    x_l,
    x_index,
    x_rc_x,
    x_rc_y,
    x_sc_x,
    x_sc_y,
    x_tl,
    x_text_numerical,
    x_text_fqid_numerical,
    x_room_fqid_numerical,
    x_page,
    x_hour,
    out,
    hist,
):
    number_of_events = x_et.shape[0]
    
    questions_start_number = QUESTIONS_SPLITS_PER_LEVEL[level_group_index]
    questions_end_number = QUESTIONS_SPLITS_PER_LEVEL[level_group_index + 1]
    question_number = np.arange(questions_start_number, questions_end_number)
    
    total_hover_duration = np.nansum(x_hover_duration)
    average_hover_duration = np.nanmean(x_hover_duration)
    session_duration = x_et[-1] - x_et[0]
    
    # {FN.session_hour} => FN.session_hour
    out[:, {FN.session_hour}] = x_hour[0]
    
    x_et_diff = np.diff(x_et)
    
    # {FN.session_index_level_group_first} => FN.session_index_level_group_first
    out[:, {FN.session_index_level_group_first}] = x_index[0]
    
    # {FN.session_index_level_group_last} => FN.session_index_level_group_last
    out[:, {FN.session_index_level_group_last}] = x_index[-1]
    
    # {FN.elapsed_time_sum} => FN.elapsed_time_sum
    out[:, {FN.elapsed_time_sum}] = x_et.sum()
    
    # {FN.elapsed_time_in_increasing_order} => FN.elapsed_time_in_increasing_order
    out[:, {FN.elapsed_time_in_increasing_order}] = np.all(x_et_diff > 0)
    
    # {FN.is_level_group_0} => FN.is_level_group_0
    out[:, {FN.is_level_group_0}] = level_group_index == 0
    
    # {FN.is_level_group_1} => FN.is_level_group_1
    out[:, {FN.is_level_group_1}] = level_group_index == 1
    
    # {FN.is_level_group_2} => FN.is_level_group_2
    out[:, {FN.is_level_group_2}] = level_group_index == 2
    
    # {FN.is_question_0} => FN.is_question_0
    out[:, {FN.is_question_0}] = question_number == 0
    
    # {FN.is_question_1} => FN.is_question_1
    out[:, {FN.is_question_1}] = question_number == 1
    
    # {FN.is_question_2} => FN.is_question_2
    out[:, {FN.is_question_2}] = question_number == 2
    
    # {FN.is_question_3} => FN.is_question_3
    out[:, {FN.is_question_3}] = question_number == 3
    
    # {FN.is_question_4} => FN.is_question_4
    out[:, {FN.is_question_4}] = question_number == 4
    
    # {FN.is_question_5} => FN.is_question_5
    out[:, {FN.is_question_5}] = question_number == 5
    
    # {FN.is_question_6} => FN.is_question_6
    out[:, {FN.is_question_6}] = question_number == 6
    
    # {FN.is_question_7} => FN.is_question_7
    out[:, {FN.is_question_7}] = question_number == 7
    
    # {FN.is_question_8} => FN.is_question_8
    out[:, {FN.is_question_8}] = question_number == 8
    
    # {FN.is_question_9} => FN.is_question_9
    out[:, {FN.is_question_9}] = question_number == 9
    
    # {FN.is_question_10} => FN.is_question_10
    out[:, {FN.is_question_10}] = question_number == 10
    
    # {FN.is_question_11} => FN.is_question_11
    out[:, {FN.is_question_11}] = question_number == 11
    
    # {FN.is_question_12} => FN.is_question_12
    out[:, {FN.is_question_12}] = question_number == 12
    
    # {FN.is_question_13} => FN.is_question_13
    out[:, {FN.is_question_13}] = question_number == 13
    
    # {FN.is_question_14} => FN.is_question_14
    out[:, {FN.is_question_14}] = question_number == 14
    
    # {FN.is_question_15} => FN.is_question_15
    out[:, {FN.is_question_15}] = question_number == 15
    
    # {FN.is_question_16} => FN.is_question_16
    out[:, {FN.is_question_16}] = question_number == 16
    
    # {FN.is_question_17} => FN.is_question_17
    out[:, {FN.is_question_17}] = question_number == 17
    
    # {FN.is_question_18} => FN.is_question_18
    out[:, {FN.is_question_18}] = question_number == 18
    
    # {FN.number_of_events} => FN.number_of_events
    out[:, {FN.number_of_events}] = number_of_events
    
    # {FN.level_group} => FN.level_group
    out[:, {FN.level_group}] = level_group_index
    
    # {FN.question_number} => FN.question_number
    out[:, {FN.question_number}] = np.arange(questions_start_number, questions_end_number)
    
    # {FN.session_start_time} => FN.session_start_time
    out[:, {FN.session_start_time}] = x_et[0]
    
    # {FN.session_end_time} => FN.session_end_time
    out[:, {FN.session_end_time}] = x_et[-1]
    
    # {FN.session_duration} => FN.session_duration
    out[:, {FN.session_duration}] = session_duration
    
    # {FN.average_duration_per_event} => FN.average_duration_per_event
    out[:, {FN.average_duration_per_event}] = session_duration/number_of_events
    
    # {FN.total_hover_duration} => FN.total_hover_duration
    out[:, {FN.total_hover_duration}] = total_hover_duration
    
    # {FN.average_hover_duration} => FN.average_hover_duration
    out[:, {FN.average_hover_duration}] = average_hover_duration

    name_counts = generate_name_counts(x_n)
    
    # {FN.name_basic_0_count} => FN.name_basic_0_count
    out[:, {FN.name_basic_0_count}] = name_counts[0]
    
    # {FN.name_undefined_1_count} => FN.name_undefined_1_count
    out[:, {FN.name_undefined_1_count}] = name_counts[1]
    
    # {FN.name_close_2_count} => FN.name_close_2_count
    out[:, {FN.name_close_2_count}] = name_counts[2]
    
    # {FN.name_open_3_count} => FN.name_open_3_count
    out[:, {FN.name_open_3_count}] = name_counts[3]
    
    # {FN.name_prev_4_count} => FN.name_prev_4_count
    out[:, {FN.name_prev_4_count}] = name_counts[4]
    
    # {FN.name_next_5_count} => FN.name_next_5_count
    out[:, {FN.name_next_5_count}] = name_counts[5]
    
    # {FN.name_basic_0_proportion} => FN.name_basic_0_proportion
    out[:, {FN.name_basic_0_proportion}] = name_counts[0]/number_of_events
    
    # {FN.name_undefined_1_proportion} => FN.name_undefined_1_proportion
    out[:, {FN.name_undefined_1_proportion}] = name_counts[1]/number_of_events
    
    # {FN.name_close_2_proportion} => FN.name_close_2_proportion
    out[:, {FN.name_close_2_proportion}] = name_counts[2]/number_of_events
    
    # {FN.name_open_3_proportion} => FN.name_open_3_proportion
    out[:, {FN.name_open_3_proportion}] = name_counts[3]/number_of_events
    
    # {FN.name_prev_4_proportion} => FN.name_prev_4_proportion
    out[:, {FN.name_prev_4_proportion}] = name_counts[4]/number_of_events
    
    # {FN.name_next_5_proportion} => FN.name_next_5_proportion
    out[:, {FN.name_next_5_proportion}] = name_counts[5]/number_of_events
    
    # {FN.last_name} => FN.last_name
    out[:, {FN.last_name}] = x_n[-1]

    if level_group_index == 0:
        # {HIST.LG0_name_num_unique_count} => HIST.LG0_name_num_unique_count
        hist[{HIST.LG0_name_num_unique_count}] = (name_counts != 0).sum()
        
    # {FN.LG0_name_num_unique_count} => FN.LG0_name_num_unique_count
    # {HIST.LG0_name_num_unique_count} => HIST.LG0_name_num_unique_count
    out[:, {FN.LG0_name_num_unique_count}] = hist[{HIST.LG0_name_num_unique_count}]


    event_name_counts = generate_event_name_counts(x_en)
    # {FN.event_name_cutscene_click_0_count} => FN.event_name_cutscene_click_0_count
    out[:, {FN.event_name_cutscene_click_0_count}] = event_name_counts[0]
    
    # {FN.event_name_person_click_1_count} => FN.event_name_person_click_1_count
    out[:, {FN.event_name_person_click_1_count}] = event_name_counts[1]
    
    # {FN.event_name_navigate_click_2_count} => FN.event_name_navigate_click_2_count
    out[:, {FN.event_name_navigate_click_2_count}] = event_name_counts[2]
    
    # {FN.event_name_observation_click_3_count} => FN.event_name_observation_click_3_count
    out[:, {FN.event_name_observation_click_3_count}] = event_name_counts[3]
    
    # {FN.event_name_notification_click_4_count} => FN.event_name_notification_click_4_count
    out[:, {FN.event_name_notification_click_4_count}] = event_name_counts[4]
    
    # {FN.event_name_object_click_5_count} => FN.event_name_object_click_5_count
    out[:, {FN.event_name_object_click_5_count}] = event_name_counts[5]
    
    # {FN.event_name_object_hover_6_count} => FN.event_name_object_hover_6_count
    out[:, {FN.event_name_object_hover_6_count}] = event_name_counts[6]
    
    # {FN.event_name_map_hover_7_count} => FN.event_name_map_hover_7_count
    out[:, {FN.event_name_map_hover_7_count}] = event_name_counts[7]
    
    # {FN.event_name_map_click_8_count} => FN.event_name_map_click_8_count
    out[:, {FN.event_name_map_click_8_count}] = event_name_counts[8]
    
    # {FN.event_name_checkpoint_9_count} => FN.event_name_checkpoint_9_count
    out[:, {FN.event_name_checkpoint_9_count}] = event_name_counts[9]
    
    # {FN.event_name_notebook_click_10_count} => FN.event_name_notebook_click_10_count
    out[:, {FN.event_name_notebook_click_10_count}] = event_name_counts[10]

    # {FN.last_event_name} => FN.last_event_name
    out[:, {FN.last_event_name}] = x_en[-1]
    
    # {FN.hover_duration_max} => FN.hover_duration_max
    out[:, {FN.hover_duration_max}] = np.nanmax(x_hover_duration)
    
    # {FN.hover_duration_min} => FN.hover_duration_min
    out[:, {FN.hover_duration_min}] = np.nanmin(x_hover_duration)
    
    # {FN.session_weekday} => FN.session_weekday
    out[:, {FN.session_weekday}] = x_session_weekday[0]

    generate_fqids_count_features(out, x_fqids)

    generate_building_based_features(out, x_b, x_et)
    generate_room_based_features(out, x_r, x_et)
    generate_fqid_based_features(out, x_fqids, x_et)
    
    # {FN.level_mean} => FN.level_mean
    out[:, {FN.level_mean}] = x_l.mean()
    
    # {FN.level_std} => FN.level_std
    out[:, {FN.level_std}] = x_l.std()
    generate_level_based_features(out, x_l)
    
    
    # {FN.elapsed_time_diff_max} => FN.elapsed_time_diff_max
    out[:, {FN.elapsed_time_diff_max}] = x_et_diff.max()
    
    # {FN.elapsed_time_diff_min} => FN.elapsed_time_diff_min
    out[:, {FN.elapsed_time_diff_min}] = x_et_diff.min()
    
    # {FN.elapsed_time_diff_std} => FN.elapsed_time_diff_std
    out[:, {FN.elapsed_time_diff_std}] = x_et_diff.std()
    
    # {FN.elapsed_time_diff_median} => FN.elapsed_time_diff_median
    out[:, {FN.elapsed_time_diff_median}] = np.median(x_et_diff)

    x_index_diff = np.diff(x_index)
    session_event_index_range = x_index.max() - x_index.min()
    
    # {FN.session_index_diff_max} => FN.session_index_diff_max
    out[:, {FN.session_index_diff_max}] = x_index_diff.max()

    room_coord_distance_travelled = coordinates_distance_travelled_sum(x_rc_x, x_rc_y)
    # {FN.room_coord_distance_travelled} => FN.room_coord_distance_travelled
    out[:, {FN.room_coord_distance_travelled}] = room_coord_distance_travelled

    screen_coord_distance_travelled = coordinates_distance_travelled_sum(x_sc_x, x_sc_y)
    # {FN.screen_coord_distance_travelled} => FN.screen_coord_distance_travelled
    out[:, {FN.screen_coord_distance_travelled}] = screen_coord_distance_travelled


    text_read_speeds = calculate_read_speeds_of_text(x_tl, x_et)
    # {FN.text_read_speeds_mean} => FN.text_read_speeds_mean
    out[:, {FN.text_read_speeds_mean}] = text_read_speeds.mean()
    # {FN.text_read_speeds_std} => FN.text_read_speeds_std
    out[:, {FN.text_read_speeds_std}] = text_read_speeds.std()

    ###############################################################################################

    building_counts = generate_building_counts(x_b)
    # {FN.building_capitol_0_count} => FN.building_capitol_0_count
    out[:, {FN.building_capitol_0_count}] = building_counts[0]
    
    # {FN.building_capitol_1_count} => FN.building_capitol_1_count
    out[:, {FN.building_capitol_1_count}] = building_counts[1]
    
    # {FN.building_capitol_2_count} => FN.building_capitol_2_count
    out[:, {FN.building_capitol_2_count}] = building_counts[2]
    
    # {FN.building_drycleaner_count} => FN.building_drycleaner_count
    out[:, {FN.building_drycleaner_count}] = building_counts[3]
    
    # {FN.building_flaghouse_count} => FN.building_flaghouse_count
    out[:, {FN.building_flaghouse_count}] = building_counts[4]
    
    # {FN.building_historicalsociety_count} => FN.building_historicalsociety_count
    out[:, {FN.building_historicalsociety_count}] = building_counts[5]
    
    # {FN.building_humanecology_count} => FN.building_humanecology_count
    out[:, {FN.building_humanecology_count}] = building_counts[6]
    
    # {FN.building_kohlcenter_count} => FN.building_kohlcenter_count
    out[:, {FN.building_kohlcenter_count}] = building_counts[7]
    
    # {FN.building_library_count} => FN.building_library_count
    out[:, {FN.building_library_count}] = building_counts[8]
    
    # {FN.building_wildlife_count} => FN.building_wildlife_count
    out[:, {FN.building_wildlife_count}] = building_counts[9]
    
    # {FN.building_nunqiue} => FN.building_nunqiue
    out[:, {FN.building_nunqiue}] = np.sum(building_counts != 0)
    
    # {FN.building_capitol_0_proportion_of_events} => FN.building_capitol_0_proportion_of_events
    out[:, {FN.building_capitol_0_proportion_of_events}] = building_counts[0]/number_of_events
    
    # {FN.building_capitol_1_proportion_of_events} => FN.building_capitol_1_proportion_of_events
    out[:, {FN.building_capitol_1_proportion_of_events}] = building_counts[1]/number_of_events
    
    # {FN.building_capitol_2_proportion_of_events} => FN.building_capitol_2_proportion_of_events
    out[:, {FN.building_capitol_2_proportion_of_events}] = building_counts[2]/number_of_events
    
    # {FN.building_drycleaner_proportion_of_events} => FN.building_drycleaner_proportion_of_events
    out[:, {FN.building_drycleaner_proportion_of_events}] = building_counts[3]/number_of_events
    
    # {FN.building_flaghouse_proportion_of_events} => FN.building_flaghouse_proportion_of_events
    out[:, {FN.building_flaghouse_proportion_of_events}] = building_counts[4]/number_of_events
    
    # {FN.building_historicalsociety_proportion_of_events} => FN.building_historicalsociety_proportion_of_events
    out[:, {FN.building_historicalsociety_proportion_of_events}] = building_counts[5]/number_of_events
    
    # {FN.building_humanecology_proportion_of_events} => FN.building_humanecology_proportion_of_events
    out[:, {FN.building_humanecology_proportion_of_events}] = building_counts[6]/number_of_events
    
    # {FN.building_kohlcenter_proportion_of_events} => FN.building_kohlcenter_proportion_of_events
    out[:, {FN.building_kohlcenter_proportion_of_events}] = building_counts[7]/number_of_events
    
    # {FN.building_library_proportion_of_events} => FN.building_library_proportion_of_events
    out[:, {FN.building_library_proportion_of_events}] = building_counts[8]/number_of_events
    
    # {FN.building_wildlife_proportion_of_events} => FN.building_wildlife_proportion_of_events
    out[:, {FN.building_wildlife_proportion_of_events}] = building_counts[9]/number_of_events

    room_counts = generate_room_counts(x_r)
    
    # {FN.room_basement_count} => FN.room_basement_count
    out[:, {FN.room_basement_count}] = room_counts[0]
    
    # {FN.room_cage_count} => FN.room_cage_count
    out[:, {FN.room_cage_count}] = room_counts[1]
    
    # {FN.room_center_count} => FN.room_center_count
    out[:, {FN.room_center_count}] = room_counts[2]
    
    # {FN.room_closet_count} => FN.room_closet_count
    out[:, {FN.room_closet_count}] = room_counts[3]
    
    # {FN.room_closet_dirty_count} => FN.room_closet_dirty_count
    out[:, {FN.room_closet_dirty_count}] = room_counts[4]
    
    # {FN.room_collection_count} => FN.room_collection_count
    out[:, {FN.room_collection_count}] = room_counts[5]
    
    # {FN.room_collection_flag_count} => FN.room_collection_flag_count
    out[:, {FN.room_collection_flag_count}] = room_counts[6]
    
    # {FN.room_entry_count} => FN.room_entry_count
    out[:, {FN.room_entry_count}] = room_counts[7]
    
    # {FN.room_frontdesk_count} => FN.room_frontdesk_count
    out[:, {FN.room_frontdesk_count}] = room_counts[8]
    
    # {FN.room_hall_count} => FN.room_hall_count
    out[:, {FN.room_hall_count}] = room_counts[9]
    
    # {FN.room_halloffame_count} => FN.room_halloffame_count
    out[:, {FN.room_halloffame_count}] = room_counts[10]
    
    # {FN.room_microfiche_count} => FN.room_microfiche_count
    out[:, {FN.room_microfiche_count}] = room_counts[11]
    
    # {FN.room_stacks_count} => FN.room_stacks_count
    out[:, {FN.room_stacks_count}] = room_counts[12]
    
    # {FN.room_nunqiue} => FN.room_nunqiue
    out[:, {FN.room_nunqiue}] = np.sum(room_counts != 0)

    building_visits_counts = generate_building_visits_counts(x_b)
    
    # {FN.building_capitol_0_visits_count} => FN.building_capitol_0_visits_count
    out[:, {FN.building_capitol_0_visits_count}] = building_visits_counts[0]
    
    # {FN.building_capitol_1_visits_count} => FN.building_capitol_1_visits_count
    out[:, {FN.building_capitol_1_visits_count}] = building_visits_counts[1]
    
    # {FN.building_capitol_2_visits_count} => FN.building_capitol_2_visits_count
    out[:, {FN.building_capitol_2_visits_count}] = building_visits_counts[2]
    
    # {FN.building_drycleaner_visits_count} => FN.building_drycleaner_visits_count
    out[:, {FN.building_drycleaner_visits_count}] = building_visits_counts[3]
    
    # {FN.building_flaghouse_visits_count} => FN.building_flaghouse_visits_count
    out[:, {FN.building_flaghouse_visits_count}] = building_visits_counts[4]
    
    # {FN.building_historicalsociety_visits_count} => FN.building_historicalsociety_visits_count
    out[:, {FN.building_historicalsociety_visits_count}] = building_visits_counts[5]
    
    # {FN.building_humanecology_visits_count} => FN.building_humanecology_visits_count
    out[:, {FN.building_humanecology_visits_count}] = building_visits_counts[6]
    
    # {FN.building_kohlcenter_visits_count} => FN.building_kohlcenter_visits_count
    out[:, {FN.building_kohlcenter_visits_count}] = building_visits_counts[7]
    
    # {FN.building_library_visits_count} => FN.building_library_visits_count
    out[:, {FN.building_library_visits_count}] = building_visits_counts[8]
    
    # {FN.building_wildlife_visits_count} => FN.building_wildlife_visits_count
    out[:, {FN.building_wildlife_visits_count}] = building_visits_counts[9]

    # {FN.building_visits_nunqiue} => FN.building_visits_nunqiue
    out[:, {FN.building_visits_nunqiue}] = np.sum(building_visits_counts != 0)

    room_visits_counts = generate_room_visits_counts(x_r)
    
    # {FN.room_basement_visits_count} => FN.room_basement_visits_count
    out[:, {FN.room_basement_visits_count}] = room_visits_counts[0]
    
    # {FN.room_cage_visits_count} => FN.room_cage_visits_count
    out[:, {FN.room_cage_visits_count}] = room_visits_counts[1]
    
    # {FN.room_center_visits_count} => FN.room_center_visits_count
    out[:, {FN.room_center_visits_count}] = room_visits_counts[2]
    
    # {FN.room_closet_visits_count} => FN.room_closet_visits_count
    out[:, {FN.room_closet_visits_count}] = room_visits_counts[3]
    
    # {FN.room_closet_dirty_visits_count} => FN.room_closet_dirty_visits_count
    out[:, {FN.room_closet_dirty_visits_count}] = room_visits_counts[4]
    
    # {FN.room_collection_visits_count} => FN.room_collection_visits_count
    out[:, {FN.room_collection_visits_count}] = room_visits_counts[5]
    
    # {FN.room_collection_flag_visits_count} => FN.room_collection_flag_visits_count
    out[:, {FN.room_collection_flag_visits_count}] = room_visits_counts[6]
    
    # {FN.room_entry_visits_count} => FN.room_entry_visits_count
    out[:, {FN.room_entry_visits_count}] = room_visits_counts[7]
    
    # {FN.room_frontdesk_visits_count} => FN.room_frontdesk_visits_count
    out[:, {FN.room_frontdesk_visits_count}] = room_visits_counts[8]
    
    # {FN.room_hall_visits_count} => FN.room_hall_visits_count
    out[:, {FN.room_hall_visits_count}] = room_visits_counts[9]
    
    # {FN.room_halloffame_visits_count} => FN.room_halloffame_visits_count
    out[:, {FN.room_halloffame_visits_count}] = room_visits_counts[10]
    
    # {FN.room_microfiche_visits_count} => FN.room_microfiche_visits_count
    out[:, {FN.room_microfiche_visits_count}] = room_visits_counts[11]
    
    # {FN.room_stacks_visits_count} => FN.room_stacks_visits_count
    out[:, {FN.room_stacks_visits_count}] = room_visits_counts[12]

    # {FN.room_visits_nunqiue} => FN.room_visits_nunqiue
    out[:, {FN.room_visits_nunqiue}] = np.sum(room_visits_counts != 0)

    ############################################################################################


    calculate_LG0_features(
        level_group_index,
        x_l,
        out[:],
        hist,
        x_et,
        x_index,
        x_text_numerical,
        x_en,
        x_n,
        x_fqids,
        x_rc_x,
        x_b,
        x_r,
        x_text_fqid_numerical,
    )

    calculate_LG1_features(
        level_group_index,
        x_l,
        out[:],
        hist,
        x_et,
        x_index,
        x_text_numerical,
        x_en,
        x_n,
        x_fqids,
        x_et,
        x_rc_x,
        x_rc_y,
        x_b,
        x_r,
        x_text_fqid_numerical,
    )

    calculate_LG2_features(
        level_group_index,
        x_l,
        out[:],
        hist,
        x_et,
        x_index,
        x_text_numerical,
        x_en,
        x_n,
        x_fqids,
        x_et,
        x_rc_x,
        x_rc_y,
        x_b,
        x_r,
    )

    # num_input_rows = e-s
"""

# Execute the source text held in `src`. Called without explicit globals/locals,
# exec() runs the code in the current namespace, so whatever `src` defines
# becomes available to the cells that follow.
# NOTE(review): `src` is assembled above in this notebook (its opening is outside
# this view) — confirm it never incorporates untrusted input.
exec(src)

In [101]:
# Debug aid: uncomment the next line to inspect the generated source text.
# print(src)

In [69]:
# Display the length of HISTORY_LST together with NUM_FEATURES as a quick size check.
len(HISTORY_LST), NUM_FEATURES
# Output recorded when this notebook was last run: (3183, 1590)

(3183, 1590)

In [70]:
# Save the two sizes produced by this notebook (feature count and the length
# of the history list) to a small JSON file in the working directory.
with open("FEATURES_GENERATION_INFO.json", "w") as f:
    json.dump(
        {'NUM_FEATURES': NUM_FEATURES, 'HISTORY_LEN': len(HISTORY_LST)},
        f,
    )

In [71]:
# Serialise FEATURE_NAMES to JSON in the working directory.
with open("FEATURE_NAMES.json", "w") as f:
    json.dump(FEATURE_NAMES, f)

In [72]:
# `cc` is defined earlier in the notebook (not visible here); presumably it is
# the numba.pycc `CC` ahead-of-time compiler object — TODO confirm.
# Turn off verbose build output, then run the compilation step.
cc.verbose = False
cc.compile()

In [73]:
!ls -lh

total 7.2G
-rw-rw-r-- 1 na   na   1.7K Jul 30 09:21 conversion.ipynb
-rw-rw-r-- 1 na   na   147K Jul 31 12:23 feature_code_dynamic_funcs.ipynb
-rw-rw-r-- 1 na   na   119K Aug  1 09:46 feature_code_dynamic_funcs.py
-rw-rw-r-- 1 na   na   141K Aug  1 09:46 FEATURE_NAMES.json
-rw-rw-r-- 1 na   na   162K Aug  1 09:45 features_code.ipynb
-rw-rw-r-- 1 na   na     43 Aug  1 09:46 FEATURES_GENERATION_INFO.json
drwxrwxr-x 4 na   na   4.0K Jul 30 09:21 input
-rw-rw-r-- 1 na   na   748K Jul 31 11:30 JoWilder_C_features.cpp
-rwxrwxr-x 1 na   na   988K Aug  1 09:47 JoWilder_numba_features.cpython-37m-x86_64-linux-gnu.so
drwx------ 2 root root  16K Apr 20 06:30 lost+found
drwxrwxr-x 2 na   na   4.0K Aug  1 09:42 modules
drwxrwxr-x 2 na   na   4.0K Jul 31 11:30 modules_
drwxrwxr-x 3 na   na   4.0K Jul 30 21:58 place_1st
drwxrwxr-x 3 na   na   4.0K Jul 31 22:06 place_2nd
-rw-rw-r-- 1 na   na   968M Jul 28 22:32 predict-student-performance-from-game-play.zip
-rw-rw-r-- 1 na   na   6.3G 