# Feature Extraction Tools

In [None]:
# time series classification algorithms library
!pip install sktime
# For saving and loading the trained models
!pip install joblib

In [1]:
# For the dataset we will be using UCR UEA time series archive and access it through sktime library 
from sktime.datasets import load_UCR_UEA_dataset
import os
import joblib
import pandas as pd

In [2]:
# A function for saving each model after training
def save_model(name, clf):
    """Persist a fitted estimator to ``./model/<name>-InlineSkate.pkl``.

    Parameters
    ----------
    name : str
        Short identifier of the estimator (e.g. "SFA"); used as the file-name prefix.
    clf : object
        The fitted estimator/transformer to serialise with joblib.
    """
    # Directory and file name the model is written to
    save_directory = './model/'
    model_filename = f"{name}-InlineSkate.pkl"

    # The dump below fails if the target folder is missing, so create it first
    # (this is a no-op when the folder already exists).
    os.makedirs(save_directory, exist_ok=True)

    # Combine the directory and filename to create the full file path
    full_model_path = os.path.join(save_directory, model_filename)

    # Save the trained model to the specified directory
    joblib.dump(clf, full_model_path)

## Data Preparation

In [3]:
# Load the training split of the InlineSkate dataset; the same training data is used for all 4 feature extraction tools
x_train, y_train = load_UCR_UEA_dataset("InlineSkate", split="train", return_X_y=True)

In [4]:
# Both lengths are 100: the training split holds 100 windows of time series data,
# for example [ [window1] ... [window100] ], and every window has one corresponding label
for split_part in (x_train, y_train):
    print(len(split_part))

100
100


In [5]:
# A quick look at x_train: one row per window, with the whole series stored inside the single `dim_0` column
x_train

Unnamed: 0,dim_0
0,0 -0.036166 1 -0.151640 2 -0.08...
1,0 -0.554269 1 -0.593670 2 -0.57...
2,0 0.149520 1 0.026359 2 0.16...
3,0 -0.363134 1 -0.646226 2 -0.69...
4,0 0.090731 1 -0.092733 2 -0.02...
...,...
95,0 -0.109753 1 -0.234050 2 -0.14...
96,0 0.127897 1 -0.000188 2 0.03...
97,0 -0.729887 1 -0.902955 2 -0.77...
98,0 0.042380 1 -0.122538 2 -0.13...


In [4]:
# Let's take a look at the first window: each window contains 1882 float values,
# so in total we have 1882 * 100 = 188,200 measurements.
# x_train.values[0] is the first row: a length-1 object array whose only element is the pandas Series of that window
single_time_series = x_train.values[0]
single_time_series

array([0      -0.036166
       1      -0.151640
       2      -0.089001
       3      -0.133278
       4      -0.077171
                 ...
       1877   -0.155756
       1878   -0.180096
       1879   -0.127859
       1880   -0.192024
       1881   -0.110382
       Length: 1882, dtype: float64], dtype=object)

In [5]:
# Wrap the first window in a one-row DataFrame so it can be passed to a fitted transformer's transform()
single_time_series_df = pd.DataFrame({"first_window": single_time_series})
single_time_series_df

Unnamed: 0,first_window
0,0 -0.036166 1 -0.151640 2 -0.08...


In [6]:
# The second window (row index 1 of the training data), again a length-1 object array holding one pandas Series
single_time_series1 = x_train.values[1]
single_time_series1

array([0      -0.554269
       1      -0.593670
       2      -0.579670
       3      -0.602387
       4      -0.604805
                 ...
       1877   -0.493976
       1878   -0.531907
       1879   -0.498096
       1880   -0.537827
       1881   -0.517406
       Length: 1882, dtype: float64], dtype=object)

In [7]:
# Wrap the second window in a one-row DataFrame so it can be passed to a fitted transformer's transform()
single_time_series_df1 = pd.DataFrame({"second_window": single_time_series1})
single_time_series_df1

Unnamed: 0,second_window
0,0 -0.554269 1 -0.593670 2 -0.57...


## SFA - Symbolic Fourier Approximation Transformer

In [7]:
# Import the time series transformer - SFA
from sktime.transformations.panel.dictionary_based import SFA

In [52]:
# Create an SFA transformer with default settings and fit it on the training
# split in one expression (fit() hands back the fitted transformer itself),
# then persist it so later cells can reload it instead of refitting
sfa_transformer = SFA().fit(x_train, y_train)
save_model("SFA", sfa_transformer)

In [8]:
# Reload the fitted SFA transformer from the pickle that save_model("SFA", ...) writes to ./model/
# NOTE(review): there is no existence check here - presumably this fails if the fit cell has never been run; confirm
sfa_transformer = joblib.load("./model/SFA-InlineSkate.pkl")

In [32]:
# Transform the first window (wrapped as a one-row DataFrame) using the fitted SFA transformer
transformed_sfa_window = sfa_transformer.transform(single_time_series_df)

In [34]:
# The first window, which has 1882 floats in it, has been transformed into a dictionary (reached via transformed_sfa_window[0][0])
# The keys are the words (represented as integers) and the values are the frequency of each word in the window (how many times it was captured inside the window)
# This process allows us to capture important characteristics of the data while reducing its complexity, making it easier to analyze and interpret.
print("number of features in a window before transformation:", len(x_train.values[0][0]))
# Number of distinct words found in the window
print("number of features (words) in a window after transformation:", len(transformed_sfa_window[0][0]))
# Sum of all word counts, i.e. how many words were emitted for the window in total
print("The total frequency of all the words inside the trandformed window:", sum(transformed_sfa_window[0][0].values()))
print("------------------------------------------------------------------------------------------------------------------------------------------")
print("The transformed window:")
print(transformed_sfa_window)

number of features in a window before transformation: 1882
number of features (words) in a window after transformation: 1489
The total frequency of all the words inside the trandformed window: 1871
------------------------------------------------------------------------------------------------------------------------------------------
The transformed window:
[[{34201: 1, 33110: 1, 33177: 1, 33429: 1, 34453: 1, 34327: 1, 36445: 1, 36496: 1, 36241: 1, 36183: 1, 34094: 1, 34216: 1, 33233: 1, 33175: 1, 34125: 1, 35076: 1, 35089: 1, 36199: 1, 34989: 1, 34004: 2, 33106: 2, 33115: 1, 33116: 1, 33313: 1, 34534: 1, 35546: 1, 35401: 1, 36373: 1, 36134: 2, 36282: 1, 36053: 1, 32919: 1, 32909: 1, 33028: 1, 33041: 1, 19844: 1, 18690: 1, 33063: 1, 33213: 1, 33524: 1, 33490: 1, 35530: 1, 36104: 1, 36113: 1, 36150: 1, 36073: 1, 34005: 1, 33111: 2, 33181: 1, 33092: 1, 33298: 1, 35163: 1, 36188: 1, 36112: 1, 35923: 1, 33887: 1, 33133: 1, 33172: 2, 33217: 2, 34182: 1, 34058: 1, 34105: 2, 36276: 2, 36305:

In [29]:
# Transform the second window (wrapped as a one-row DataFrame) using the fitted SFA transformer
transformed_sfa_window1 = sfa_transformer.transform(single_time_series_df1)

In [35]:
# Report on the second window: it is row 1 of x_train, so measure that row
# (the cell previously measured row 0, i.e. the first window)
print("number of features in a window before transformation:", len(x_train.values[1][0]))
# Number of distinct words found in the second window
print("number of features (words) in a window after transformation:", len(transformed_sfa_window1[0][0]))
# Sum of all word counts, i.e. how many words were emitted for the window in total
print("The total frequency of all the words inside the trandformed window:", sum(transformed_sfa_window1[0][0].values()))
print("------------------------------------------------------------------------------------------------------------------------------------------")
print("The transformed window:")
print(transformed_sfa_window1)

number of features in a window before transformation: 1882
number of features (words) in a window after transformation: 1305
The total frequency of all the words inside the trandformed window: 1871
------------------------------------------------------------------------------------------------------------------------------------------
The transformed window:
[[{19179: 1, 19391: 1, 19389: 1, 19445: 1, 19367: 1, 19375: 1, 20397: 1, 20405: 1, 20151: 1, 19135: 1, 18430: 1, 19433: 1, 19431: 1, 20383: 1, 20334: 1, 20345: 1, 20214: 1, 20203: 1, 19166: 1, 2925: 1, 2934: 1, 4090: 1, 3819: 3, 1727: 1, 3069: 1, 1957: 1, 2983: 1, 2991: 2, 2990: 1, 3001: 1, 4070: 1, 3755: 3, 18047: 1, 18365: 1, 18421: 1, 19415: 1, 19359: 1, 20333: 1, 20341: 1, 20215: 1, 20207: 1, 2798: 2, 2985: 3, 2998: 1, 3817: 1, 3751: 1, 3695: 4, 3774: 2, 1719: 2, 1790: 2, 1770: 1, 1946: 1, 3929: 1, 3622: 2, 3707: 1, 3837: 2, 2806: 2, 1755: 1, 1695: 1, 1641: 1, 1637: 1, 1975: 1, 3054: 4, 3801: 1, 3927: 1, 3709: 1, 3818: 1, 2779:

In [49]:
# Transform the whole training split at once; the result is a nested list of word -> count dictionaries
# NOTE: displaying the full object prints every dictionary and produces a very long output
transformed_sfa_series = sfa_transformer.transform(x_train)
transformed_sfa_series

[[{34201: 1,
   33110: 1,
   33177: 1,
   33429: 1,
   34453: 1,
   34327: 1,
   36445: 1,
   36496: 1,
   36241: 1,
   36183: 1,
   34094: 1,
   34216: 1,
   33233: 1,
   33175: 1,
   34125: 1,
   35076: 1,
   35089: 1,
   36199: 1,
   34989: 1,
   34004: 2,
   33106: 2,
   33115: 1,
   33116: 1,
   33313: 1,
   34534: 1,
   35546: 1,
   35401: 1,
   36373: 1,
   36134: 2,
   36282: 1,
   36053: 1,
   32919: 1,
   32909: 1,
   33028: 1,
   33041: 1,
   19844: 1,
   18690: 1,
   33063: 1,
   33213: 1,
   33524: 1,
   33490: 1,
   35530: 1,
   36104: 1,
   36113: 1,
   36150: 1,
   36073: 1,
   34005: 1,
   33111: 2,
   33181: 1,
   33092: 1,
   33298: 1,
   35163: 1,
   36188: 1,
   36112: 1,
   35923: 1,
   33887: 1,
   33133: 1,
   33172: 2,
   33217: 2,
   34182: 1,
   34058: 1,
   34105: 2,
   36276: 2,
   36305: 1,
   34141: 1,
   34132: 1,
   35152: 1,
   36226: 2,
   34826: 1,
   33833: 1,
   32885: 1,
   33254: 1,
   33226: 1,
   33354: 1,
   33305: 1,
   35620: 1,
   36513: 1,

In [54]:
# Word -> count dictionary of the second training window (window index 1)
# NOTE(review): the outer index 0 presumably selects the single dimension of this univariate dataset - confirm
transformed_sfa_series[0][1]

{19179: 1,
 19391: 1,
 19389: 1,
 19445: 1,
 19367: 1,
 19375: 1,
 20397: 1,
 20405: 1,
 20151: 1,
 19135: 1,
 18430: 1,
 19433: 1,
 19431: 1,
 20383: 1,
 20334: 1,
 20345: 1,
 20214: 1,
 20203: 1,
 19166: 1,
 2925: 1,
 2934: 1,
 4090: 1,
 3819: 3,
 1727: 1,
 3069: 1,
 1957: 1,
 2983: 1,
 2991: 2,
 2990: 1,
 3001: 1,
 4070: 1,
 3755: 3,
 18047: 1,
 18365: 1,
 18421: 1,
 19415: 1,
 19359: 1,
 20333: 1,
 20341: 1,
 20215: 1,
 20207: 1,
 2798: 2,
 2985: 3,
 2998: 1,
 3817: 1,
 3751: 1,
 3695: 4,
 3774: 2,
 1719: 2,
 1790: 2,
 1770: 1,
 1946: 1,
 3929: 1,
 3622: 2,
 3707: 1,
 3837: 2,
 2806: 2,
 1755: 1,
 1695: 1,
 1641: 1,
 1637: 1,
 1975: 1,
 3054: 4,
 3801: 1,
 3927: 1,
 3709: 1,
 3818: 1,
 2779: 1,
 1630: 1,
 3672: 1,
 2658: 1,
 2427: 2,
 1726: 3,
 2792: 1,
 1682: 1,
 1627: 2,
 2653: 1,
 2660: 1,
 3746: 1,
 3739: 1,
 3421: 1,
 1317: 1,
 2485: 1,
 1510: 2,
 1497: 1,
 1349: 1,
 2325: 1,
 2341: 1,
 2421: 2,
 3557: 2,
 1431: 2,
 350: 1,
 553: 1,
 677: 1,
 998: 2,
 1899: 1,
 3740: 1,
 3665:

In [66]:
# The words (keys) SFA generates are shared between different dictionaries: adding up the
# dictionary sizes counts a shared word once per dictionary, while the set keeps each word once.
# NOTE: the first figure printed below is therefore the total number of dictionary entries,
# not a count of unique features; the second figure (14982 in the recorded run) is the number of distinct words.
histograms = [hist for group in transformed_sfa_series for hist in group]
num_of_total_keys_in_series = sum(len(hist) for hist in histograms)
unique_features = set().union(*(hist.keys() for hist in histograms))

print("number of unique features by suming lenghts of dicts:", num_of_total_keys_in_series)
print("number of unique features by creating a set with the dictionaries keys:", len(unique_features))

number of unique features by suming lenghts of dicts: 142315
number of unique features by creating a set with the dictionaries keys: 14982


In [13]:
# For each word the transformer formed for the first window, show the list of integers that make up the word
# Every word is 8 symbols long and each symbol is between 0 and 3 (according to documentation)
# NOTE(review): a word seems to be produced per sliding sub-window rather than per whole window -
# the word counts of a 1882-point window sum to 1871 - confirm against the SFA documentation
for word in transformed_sfa_window[0][0]:
    print(f"The word: {word} | The list of integers to obtain the word: {sfa_transformer.word_list(word)}")

The word: 34201 | The list of integers to obtain the word: [2, 0, 1, 1, 2, 1, 2, 1]
The word: 33110 | The list of integers to obtain the word: [2, 0, 0, 1, 1, 1, 1, 2]
The word: 33177 | The list of integers to obtain the word: [2, 0, 0, 1, 2, 1, 2, 1]
The word: 33429 | The list of integers to obtain the word: [2, 0, 0, 2, 2, 1, 1, 1]
The word: 34453 | The list of integers to obtain the word: [2, 0, 1, 2, 2, 1, 1, 1]
The word: 34327 | The list of integers to obtain the word: [2, 0, 1, 2, 0, 1, 1, 3]
The word: 36445 | The list of integers to obtain the word: [2, 0, 3, 2, 1, 1, 3, 1]
The word: 36496 | The list of integers to obtain the word: [2, 0, 3, 2, 2, 1, 0, 0]
The word: 36241 | The list of integers to obtain the word: [2, 0, 3, 1, 2, 1, 0, 1]
The word: 36183 | The list of integers to obtain the word: [2, 0, 3, 1, 1, 1, 1, 3]
The word: 34094 | The list of integers to obtain the word: [2, 0, 1, 1, 0, 2, 3, 2]
The word: 34216 | The list of integers to obtain the word: [2, 0, 1, 1, 2, 2

## ROCKET

In [17]:
# Import the time series transformer - ROCKET
from sktime.transformations.panel.rocket import Rocket

In [18]:
# Initialize ROCKET transformer
# ROCKET draws its convolutional kernels at random, so pin the seed: without it every
# fit produces a different model and different feature values, and results cannot be reproduced
rocket_transformer = Rocket(random_state=42)

# Fit data and persist the fitted transformer for later cells
rocket_transformer.fit(x_train, y_train)
save_model("ROCKET", rocket_transformer)

In [None]:
# Reload the fitted ROCKET transformer from the pickle that save_model("ROCKET", ...) writes to ./model/
# NOTE(review): there is no existence check here - presumably this fails if the fit cell has never been run; confirm
rocket_transformer = joblib.load("./model/ROCKET-InlineSkate.pkl")

In [25]:
# Check the container type of the single-window input that is passed to transform()
type(single_time_series_df)

pandas.core.frame.DataFrame

In [21]:
# Transform the first window using the fitted ROCKET transformer
# single_time_series_df is a one-row pandas DataFrame holding the whole series in a single cell; the type of the result is inspected in the next cell
transformed_rocket_window = rocket_transformer.transform(single_time_series_df)

In [46]:
# The first window, which has 1882 floats in it, has been transformed into a pandas DataFrame with 1 row and 20,000 columns
# Each column is one feature computed from the whole input series by one of ROCKET's random convolutional kernels
# Per the sktime Rocket documentation the default is 10,000 kernels, each yielding two features
# (the maximum and the proportion of positive values of the convolution output) - hence 20,000 columns
# NOTE(review): these are not classical summary statistics such as mean / standard deviation / skewness - confirm against the docs
print("number of features in a window before transformation:", len(x_train.values[0][0]))
# Prints the number of features (number of columns) ROCKET algorithm extracted from the time series data
print("number of features in a window after transformation:", transformed_rocket_window.shape[1])
print("------------------------------------------------------------------------------------------------------------------------------------------")
print(type(transformed_rocket_window))
transformed_rocket_window

number of features in a window before transformation: 1882
number of features in a window after transformation: 20000
------------------------------------------------------------------------------------------------------------------------------------------
<class 'pandas.core.frame.DataFrame'>


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,19990,19991,19992,19993,19994,19995,19996,19997,19998,19999
0,0.532412,2.174425,0.317346,3.293074,0.388244,1.842634,0.036663,0.183144,0.396316,4.609451,...,0.990249,1.735877,1.0,1.253237,0.748019,5.066895,0.902763,0.549569,1.0,0.565368


In [None]:
# Transform the second window of the time series using the fitted ROCKET transformer
transformed_rocket_window1 = rocket_transformer.transform(single_time_series_df1)

In [47]:
# Report on the second window: it is row 1 of x_train, so measure that row
# (the cell previously measured row 0, i.e. the first window)
print("number of features in a window before transformation:", len(x_train.values[1][0]))
# Prints the number of features (number of columns) ROCKET algorithm extracted from the time series data
print("number of features in a window after transformation:", transformed_rocket_window1.shape[1])
print("------------------------------------------------------------------------------------------------------------------------------------------")
transformed_rocket_window1

number of features in a window before transformation: 1882
number of features in a window after transformation: 20000
------------------------------------------------------------------------------------------------------------------------------------------


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,19990,19991,19992,19993,19994,19995,19996,19997,19998,19999
0,0.561105,5.012499,0.411301,6.280939,0.142961,5.004242,0.038257,0.703951,0.57708,7.006855,...,0.973998,3.247479,1.0,1.797986,0.757528,7.586633,0.883634,1.013285,1.0,0.642401


In [None]:
# By default ROCKET normalises the time series values with the following formula; this can be turned off by using: transformer = Rocket(normalise = False)
"""
if self.normalise:
            X = (X - X.mean(axis=-1, keepdims=True)) / (
                X.std(axis=-1, keepdims=True) + 1e-8
            ) 
"""

## ShapeletTransform

In [None]:
# get_shapelets()

In [8]:
from sktime.transformations.panel.shapelet_transform import ShapeletTransform

In [None]:
# Create a ShapeletTransform with default settings and fit it on the training
# split in one expression (fit() hands back the fitted transformer itself),
# then persist it so later cells can reload it instead of refitting
shapelet_transformer = ShapeletTransform().fit(x_train, y_train)
save_model("Shapelet", shapelet_transformer)

In [None]:
# Reload the fitted Shapelet transformer from the pickle that save_model("Shapelet", ...) writes to ./model/
# NOTE(review): there is no existence check here - presumably this fails if the fit cell has never been run; confirm
shapelet_transformer = joblib.load("./model/Shapelet-InlineSkate.pkl")

In [None]:
# Transform the first window using the fitted Shapelet transformer
# single_time_series_df is a one-row pandas DataFrame holding the whole series in a single cell
# NOTE(review): this cell has no recorded output, so the type of the returned object is not shown here - check it after running
transformed_shapelet_window = shapelet_transformer.transform(single_time_series_df)

In [None]:
# Show what the first window, which has 1882 floats in it, has been transformed to by the Shapelet transformer
print("number of features in a window before transformation:", len(x_train.values[0][0]))
# TODO: also print the number of extracted features; the disabled line below still refers to the ROCKET result
# and must use transformed_shapelet_window before it is re-enabled
# print("number of features in a window after transformation:", transformed_rocket_window.shape[1])
print("------------------------------------------------------------------------------------------------------------------------------------------")
print(type(transformed_shapelet_window))
transformed_shapelet_window

In [None]:
# Transform the second window of the time series using the fitted Shapelet transformer
transformed_shapelet_window1 = shapelet_transformer.transform(single_time_series_df1)

In [None]:
# Report on the second window: it is row 1 of x_train, so measure that row
# (the cell previously measured row 0, i.e. the first window)
print("number of features in a window before transformation:", len(x_train.values[1][0]))
# TODO: also print the number of extracted features; the disabled line below still refers to the ROCKET result
# and must use transformed_shapelet_window1 before it is re-enabled
# print("number of features in a window after transformation:", transformed_rocket_window1.shape[1])
print("------------------------------------------------------------------------------------------------------------------------------------------")
transformed_shapelet_window1

## Signature Transform

## SFA on the ArrowHead dataset

In [79]:
from sktime.datasets import load_arrow_head
from sktime.transformations.panel.dictionary_based import SFA
import pandas as pd
# from sktime.pipeline import Pipeline
# from sklearn.ensemble import RandomForestClassifier
# from sklearn.model_selection import train_test_split
# from sklearn.metrics import accuracy_score


In [43]:
# Load data
# X, y = load_arrow_head(return_X_y=True)
# X_train, X_test, y_train, y_test = train_test_split(X, y)

In [80]:
# Load the ArrowHead training split
# NOTE: this rebinds y_train and sfa_transformer, which earlier cells bound to the InlineSkate labels and model
X_train, y_train = load_arrow_head(split="train", return_X_y=True)

# Initialize SFA transformer
sfa_transformer = SFA()

# Fit once, with the labels, and transform in the same call.
# The transformer used to be fitted a second time right after fit_transform, which only repeated the work.
X_train_sfa = sfa_transformer.fit_transform(X_train, y_train)


In [89]:
# View the ArrowHead training frame as a NumPy array: one entry per series (36 in total),
# where each entry is a length-1 object array holding a pandas Series of 251 values
X_train_array = X_train.values
print(X_train_array[0])
print("len:", len(X_train_array))

[0     -1.963009
 1     -1.957825
 2     -1.956145
 3     -1.938289
 4     -1.896657
          ...
 246   -1.841345
 247   -1.884289
 248   -1.905393
 249   -1.923905
 250   -1.909153
 Length: 251, dtype: float64]
len: 36


In [88]:
# The labels print as an array of the strings '0', '1' and '2', one per series (36 in total)
print(y_train)
print("len:", len(y_train))

['0' '1' '2' '0' '1' '2' '0' '1' '2' '0' '1' '2' '0' '1' '2' '0' '1' '2'
 '0' '1' '2' '0' '1' '2' '0' '1' '2' '0' '1' '2' '0' '1' '2' '0' '1' '2']
len: 36


In [82]:
# Plain-text dump of the ArrowHead training frame: a single `dim_0` column whose cells each hold a whole series
print(X_train)

                                                dim_0
0   0     -1.963009
1     -1.957825
2     -1.95614...
1   0     -1.774571
1     -1.774036
2     -1.77658...
2   0     -1.866021
1     -1.841991
2     -1.83502...
3   0     -2.073758
1     -2.073301
2     -2.04460...
4   0     -1.746255
1     -1.741263
2     -1.72274...
5   0     -1.982806
1     -1.978861
2     -1.93733...
6   0     -2.083023
1     -2.092058
2     -2.04946...
7   0     -1.633596
1     -1.643175
2     -1.61367...
8   0     -1.717013
1     -1.728059
2     -1.68330...
9   0     -2.245295
1     -2.223850
2     -2.17187...
10  0     -1.844180
1     -1.839913
2     -1.82574...
11  0     -1.833709
1     -1.827681
2     -1.77645...
12  0     -2.105590
1     -2.121538
2     -2.03591...
13  0     -1.900535
1     -1.882731
2     -1.86845...
14  0     -2.188812
1     -2.185519
2     -2.17646...
15  0     -2.164456
1     -2.178541
2     -2.06604...
16  0     -2.053743
1     -2.036852
2     -2.03303...
17  0     -1.653743
1     -1

In [131]:
# X_train_array[0] is the first ArrowHead row: a length-1 object array holding one pandas Series of 251 values
# NOTE: this rebinds single_time_series and single_time_series_df, which earlier cells bound to the first InlineSkate window
single_time_series = X_train_array[0]

# Wrap the single time series in a one-row DataFrame so it can be passed to transform()
single_time_series_df = pd.DataFrame({"column_name": single_time_series})
print(single_time_series)

[0     -1.963009
 1     -1.957825
 2     -1.956145
 3     -1.938289
 4     -1.896657
          ...
 246   -1.841345
 247   -1.884289
 248   -1.905393
 249   -1.923905
 250   -1.909153
 Length: 251, dtype: float64]


In [124]:
# Transform the first ArrowHead series using the SFA transformer fitted on the ArrowHead training split
tr1 = sfa_transformer.transform(single_time_series_df)
print(tr1)
# Number of distinct words in the resulting word -> count dictionary
print("len:", len(tr1[0][0]))

[[{2117: 1, 1044: 1, 1108: 3, 1088: 1, 1089: 4, 1092: 2, 1028: 1, 1024: 1, 2112: 1, 17476: 1, 17472: 3, 17473: 1, 17413: 3, 17412: 1, 17409: 1, 17428: 1, 16464: 1, 16705: 1, 33029: 1, 33044: 1, 33040: 1, 33041: 1, 33025: 1, 33028: 2, 33024: 2, 34048: 3, 50433: 1, 50436: 2, 50180: 1, 50192: 1, 50257: 1, 50261: 1, 49237: 2, 49425: 4, 49429: 1, 49428: 2, 49409: 3, 49412: 1, 51456: 1, 51474: 1, 51227: 1, 50284: 1, 49585: 1, 49575: 1, 49839: 1, 49838: 1, 50106: 1, 51115: 1, 52139: 1, 52142: 1, 52154: 1, 35771: 1, 35775: 2, 35519: 2, 35838: 1, 35835: 1, 35839: 1, 35834: 2, 35818: 1, 36843: 1, 36782: 2, 36522: 1, 36586: 1, 36587: 1, 36591: 1, 36543: 1, 35583: 2, 33535: 1, 33471: 2, 18346: 2, 19370: 2, 20410: 1, 20218: 1, 20219: 1, 20207: 3, 20222: 2, 3834: 2, 3839: 3, 2815: 1, 2814: 1, 3050: 1, 3051: 1, 4014: 1, 3757: 1, 3754: 2, 3819: 1, 3550: 1, 3480: 1, 3393: 1, 3157: 1, 2069: 1, 1104: 1, 1285: 2, 1284: 2, 1280: 1, 1344: 2, 1045: 1, 277: 1, 357: 1, 422: 1, 682: 1, 922: 1, 1945: 1, 3941: 1,

In [104]:
# Inspect the transformer's `words` attribute; it is an empty list in the recorded run
# NOTE(review): presumably it is only populated when SFA is created with save_words=True - confirm against the docs
tr2 = sfa_transformer.words
tr2

[]

In [103]:
# Word -> count dictionaries for the whole ArrowHead training split
# NOTE: displaying the full object prints every dictionary and produces a very long output
X_train_sfa

[[{2117: 1,
   1044: 1,
   1108: 3,
   1088: 1,
   1089: 4,
   1092: 2,
   1028: 1,
   1024: 1,
   2112: 1,
   17476: 1,
   17472: 3,
   17473: 1,
   17413: 3,
   17412: 1,
   17409: 1,
   17428: 1,
   16464: 1,
   16705: 1,
   33029: 1,
   33044: 1,
   33040: 1,
   33041: 1,
   33025: 1,
   33028: 2,
   33024: 2,
   34048: 3,
   50433: 1,
   50436: 2,
   50180: 1,
   50192: 1,
   50257: 1,
   50261: 1,
   49237: 2,
   49425: 4,
   49429: 1,
   49428: 2,
   49409: 3,
   49412: 1,
   51456: 1,
   51474: 1,
   51227: 1,
   50284: 1,
   49585: 1,
   49575: 1,
   49839: 1,
   49838: 1,
   50106: 1,
   51115: 1,
   52139: 1,
   52142: 1,
   52154: 1,
   35771: 1,
   35775: 2,
   35519: 2,
   35838: 1,
   35835: 1,
   35839: 1,
   35834: 2,
   35818: 1,
   36843: 1,
   36782: 2,
   36522: 1,
   36586: 1,
   36587: 1,
   36591: 1,
   36543: 1,
   35583: 2,
   33535: 1,
   33471: 2,
   18346: 2,
   19370: 2,
   20410: 1,
   20218: 1,
   20219: 1,
   20207: 3,
   20222: 2,
   3834: 2,
   3839: 

In [151]:
# Decode one integer word (18346, a key that appears in the dictionaries above) into its list of 8 symbols
sfa_transformer.word_list(18346)

[1, 0, 1, 3, 2, 2, 2, 2]

In [149]:
# Total number of words emitted for the first ArrowHead series (sum of all word counts): 240 for a 251-point series
print(sum(tr1[0][0].values()))

240
