In [18]:
import sys
from pathlib import Path

In [19]:
# Absolute path two levels above the current working directory; it is put on
# sys.path so the project-local packages imported in later cells can be resolved.
project_directory = str(Path('../..').resolve())
# Guard the insertion: notebook cells get re-run, and an unconditional
# extend/append would add a duplicate sys.path entry on every execution.
if project_directory not in sys.path:
    sys.path.append(project_directory)

In [20]:
from functools import partial
from os import getcwd, path

In [21]:
from environs import Env
import pandas as pd

In [22]:
from internal.extract_features.data_sources_factory import data_sources_factory
from internal.extract_features.extract_features import extract_features
from internal.extract_features.save_features import save_features
from src.pkg.config_constants import PRECISION
from src.pkg.get_output_dir import get_output_dir
from src.pkg.load_config import load_config
from src.pkg.load_env import load_env

In [23]:
from IPython.display import display

In [24]:
def get_precision(env: Env) -> int:
    """Return the precision setting parsed as an integer.

    Args:
        env: The environs ``Env`` instance to read from.

    Returns:
        The result of ``env.int`` for the variable named by the ``PRECISION``
        constant.
    """
    precision_value = env.int(PRECISION)
    return precision_value

In [25]:
# The working directory the kernel was started in doubles as the process
# directory; both names are bound to the same string.
process_dir = cwd = getcwd()

In [26]:
# Assemble the run settings from the process directory. Statement order matters:
# `env` is an input to both load_config and get_precision, and `config` is an
# input to get_output_dir.
env = load_env(process_dir)
config = load_config(process_dir, env)
precision = get_precision(env)
# Note: the output directory is derived from project_directory, not process_dir.
output_dir = get_output_dir(project_directory, config)

In [9]:
# Data-source factory built from the project root and the precision read from
# `env`; it is passed to extract_features as `data_source_factory`.
# NOTE(review): this cell's execution count (In [9]) is lower than that of the
# cells defining its inputs (In [26]) — re-run the notebook top to bottom to
# confirm ds_factory reflects the current `precision`.
ds_factory = data_sources_factory(project_directory, precision)

In [27]:
def print_features(character: str, features: pd.DataFrame):
    """Print a header for ``character`` and render its feature table.

    Passed to ``extract_features`` as ``done_callback`` in this notebook.

    Args:
        character: Label interpolated into the header line.
        features: Frame shown through IPython's ``display``.
    """
    header = 'FEATURES FOR CHARACTER {0}'.format(character)
    print(header)
    display(features)

In [28]:
# Collect the keyword arguments first so the run's settings can be read (and
# tweaked) in one place, then hand them to extract_features unchanged.
extraction_options = dict(
    cwd=project_directory,
    config=config,
    data_source_factory=ds_factory,
    parallel=True,
    show_progress=True,
    done_callback=print_features,  # prints a header and displays each feature frame
)
extract_features(**extraction_options)

PROCESSING DATA SOURCES FOR CHARACTER: 0


100%|██████████| 4132/4132 [00:27<00:00, 151.47it/s]


FEATURES FOR CHARACTER 0


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13
0,1.000000,-0.307347,-0.681468,0.679601,-0.900790,0.021635,0.546735,-0.680195,0.678328,0.284822,-0.023502,0.679601,-0.279378,-0.417008
1,-0.564491,0.896516,0.125406,1.000000,-0.971529,-0.055541,0.081055,-0.877503,-0.598985,-0.552266,0.642828,-0.448782,0.642828,0.379093
2,-1.000000,-0.051615,0.862930,0.282044,0.786467,-0.385275,0.223417,0.838142,-0.592194,0.555459,0.195612,-0.663324,-0.505690,-0.406901
3,0.777581,-0.695295,-0.842960,0.581775,-0.138234,1.000000,-0.889064,-0.294194,0.450822,0.777715,0.869459,0.110859,-0.176763,-0.840521
4,0.632113,-0.853731,-0.178894,1.000000,0.490509,0.871269,-0.979369,-0.200712,0.794314,0.003620,-0.271581,-0.737873,-0.642327,0.420709
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4127,-1.000000,0.325832,0.915533,-0.034656,-0.787948,-0.267913,0.526772,0.601499,-0.215409,0.654512,0.357130,-0.479964,-0.151793,-0.437554
4128,-0.693487,0.832738,0.589998,-0.866691,1.000000,-0.281398,0.209706,0.939695,-0.230007,-0.842923,-0.828966,-0.179789,0.647042,0.483345
4129,-0.859326,0.781654,0.638012,-0.954014,-0.152476,1.000000,0.928153,-0.228660,-0.925529,-0.009903,0.548206,0.565775,-0.234715,-0.654662
4130,-1.000000,-0.266261,0.562695,-0.827396,0.063546,0.993029,0.662524,0.711157,0.929716,-0.049311,-0.288795,-0.798034,-0.611773,0.567285


PROCESSING DATA SOURCES FOR CHARACTER: 1


100%|██████████| 4684/4684 [00:33<00:00, 139.25it/s]


FEATURES FOR CHARACTER 1


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13
0,1.0,-0.109861,-0.960086,0.139605,0.750535,0.032691,0.643621,-0.288050,-0.468282,0.160987,0.322880,-0.074223,-0.104775,-0.074223
1,1.0,-0.584469,-0.931628,0.529389,0.718010,-0.549221,0.577016,-0.302480,-0.445196,0.296748,0.224529,-0.161485,-0.085660,0.064107
2,-1.0,0.491965,0.993905,-0.305597,0.794514,-0.305597,-0.700914,0.292575,0.528660,-0.272365,-0.302133,0.093184,0.096648,-0.106206
3,1.0,-0.115939,-0.931667,0.284973,-0.968113,-0.006599,-0.567201,0.139187,0.562641,-0.115939,-0.275629,0.066294,0.125283,-0.115939
4,-1.0,0.563215,0.879280,-0.470389,-0.780751,0.437930,0.503424,-0.282461,-0.467537,0.250002,-0.154324,0.093395,0.127568,-0.094533
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4679,-1.0,0.450729,0.708096,-0.688002,-0.620423,0.450729,0.455045,-0.371688,-0.288293,0.355835,0.233625,-0.023742,-0.051058,0.166046
4680,-1.0,0.000000,0.931952,0.000000,-0.694955,0.000000,0.626907,0.000000,-0.389910,0.000000,0.321862,0.000000,-0.084865,0.000000
4681,-1.0,0.097483,0.941115,-0.125268,0.718364,0.097483,-0.560863,0.116576,0.431970,-0.141178,-0.217190,-0.036167,0.097844,0.002019
4682,-1.0,0.072159,-0.756590,0.072159,0.703868,-0.171251,-0.432044,0.072159,0.379322,-0.008978,0.135912,-0.008978,-0.107498,0.072159


PROCESSING DATA SOURCES FOR CHARACTER: 2


100%|██████████| 4177/4177 [00:28<00:00, 147.23it/s]


FEATURES FOR CHARACTER 2


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13
0,0.974814,-0.632962,-0.949078,-0.486046,-0.971338,0.388768,1.000000,-0.015917,0.722959,0.626313,0.397647,-0.290159,-0.217562,0.303624
1,-1.000000,0.546940,-1.000000,-0.144343,0.684027,-0.705685,0.782782,-0.180726,-0.388760,0.597877,0.382566,0.316512,0.200649,-0.362643
2,0.629901,-0.822572,-0.712508,0.745137,0.798876,0.613713,-1.000000,0.022301,0.514122,-0.065315,0.141752,-0.470541,0.049755,0.456003
3,-0.921970,0.562183,-1.000000,-0.342962,0.623019,-0.833840,0.660473,0.712000,-0.271722,0.645415,0.342112,-0.336720,0.248476,0.187640
4,1.000000,0.366587,0.809422,-0.677196,-0.832484,0.513186,0.853401,-0.219808,-0.798278,-0.170941,-0.257817,0.328472,0.116743,-0.205148
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4172,-0.790448,-0.835531,0.433239,1.000000,0.547960,-0.873771,-0.905169,-0.032486,-0.492175,0.541117,0.356759,-0.261928,0.127317,0.311676
4173,0.831420,1.000000,-0.806532,-0.572651,0.336225,-0.866503,-0.920808,0.047702,0.336225,0.504805,-0.414730,0.406855,0.336225,-0.180849
4174,-1.000000,-0.424642,0.654161,0.709640,0.862113,0.085785,-0.801501,0.237022,0.717177,-0.361626,0.244559,-0.267103,-0.196550,0.016468
4175,0.550359,0.913123,0.717965,-0.735006,-1.000000,-0.218219,-0.771869,0.433583,0.596916,-0.185629,0.053747,-0.620164,0.005638,0.256665


PROCESSING DATA SOURCES FOR CHARACTER: 3


100%|██████████| 4351/4351 [00:29<00:00, 147.18it/s]


FEATURES FOR CHARACTER 3


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13
0,-1.000000,0.141770,0.721381,-0.699470,-0.673229,0.420886,0.735969,-0.028423,0.429621,0.482156,-0.489421,-0.232655,-0.080958,0.175808
1,-0.806485,-1.000000,0.874413,0.680898,0.442932,0.924778,0.949453,0.080577,-0.690709,-0.015903,0.217812,0.305698,0.081132,-0.305343
2,1.000000,-0.641750,0.904049,0.029906,-0.765495,0.452089,-0.842255,-0.200376,0.424295,0.253791,-0.391287,0.269783,-0.055459,-0.257947
3,1.000000,-0.613469,0.946758,0.105291,-0.836833,-0.373882,-0.783592,0.211775,0.424991,0.536548,-0.140890,0.390514,-0.144693,-0.160916
4,0.826544,-0.623046,-1.000000,0.176067,-0.771682,0.632703,0.758049,-0.063667,-0.286506,-0.280569,-0.125970,0.325901,0.312829,-0.080790
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4346,-0.953824,-0.913677,0.883992,-0.721347,0.622210,0.929481,1.000000,0.139556,-0.820262,-0.139016,-0.099026,0.536427,-0.190611,-0.226786
4347,-0.934918,-0.406802,1.000000,-0.040340,0.706831,-0.612021,0.570018,0.619291,-0.729699,0.208854,-0.114044,-0.245559,-0.084727,0.252829
4348,-1.000000,0.023439,0.910448,-0.134940,0.662981,-0.629875,0.662981,0.211514,-0.524863,0.162021,0.108654,0.340197,-0.208104,-0.174535
4349,1.000000,-0.322613,-0.887658,0.067937,-0.692383,-0.517888,-0.555691,0.380377,0.576904,0.263212,0.056171,-0.257521,0.018976,0.207420


PROCESSING DATA SOURCES FOR CHARACTER: 4


100%|██████████| 4072/4072 [00:35<00:00, 114.96it/s]


FEATURES FOR CHARACTER 4


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13
0,-0.847689,1.000000,0.920046,0.234382,0.070614,-0.941557,-0.334605,0.711390,-0.548390,-0.510748,0.302741,0.514975,0.274682,-0.186507
1,-0.804687,1.000000,0.809713,-0.513499,-0.333820,0.730933,0.128638,-0.690074,-0.602887,0.041450,0.372479,-0.132322,0.069779,0.327334
2,-0.504940,-1.000000,0.653082,0.824197,-0.519467,0.663360,0.034381,-0.798436,-0.020096,0.645201,0.341786,0.263863,0.034381,-0.199190
3,-0.989817,0.684677,1.000000,0.216485,0.141648,-0.810937,-0.580149,-0.368756,-0.404577,0.450581,0.230827,0.534187,0.230827,-0.101217
4,1.000000,0.387540,0.144139,-0.719373,-0.514873,0.464567,-0.599237,-0.331383,0.527565,0.366999,0.016983,0.490243,0.016983,-0.155321
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4067,-0.993771,0.836526,1.000000,-0.534191,0.252336,-0.908023,0.152647,0.886370,0.418484,-0.201896,-0.246107,-0.338375,-0.163033,0.365775
4068,-0.952347,0.476263,1.000000,0.298776,-0.538213,-0.647816,-0.321285,0.397380,0.112570,-0.470330,0.073128,0.397380,0.309776,0.160732
4069,1.000000,-0.099828,-0.699701,0.664257,0.235914,-0.689266,-0.593664,-0.296308,0.563380,0.118482,0.061266,0.358623,0.017604,-0.208984
4070,1.000000,0.345186,-0.827315,0.237696,-0.504847,-0.622216,-0.254040,-0.478898,0.462554,0.237696,0.086343,-0.407238,-0.139384,0.151705


PROCESSING DATA SOURCES FOR CHARACTER: 5


100%|██████████| 3795/3795 [00:36<00:00, 104.31it/s]


FEATURES FOR CHARACTER 5


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13
0,1.000000,-0.310557,0.834060,0.452769,-0.839568,0.244158,-0.758968,-0.410121,0.170298,0.834432,-0.172645,-0.764127,-0.227959,0.087700
1,-1.000000,0.200181,-0.896165,-0.308064,0.912748,0.047161,0.721473,0.544476,0.702346,-0.526663,0.109394,0.353201,0.001005,-0.271630
2,-0.768124,0.651904,1.000000,0.000490,-0.861183,-0.278687,-0.721594,0.233138,0.534704,0.279668,-0.209769,-0.464805,0.131448,-0.030529
3,-0.825443,1.000000,0.958370,-0.621648,-0.598412,0.545939,0.711879,-0.206506,-0.105431,-0.440023,-0.436247,0.027011,0.322684,-0.128667
4,-1.000000,0.436984,0.492968,-0.858800,0.823957,-0.228514,-0.802815,-0.154570,0.651420,0.461632,-0.014077,0.469848,-0.290136,-0.194007
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3790,1.000000,-0.470534,-0.761471,0.683383,0.785826,0.080199,-0.774584,-0.011590,-0.368090,-0.496759,0.124947,0.392281,0.143304,-0.378745
3791,-0.573321,1.000000,0.972573,0.111111,-0.592644,0.458937,-0.302789,-0.645273,0.586100,0.033816,-0.418731,-0.159420,0.244715,-0.346216
3792,-0.949042,-0.433319,-1.000000,0.229134,0.956782,0.351433,0.875250,-0.423127,0.324905,0.704741,-0.388506,-0.535234,-0.266207,0.310666
3793,1.000000,-0.377381,-0.956808,0.333546,0.838106,0.298351,-0.710447,-0.335148,-0.224764,-0.419614,0.197568,0.425051,-0.076948,-0.018398


PROCESSING DATA SOURCES FOR CHARACTER: 6


100%|██████████| 4137/4137 [00:34<00:00, 121.41it/s]


FEATURES FOR CHARACTER 6


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13
0,-1.000000,-0.675085,-0.445129,0.712093,0.327727,0.731909,-0.365862,-0.655268,0.691036,0.148414,0.294699,-0.523156,-0.111074,0.321418
1,-1.000000,0.244097,0.673458,-0.313723,0.673458,0.244097,-0.572338,-0.053407,0.301579,-0.406693,0.115639,0.368056,-0.101291,-0.251743
2,-0.910846,-0.610101,0.368163,1.000000,-0.227120,0.920629,-0.160978,-0.851994,0.558654,-0.419610,0.600986,0.333282,-0.108063,0.111043
3,-1.000000,-0.089609,-0.535993,-0.271898,0.546691,0.214205,0.270496,0.462780,0.292592,-0.238755,-0.088557,-0.338185,0.292592,0.059536
4,-1.000000,0.364481,0.664805,0.126651,-0.613527,0.007737,0.510216,-0.241984,0.189146,0.305023,0.070232,-0.289550,-0.227055,-0.111178
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4132,-1.000000,-0.029699,0.129220,0.611804,0.546637,-0.210474,-0.482781,-0.260690,0.468524,0.235885,0.083285,-0.406771,-0.200319,0.150449
4133,-1.000000,0.103267,0.686601,-0.249603,0.196171,0.701353,0.614831,0.252789,0.204715,-0.541811,-0.461723,-0.315392,0.076554,0.201524
4134,-1.000000,0.395800,-0.829271,0.225071,0.536557,-0.031022,0.451192,-0.287114,-0.459360,-0.002567,0.223554,0.168162,-0.060993,-0.201750
4135,-1.000000,-0.296460,0.753737,0.276474,0.250673,0.695694,0.460283,-0.310434,-0.336235,-0.436200,-0.210469,0.360318,-0.021820,-0.226590


PROCESSING DATA SOURCES FOR CHARACTER: 7


100%|██████████| 4401/4401 [00:30<00:00, 145.82it/s]


FEATURES FOR CHARACTER 7


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13
0,1.000000,-0.326621,-0.582936,0.364777,-0.382795,-0.508567,0.539068,-0.059766,-0.249367,0.425425,-0.431314,-0.120415,0.144851,0.182830
1,-0.448721,-1.000000,0.959623,0.056258,-0.360700,-0.647914,-0.536743,0.320323,-0.536743,-0.207806,0.431494,0.232301,-0.061427,0.425949
2,1.000000,-0.294271,0.645193,-0.116867,-0.389663,-0.442108,-0.513845,0.285248,-0.537499,-0.057733,0.349520,0.119671,-0.034855,0.267507
3,1.000000,0.056009,-0.375823,-0.670841,-0.395293,0.516780,0.060287,-0.624115,0.580949,0.230305,0.110907,0.458373,-0.453700,-0.047826
4,1.000000,-0.009943,-0.006866,-0.903678,-0.661410,0.419955,-0.515956,-0.132771,-0.108684,-0.452770,-0.134138,0.445410,0.332527,0.182380
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4396,-0.415255,-0.925392,1.000000,-0.127703,-0.659708,-0.513681,0.710516,0.200379,-0.640409,0.007390,-0.312328,0.374070,0.228043,0.393369
4397,1.000000,-0.061928,-0.444361,-0.702659,0.619905,0.140790,-0.335763,0.470206,-0.413954,-0.381207,-0.368342,0.014091,0.163791,0.318168
4398,0.967729,0.421795,-0.210329,-1.000000,0.520879,0.543663,-0.345738,-0.647937,-0.454065,-0.268791,0.000909,0.478666,-0.372820,0.218681
4399,1.000000,-0.525058,-0.404225,0.632811,-0.740910,0.099042,0.572984,-0.098043,-0.149658,-0.557906,-0.458423,-0.170307,0.155353,0.347743


PROCESSING DATA SOURCES FOR CHARACTER: 8


100%|██████████| 4063/4063 [00:28<00:00, 141.47it/s]


FEATURES FOR CHARACTER 8


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13
0,1.0,-0.053832,-0.712289,0.413156,-0.770662,-0.209495,0.571928,-0.014916,-0.323132,-0.287326,-0.154498,0.309381,0.182771,-0.053832
1,1.0,0.087310,-0.747445,0.078256,-0.448659,0.358933,-0.312847,-0.440847,0.447699,-0.066610,0.109678,-0.380486,0.049317,0.132580
2,-1.0,0.690494,0.915774,-0.573917,0.915774,-0.037500,-0.808423,-0.152446,-0.279669,0.491254,-0.233690,-0.497286,0.258937,-0.086763
3,-1.0,0.648203,-1.000000,0.046902,0.864561,-0.488467,0.796871,0.317663,-0.398699,-0.491104,-0.398699,0.363376,0.151966,-0.063864
4,1.0,-0.444532,-0.939697,0.427242,0.912823,0.100326,-0.569193,-0.095823,-0.394838,0.318270,0.389758,-0.052234,-0.046129,-0.270178
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4058,1.0,-0.547230,0.961582,0.336378,-0.908083,0.349184,-0.651964,-0.404536,0.316164,0.313328,-0.306204,0.221125,0.212436,-0.239888
4059,1.0,-0.251887,0.879193,0.140737,-0.803478,0.205455,-0.615796,-0.266987,0.516771,-0.402896,0.335561,0.321947,-0.078635,-0.001643
4060,-1.0,0.484170,0.926756,-0.538886,0.892654,-0.027358,-0.437319,0.688782,-0.744236,-0.027358,0.278820,-0.375197,-0.130402,0.045718
4061,1.0,-0.807530,-0.672890,0.736013,-0.828107,0.089277,0.620582,-0.505720,-0.126758,0.434203,-0.310718,-0.221156,0.251018,-0.117678


PROCESSING DATA SOURCES FOR CHARACTER: 9


100%|██████████| 4188/4188 [00:25<00:00, 162.23it/s]


FEATURES FOR CHARACTER 9


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13
0,1.0,-0.363540,-0.670352,0.389384,-0.645044,-0.078821,0.525468,-0.047185,-0.290727,-0.331904,-0.214802,0.275496,0.177478,0.174263
1,1.0,0.355169,-0.254091,-0.733288,-0.627952,-0.345229,-0.590093,0.109083,-0.261979,0.481367,0.337461,0.355169,0.091375,-0.193792
2,1.0,0.549603,-0.860661,-0.147014,-0.580205,-0.644597,0.034989,-0.716973,-0.507829,0.433802,0.469244,-0.329246,0.324493,0.284225
3,1.0,0.296952,-0.826934,0.215755,-0.705138,-0.312026,-0.096160,-0.636814,0.336891,-0.454121,-0.299153,0.459346,0.211229,0.256353
4,1.0,-0.231756,-0.595469,0.348415,0.564872,0.000312,-0.479435,-0.014192,0.112339,-0.440617,-0.189350,0.406432,0.112339,0.151157
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4183,1.0,0.773533,-0.334482,-0.983423,-0.726779,-0.550890,-0.817309,0.183411,0.108110,-0.661538,0.027639,0.572355,0.128228,-0.098239
4184,1.0,0.490295,-0.509067,-0.689873,-0.750905,-0.133646,0.071343,-0.689873,0.430645,0.429490,0.361548,-0.206197,-0.186617,0.331220
4185,1.0,0.579237,-0.909640,-0.101271,0.013528,-0.913659,-0.577300,-0.692099,-0.424317,0.515934,0.382796,-0.433611,0.227703,0.386162
4186,1.0,0.436327,-0.521605,-0.599660,-0.728803,-0.185265,-0.456856,0.533450,-0.019799,-0.680596,0.164736,0.397477,0.203585,-0.185265
