In [147]:
import sys
if '../' not in sys.path:
    sys.path.append('../')


import pandas as pd
from typing import List
import importlib
import string

import mplusdb
import utils

In [67]:
# Re-import mplusdb so that edits made to the module on disk take effect in the
# running kernel without restarting it.
importlib.reload(mplusdb)

<module 'mplusdb' from '../mplusdb.py'>

In [73]:
# Create the database wrapper from the project's config file.
mdb = mplusdb.MplusDatabase(config_file_path = "../config/db_config.ini")
# Per composition: count of level values and average level, ordered by that
# count (ascending).
query = "select composition, count(level), AVG(level) from run group by composition order by count(level);"
# NOTE(review): isfetch=True presumably makes the call return the fetched rows;
# confirm against mplusdb.send_query_to_mdb.
ret = mdb.send_query_to_mdb(query=query, isfetch=True)

In [102]:
# Build the per-composition summary in one chain: wrap the query result, label
# its three columns, then discard every record that contains a missing value.
comp = (
    pd.DataFrame(ret)
    .set_axis(["comp", "count_runs", "avg_key_level"], axis="columns")
    .dropna()
)

In [103]:
# Display the per-composition summary (comp, count_runs, avg_key_level).
comp

Unnamed: 0,comp,count_runs,avg_key_level
0,Khowz,1,2.0000
1,mtvvx,1,16.0000
2,Kosuv,1,15.0000
3,ccfjs,1,2.0000
4,Bjptw,1,10.0000
...,...,...,...
84002,fijot,909,13.5974
84003,ijotx,913,13.4951
84004,dijtx,926,13.5335
84005,dijjt,964,13.6753


In [161]:
# NOTE(review): this cell is placed before the cell that assigns CLASS_INDEX, so
# it fails on a fresh kernel run top-to-bottom. Its saved output lists 52 keys
# (a-z, A-Z), while the visible definition builds 36 keys — the output looks
# stale; re-run after the defining cell.
CLASS_INDEX

{'a': 0,
 'b': 1,
 'c': 2,
 'd': 3,
 'e': 4,
 'f': 5,
 'g': 6,
 'h': 7,
 'i': 8,
 'j': 9,
 'k': 10,
 'l': 11,
 'm': 12,
 'n': 13,
 'o': 14,
 'p': 15,
 'q': 16,
 'r': 17,
 's': 18,
 't': 19,
 'u': 20,
 'v': 21,
 'w': 22,
 'x': 23,
 'y': 24,
 'z': 25,
 'A': 26,
 'B': 27,
 'C': 28,
 'D': 29,
 'E': 30,
 'F': 31,
 'G': 32,
 'H': 33,
 'I': 34,
 'J': 35,
 'K': 36,
 'L': 37,
 'M': 38,
 'N': 39,
 'O': 40,
 'P': 41,
 'Q': 42,
 'R': 43,
 'S': 44,
 'T': 45,
 'U': 46,
 'V': 47,
 'W': 48,
 'X': 49,
 'Y': 50,
 'Z': 51}

In [172]:
# Class tokens, in vector order: the 26 lowercase letters followed by ten
# uppercase letters. Note that "E" is not part of the uppercase set.
alphabet = string.ascii_lowercase + "ABCDFGHIJK"
# Maps each class token to its position in the class vector.
CLASS_INDEX = dict(zip(alphabet, range(len(alphabet))))


def tryme(sig: str) -> List[int]:
    """Convert a comp signature into a class-count vector.

    Args:
        sig: Signature string; every character must be a key of CLASS_INDEX.

    Returns:
        A list with one slot per entry of CLASS_INDEX, where the slot at
        CLASS_INDEX[token] holds how many times that token occurs in sig.

    Raises:
        KeyError: If sig contains a character that is not in CLASS_INDEX.
    """
    # Size the vector from the mapping itself so the two cannot drift apart
    # when the alphabet changes.
    vector = [0] * len(CLASS_INDEX)
    for class_token in sig:
        try:
            index = CLASS_INDEX[class_token]
        except KeyError:
            # Same exception type as a plain lookup, but say which token and
            # which signature were at fault.
            raise KeyError(
                f"unknown class token {class_token!r} in signature {sig!r}"
            ) from None
        vector[index] += 1
    return vector

# Encode every comp signature as a class vector. Mapping over the "comp" column
# directly avoids materialising a row object per record, which a row-wise
# DataFrame.apply would do; the result is a Series of lists on comp's index.
res = comp["comp"].map(tryme)

In [179]:
# Expand the per-row class vectors into one column per class token. The frame
# reuses res's index so it stays row-aligned with the frame res was computed
# from when the two are combined on index labels, even if that index has gaps
# (e.g. after rows were dropped) and is not a plain 0..n-1 range.
comp_matrix = pd.DataFrame(res.tolist(), index=res.index, columns=list(alphabet))

In [181]:
# Place the summary columns and the class-count columns side by side. Rows are
# matched on index labels, so both frames must carry the same index.
search_table = pd.concat([comp, comp_matrix], axis="columns")

In [182]:
# Display the combined table: summary columns followed by one count column per
# class token.
search_table

Unnamed: 0,comp,count_runs,avg_key_level,a,b,c,d,e,f,g,...,A,B,C,D,F,G,H,I,J,K
0,Khowz,1,2.0000,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
1,mtvvx,1,16.0000,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Kosuv,1,15.0000,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
3,ccfjs,1,2.0000,0,0,2,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
4,Bjptw,1,10.0000,0,0,0,0,0,0,0,...,0,1,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
84002,fijot,909,13.5974,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
84003,ijotx,913,13.4951,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
84004,dijtx,926,13.5335,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
84005,dijjt,964,13.6753,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [203]:
# Keep only the rows whose "v" count equals 2. Despite its name, `mask` holds
# the filtered rows themselves, not a boolean mask.
mask = search_table.loc[search_table["v"] == 2]

In [204]:
# Display the rows selected by the v == 2 filter.
mask

Unnamed: 0,comp,count_runs,avg_key_level,a,b,c,d,e,f,g,...,A,B,C,D,F,G,H,I,J,K
1,mtvvx,1,16.0000,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
24,Kdjvv,1,15.0000,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,1
309,FKvvx,1,15.0000,0,0,0,0,0,0,0,...,0,0,0,0,1,0,0,0,0,1
396,Aopvv,1,15.0000,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
443,Kjovv,1,15.0000,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
52951,Henvv,12,14.1667,0,0,0,0,1,0,0,...,0,0,0,0,0,0,1,0,0,0
53110,Hevvx,12,13.7500,0,0,0,0,1,0,0,...,0,0,0,0,0,0,1,0,0,0
54027,Heovv,12,15.0000,0,0,0,0,1,0,0,...,0,0,0,0,0,0,1,0,0,0
54668,Koovv,13,15.0000,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
