# Importações

In [1]:
import json
import joblib
import pandas as pd

# Base de Dados

#### Importando a entrada em formato .json:

In [2]:
with open('input/pe-dataset.json') as file:
    pe_dataset = json.load(file)

#### Transformando a entrada em um dataframe:

In [3]:
db_pe = pd.DataFrame.from_dict(pe_dataset, orient='index')

In [4]:
db_pe.head()

Unnamed: 0,panel_info,panel_eplet,locus_abc,locus_dr,locus_dq,locus_dp,panel_nc,panel_pc,panel_allele_count,panel_min_mfi,panel_max_mfi,reactive
0,0dfaab7bd101edb0c413798f4d19e9a1,102H,1,0,0,0,126,10803,1,483,483,0
1,0dfaab7bd101edb0c413798f4d19e9a1,103M,1,0,0,0,126,10803,1,778,778,0
10,0dfaab7bd101edb0c413798f4d19e9a1,11AV,1,0,0,0,126,10803,14,0,4456,0
100,0dfaab7bd101edb0c413798f4d19e9a1,71TD,1,0,0,0,126,10803,2,1069,3156,0
1000,2ee8a4c5dd41332767512ca6ce6c5cf3,66DR,0,0,1,0,148,13047,6,0,1047,0


#### Agrupando o dataframe por painéis:

In [5]:
db_pe = db_pe.sort_values(by='panel_info')

In [6]:
db_pe.head()

Unnamed: 0,panel_info,panel_eplet,locus_abc,locus_dr,locus_dq,locus_dp,panel_nc,panel_pc,panel_allele_count,panel_min_mfi,panel_max_mfi,reactive
7059,007a756c2784939fc12c1b09b448c175,9YL,0,0,0,1,6,6925,11,0,1374,0
6963,007a756c2784939fc12c1b09b448c175,23L,0,0,1,0,6,6925,2,2291,6306,0
6962,007a756c2784939fc12c1b09b448c175,185I,0,0,1,0,6,6925,10,2291,11832,0
6961,007a756c2784939fc12c1b09b448c175,175E,0,0,1,0,6,6925,17,1493,12696,0
6960,007a756c2784939fc12c1b09b448c175,160S,0,0,1,0,6,6925,1,13252,13252,0


In [7]:
db_pe.reset_index(level=0, inplace=True)

In [8]:
db_pe.head()

Unnamed: 0,index,panel_info,panel_eplet,locus_abc,locus_dr,locus_dq,locus_dp,panel_nc,panel_pc,panel_allele_count,panel_min_mfi,panel_max_mfi,reactive
0,7059,007a756c2784939fc12c1b09b448c175,9YL,0,0,0,1,6,6925,11,0,1374,0
1,6963,007a756c2784939fc12c1b09b448c175,23L,0,0,1,0,6,6925,2,2291,6306,0
2,6962,007a756c2784939fc12c1b09b448c175,185I,0,0,1,0,6,6925,10,2291,11832,0
3,6961,007a756c2784939fc12c1b09b448c175,175E,0,0,1,0,6,6925,17,1493,12696,0
4,6960,007a756c2784939fc12c1b09b448c175,160S,0,0,1,0,6,6925,1,13252,13252,0


# Selecionando atributos da base

#### Instâncias do painel:

In [9]:
instances_painel = db_pe.drop(columns=['index', 'panel_info', 'panel_eplet', 'reactive'])
instances_painel.head()

Unnamed: 0,locus_abc,locus_dr,locus_dq,locus_dp,panel_nc,panel_pc,panel_allele_count,panel_min_mfi,panel_max_mfi
0,0,0,0,1,6,6925,11,0,1374
1,0,0,1,0,6,6925,2,2291,6306
2,0,0,1,0,6,6925,10,2291,11832
3,0,0,1,0,6,6925,17,1493,12696
4,0,0,1,0,6,6925,1,13252,13252


#### Transformando os valores categóricos em numéricos:

In [10]:
for column in instances_painel.columns:
    instances_painel[column] = instances_painel[column].apply(lambda x:int(x))

# Carregando o Modelo Persistido

In [11]:
# Modelo treinado com a base de SP
joblib_model = joblib.load("persisted_model/joblib_final_model.pkl")
print(joblib_model)

RandomForestClassifier(max_features='sqrt', min_samples_leaf=4,
                       min_samples_split=8, n_estimators=400)


# Classificando Epítopos

In [12]:
predicted_labels_painel = joblib_model.predict(instances_painel)

In [13]:
def returnReactiveEplets(labels):
    painels = []
    predict_eplet = []
    painel_number = db_pe['panel_info'][0]                          # Recebe o identificador do primeiro painel
    first_react_eplet = 1                                           # Flag para controlarmos o cálculo do cutoff
    cutoff = 0                                                                                                                
    
    for index in range(0, len(labels)):                             # Percorre todas as instâncias da base
        if(db_pe['panel_info'][index] == painel_number):            # Se ainda estivermos no mesmo painel
            
            if(labels[index] == 1):                                 # Se o epítopo for classificado como reativo
                predict_eplet.append(db_pe['panel_eplet'][index])   # Adiconamos à lista de epítopos classificados como reativos
                
                if(first_react_eplet):                              # Verificamos se é o primeiro epítopo reativo daquele painel
                    cutoff = db_pe['panel_min_mfi'][index]          # Inicializamos o valor do cutoff
                    first_react_eplet = 0                           # Atualizamos o valor da flag
                    
                elif(db_pe['panel_min_mfi'][index] < cutoff):       # Verificamos se o cutoff do eplet é o menor cutoff
                    cutoff = db_pe['panel_min_mfi'][index]          # Atualizamos o valor do cutoff
        else:                                                       
            results = {                                             # Se mudarmos de painel, resetamos as análises
                    'painel': '',
                    'analysis': [],
                    'cutoff': ''
            }
            results['painel'] = painel_number                      # Preenchemos o identificador do painel
            results['analysis'] = predict_eplet                    # Preenchemos a lista de epítopos classificados como reativos
            results['cutoff'] = str(cutoff)                        # Preenchemos o valor do cutoff
            painels.append(results)                                # Adicionamos à lista de resultados
            
            painel_number = db_pe['panel_info'][index]             # Atualizamos o identificador do painel
            predict_eplet = []                                     # Resetamos a lista de epítopos classificados como reativos
            cutoff = 0                                             # Resetamos o valor do cutoff
            first_react_eplet = 1                                  # Resetamos a flag para controlarmos o cálculo do cutoff
            
    return painels

In [14]:
def printResults(panels):
    for index in range(0, len(panels)):
        print(panels[index])
        print("\n")

In [15]:
panels = returnReactiveEplets(predicted_labels_painel)
printResults(panels)

{'painel': '007a756c2784939fc12c1b09b448c175', 'analysis': ['23L', '185I', '175E', '160D', '160AE', '135G', '96Y<sub>2</sub>', '98E', '40ERV', '66IT', '66IL', '61FT<sub>4</sub>', '56PA', '55RL<sub>3</sub>', '55PPD', '55PPA', '55PP', '52PL<sub>3</sub>', '47QL<sub>5</sub>', '47KHL', '45GE<sub>3</sub>', '45EV', '70QRA', '4Q', '40GR<sub>3</sub>', '65LE', '75S<sub>3</sub>', '76L', '84QL<sub>3</sub>', '75IL', '57D'], 'cutoff': '11739'}


{'painel': '008c56d5f6c8a1b5dbe0aee2b506d185', 'analysis': ['57D', '65LE', '9H', '76I', '84DEAV', '76V', '37FV', '37FL<sub>2</sub>', '37F', '13FEY', '112Y', '37L', '125SQ', '98E', '96Y<sub>2</sub>', '78V<sub>2</sub>', '74E', '57V', '57A', '4Q', '70RE', '11M', '33EYA', '35LV', '55EA', '11L', '45GE<sub>3</sub>', '9YL', '135G'], 'cutoff': '10076'}


{'painel': '00e9b45a8c53eb7353213770d78b2e90', 'analysis': ['11L', '11M', '31Q', '111R', '9F', '66ER', '9H', '66EV', '67VG', '67VT', '70GT', '70RT', '74EL', '74SR<sub>3</sub>', '74SV<sub>2</sub>', '77T', '85VA', '85


{'painel': 'bcbfc140feb1bda057442edee286a088', 'analysis': ['160D', '96QK<sub>2</sub>', '96Y<sub>2</sub>', '98E', '135G', '167H<sub>2</sub>', '175E', '185I', '23L', '25FT', '96ES<sub>2</sub>', '74E', '70R', '70RE', '66ER', '180VMP', '30G', '181M', '45EV', '47KHL', '47QL<sub>5</sub>', '52PL<sub>3</sub>', '55PP', '55PPA', '55PPD', '55RL<sub>3</sub>', '56PS', '13FE', '13FEL', '70RT', '86G<sub>2</sub>', '40ERV', '4Q'], 'cutoff': '1024'}


{'painel': 'bd1d3eaef36d0219383273ad279bc10b', 'analysis': ['96Q', '96EV', '71A', '3P<sub>4</sub>', '98KN', '28H', '37S', '31I', '142M<sub>3</sub>', '13FE', '13FEL'], 'cutoff': '10753'}


{'painel': 'bd87445f7c45fdc4bdb23abdc4fb52ba', 'analysis': ['14W', '152T', '166ES', '163LW+65QIT', '163LS/G', '163EW+73TE', '163EW+66I', '158T', '156DA', '152W', '173K', '66IF', '180E', '177DT', '76ESI', '76ESN', '76ET', '65QIA+76ESN', '80I+69TNT', '76ES', '80TA', '80TLR', '91R', '95W', '65QIA', '76EG', '66IF+163TEW', '66IS', '66IY', '66I', '69AA+65QI', '69TNT', '69TNT+