# Importações

In [1]:
import json
import joblib
import sklearn.model_selection as ms
import pandas                  as pd
import numpy                   as np
import sklearn.metrics         as mt

# Base de dados

In [2]:
db_pe = pd.read_csv('datasets/pe-dataset.csv')

db_pe.head()

Unnamed: 0,panel_info,panel_eplet,locus_abc,locus_dr,locus_dq,locus_dp,panel_nc,panel_pc,panel_allele_count,panel_min_mfi,panel_max_mfi,reactive
0,0dfaab7bd101edb0c413798f4d19e9a1,102H,1,0,0,0,126,10803,1,483,483,0
1,0dfaab7bd101edb0c413798f4d19e9a1,103M,1,0,0,0,126,10803,1,778,778,0
2,0dfaab7bd101edb0c413798f4d19e9a1,107W,1,0,0,0,126,10803,4,6939,10219,1
3,0dfaab7bd101edb0c413798f4d19e9a1,113HD,1,0,0,0,126,10803,14,0,13247,0
4,0dfaab7bd101edb0c413798f4d19e9a1,113YD,1,0,0,0,126,10803,14,0,10472,0


# Selecionando atributos da base

In [3]:
instances_painel = db_pe.drop(columns=['panel_info', 'panel_eplet', 'reactive'])
instances_painel.head()

Unnamed: 0,locus_abc,locus_dr,locus_dq,locus_dp,panel_nc,panel_pc,panel_allele_count,panel_min_mfi,panel_max_mfi
0,1,0,0,0,126,10803,1,483,483
1,1,0,0,0,126,10803,1,778,778
2,1,0,0,0,126,10803,4,6939,10219
3,1,0,0,0,126,10803,14,0,13247
4,1,0,0,0,126,10803,14,0,10472


In [4]:
labels_painel = np.array(db_pe['reactive'])

print(labels_painel)

[0 0 1 ... 0 0 0]


In [24]:
labels_painel

array([0, 0, 1, ..., 0, 0, 0], dtype=int64)

In [5]:
db_pe['panel_info'].value_counts()

28f85621c895f6578ea05f6e03b499a8    250
1b1c09cb5bab0264ce057dc8889ddcbe    246
8da6126c1c996d05122425fe13196b69    241
d5cb15b7eceeb8965bb6937b1f34cfc1    235
b64cf228e00309ffed2febda8a0ae29c    215
17b6d6e71e8de8e14f1b1805b4a0efde    212
cf914f684d44e4604c5d12e5eecf2764    210
c317c42b6c1cdc62ac52680f4878b801    210
a950d2818765d899d0a6078906e18379    200
ddc6d40696e00e064d0a268f229a739d    194
325efd0775fd3b2288584934bfb86ce9    194
d72848c6b111b193f5ed958b2006b4f5    194
469969430db09698ffea54260d7a392a    192
756a4732e561e2e34945a2748749649f    192
6773fb3c8012e79f5ab04362675cc77f    192
c1b7ae96c751da0fec857a809b630577    192
1b0946f2e38b96dae4ee1509824478bd    192
21ccaed0ca78c37210c56bfbc667005b    192
560ce787cfd2b8f4b3c392670406b276    190
5639c77069a92483b2aeb1c9ea9d205b    189
a6475fc907691896b42c8323b12e4844    189
86c2ae8ad2fd53a6dfc69c484d4594bc    189
423d07894993f0761189d178a5dee511    189
2b8ad012c2f1dadc8e225d52808e236c    189
98885cb05f04f6d492cbb85504db9787    187


# Carregando o Modelo Persistido

In [6]:
# Modelo treinado com a base de SP
joblib_model = joblib.load("persisted_model/joblib_final_model.pkl")
print(joblib_model)

RandomForestClassifier(max_features='sqrt', min_samples_leaf=4,
                       min_samples_split=8, n_estimators=400)


# Classificando Epítopos

In [7]:
cv = 10
scoring = ['roc_auc', 'accuracy']

predicted_labels_painel = ms.cross_val_predict(joblib_model, instances_painel, labels_painel, cv=cv, n_jobs=-1)

print()
print('First step:\n')
print('- Confusion matrix (TN FP FN TP):', mt.confusion_matrix(labels_painel, predicted_labels_painel).ravel())
print("- AUC-ROC: %0.2f" % (mt.roc_auc_score(labels_painel, predicted_labels_painel)*100))
print("- Accuracy: %0.2f" % (mt.accuracy_score(labels_painel, predicted_labels_painel)*100))

scores = ms.cross_validate(joblib_model, instances_painel, labels_painel, cv=cv, n_jobs=-1, scoring=scoring)

print()
print('Second step:\n')
print("- AUC-ROC:  %0.2f (+/- %0.2f)" % (scores['test_roc_auc'].mean()*100, scores['test_roc_auc'].std()*100))
print("- Accuracy: %0.2f (+/- %0.2f)" % (scores['test_accuracy'].mean()*100, scores['test_accuracy'].std()*100))


First step:

- Confusion matrix (TN FP FN TP): [77584   397  3103   315]
- AUC-ROC: 54.35
- Accuracy: 95.70

Second step:

- AUC-ROC:  89.59 (+/- 1.23)
- Accuracy: 95.69 (+/- 0.17)


In [8]:
predicted_labels_df = pd.DataFrame(data=predicted_labels_painel.flatten())

predicted_labels_df[0].value_counts()

0    80687
1      712
Name: 0, dtype: int64

In [9]:
def returnReactiveEplets(labels):
    painels = []
    predict_eplet = []
    painel_number = db_pe['panel_info'][0]                          # Recebe o identificador do primeiro painel
    first_react_eplet = 1                                           # Flag para controlarmos o cálculo do cutoff
    cutoff = 0                                                                                                                
    
    for index in range(0, len(labels)):                             # Percorre todas as instâncias da base
        if(db_pe['panel_info'][index] == painel_number):            # Se ainda estivermos no mesmo painel
            
            if(labels[index] == 1):                                 # Se o epítopo for classificado como reativo
                predict_eplet.append(db_pe['panel_eplet'][index])   # Adiconamos à lista de epítopos classificados como reativos
                
                if(first_react_eplet):                              # Verificamos se é o primeiro epítopo reativo daquele painel
                    cutoff = db_pe['panel_min_mfi'][index]          # Inicializamos o valor do cutoff
                    first_react_eplet = 0                           # Atualizamos o valor da flag
                    
                elif(db_pe['panel_min_mfi'][index] < cutoff):       # Verificamos se o cutoff do eplet é o menor cutoff
                    cutoff = db_pe['panel_min_mfi'][index]          # Atualizamos o valor do cutoff
        else:                                                       
            results = {                                             # Se mudarmos de painel, resetamos as análises
                'result_sugestion': {"cutoff": ""},
                'result_user': {
                    'analysis': [],
                    'cutoff': ''
                }
            }
            results['result_user']['analysis'] = predict_eplet     # Preenchemos a lista de epítopos classificados como reativos
            results['result_user']['cutoff'] = str(cutoff)         # Preenchemos o valor do cutoff
            painels.append(results)                                # Adicionamos à lista de resultados
            
            painel_number = db_pe['panel_info'][index]             # Atualizamos o identificador do painel
            predict_eplet = []                                     # Resetamos a lista de epítopos classificados como reativos
            cutoff = 0                                             # Resetamos o valor do cutoff
            first_react_eplet = 1                                  # Resetamos a flag para controlarmos o cálculo do cutoff
            
    return painels

In [10]:
def printResults(panels):
    for index in range(0, len(panels)):
        print("Painel: " + str(index))
        print(panels[index])
        print("\n")

In [11]:
panels = returnReactiveEplets(predicted_labels_painel)
printResults(panels)

Painel: 0
{'result_sugestion': {'cutoff': ''}, 'result_user': {'analysis': ['82LR'], 'cutoff': '1069'}}


Painel: 1
{'result_sugestion': {'cutoff': ''}, 'result_user': {'analysis': ['52PL<sub>3</sub>', '55PP', '55PPD', '61FT<sub>4</sub>', '66IL', '76L', '84QL<sub>3</sub>'], 'cutoff': '1889'}}


Painel: 2
{'result_sugestion': {'cutoff': ''}, 'result_user': {'analysis': ['82LR'], 'cutoff': '1212'}}


Painel: 3
{'result_sugestion': {'cutoff': ''}, 'result_user': {'analysis': [], 'cutoff': '0'}}


Painel: 4
{'result_sugestion': {'cutoff': ''}, 'result_user': {'analysis': ['25R', '46VY<sub>3</sub>'], 'cutoff': '8546'}}


Painel: 5
{'result_sugestion': {'cutoff': ''}, 'result_user': {'analysis': ['76ESN'], 'cutoff': '2785'}}


Painel: 6
{'result_sugestion': {'cutoff': ''}, 'result_user': {'analysis': [], 'cutoff': '0'}}


Painel: 7
{'result_sugestion': {'cutoff': ''}, 'result_user': {'analysis': ['79GT'], 'cutoff': '447'}}


Painel: 8
{'result_sugestion': {'cutoff': ''}, 'result_user': {'ana

{'result_sugestion': {'cutoff': ''}, 'result_user': {'analysis': [], 'cutoff': '0'}}


Painel: 244
{'result_sugestion': {'cutoff': ''}, 'result_user': {'analysis': [], 'cutoff': '0'}}


Painel: 245
{'result_sugestion': {'cutoff': ''}, 'result_user': {'analysis': [], 'cutoff': '0'}}


Painel: 246
{'result_sugestion': {'cutoff': ''}, 'result_user': {'analysis': [], 'cutoff': '0'}}


Painel: 247
{'result_sugestion': {'cutoff': ''}, 'result_user': {'analysis': [], 'cutoff': '0'}}


Painel: 248
{'result_sugestion': {'cutoff': ''}, 'result_user': {'analysis': ['152E', '95L'], 'cutoff': '881'}}


Painel: 249
{'result_sugestion': {'cutoff': ''}, 'result_user': {'analysis': [], 'cutoff': '0'}}


Painel: 250
{'result_sugestion': {'cutoff': ''}, 'result_user': {'analysis': ['163L'], 'cutoff': '443'}}


Painel: 251
{'result_sugestion': {'cutoff': ''}, 'result_user': {'analysis': [], 'cutoff': '0'}}


Painel: 252
{'result_sugestion': {'cutoff': ''}, 'result_user': {'analysis': ['113H'], 'cutoff': '