In [None]:
import pandas as pd
import numpy as np
import math
import os
import seaborn as sns
import matplotlib.pyplot as plt
import re

from sklearn.datasets import load_digits
from scipy.stats import chi2_contingency

from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import MinMaxScaler
from xgboost import XGBClassifier

## Select K-Best
from sklearn.feature_selection import SelectKBest
## f_regression
from sklearn.feature_selection import f_regression
from sklearn.feature_selection import chi2
from sklearn.feature_selection import mutual_info_classif
from sklearn.feature_selection import mutual_info_regression
from sklearn.feature_selection import f_classif
from sklearn.metrics import precision_recall_fscore_support

from xgboost import XGBClassifier
from sklearn import svm
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import make_classification
from sklearn.neighbors import KNeighborsClassifier
from sklearn.feature_selection import RFE
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.feature_selection import VarianceThreshold

##### Função para criação da característica faixa_etaria

In [None]:
def faixa_etaria_ibge(x):
    if x < 20:
        return '19 ou menos'
    elif x < 25:
        return 'Entre 20 e 24'
    elif x < 30:
        return 'Entre 25 e 29'
    elif x < 35:
        return 'Entre 30 e 34'
    elif x < 40:
        return 'Entre 35 e 39'
    elif x < 45:
        return 'Entre 40 e 44'
    elif x < 50:
        return 'Entre 45 e 49'
    elif x < 55:
        return 'Entre 50 e 54'
    elif x < 60:
        return 'Entre 55 e 59'
    elif x < 65:
        return 'Entre 60 e 64'
    elif x < 70:
        return 'Entre 65 e 69'
    elif x < 75:
        return 'Entre 70 e 74'
    else:
        return 'Mais de 75'

In [None]:
def corrige_media(x, y):
    if (x > 10) & (x <= 20):
        return y
    elif (x > 20) & (x < 50):
        return x*2/10
    elif (x >= 50) & (x <= 100):
        return x/10
    elif (x > 100) & (x <= 500):
        return y
    elif (x > 500) & (x <= 1000):
        return x/100
    elif (x > 1000) & (x <= 10000):
        return x/1000
    elif (x > 10000):
        return y
    else:
        return x

##### Função para realizar o teste de predição com os algoritmos de aprendizado de máquina

In [None]:
def preditor_teste(x, y):

    X_0, X_1, y_0, y_1 = train_test_split(x, y, random_state=0, test_size=0.25)
    
    algoritmo = XGBClassifier(learning_rate=0.1, n_estimators=100, random_state=0)
    
    algoritmo.fit(X_0, y_0)
    
    pred_treino = algoritmo.predict(X_0)
    pred_teste = algoritmo.predict(X_1)
    
    print('XGBoost\n')
    
    print(classification_report(y_1, pred_teste))

    #sns.heatmap(confusion_matrix(y_1, pred_teste), annot=True, cmap='Blues', fmt='')
    
    algoritmo = LogisticRegression(C=0.1, max_iter=1000)

    algoritmo.fit(X_0, y_0)
    
    pred_treino = algoritmo.predict(X_0)
    pred_teste = algoritmo.predict(X_1)
    
    print('Regressão Logística\n')
    
    print(classification_report(y_1, pred_teste))

    #sns.heatmap(confusion_matrix(y_1, pred_teste), annot=True, cmap='Blues', fmt='')

    algoritmo = RandomForestClassifier(max_depth=3, random_state=0)
    
    algoritmo.fit(X_0, y_0)
    
    pred_treino = algoritmo.predict(X_0)
    pred_teste = algoritmo.predict(X_1)
    
    print('Random Forest\n')
    
    print(classification_report(y_1, pred_teste))

    #sns.heatmap(confusion_matrix(y_1, pred_teste), annot=True, cmap='Blues', fmt='')
    
    algoritmo = svm.SVC()
    
    algoritmo.fit(X_0, y_0)
    
    pred_treino = algoritmo.predict(X_0)
    pred_teste = algoritmo.predict(X_1)
    
    print('Support Vector Machine\n')
    
    print(classification_report(y_1, pred_teste))

    #sns.heatmap(confusion_matrix(y_1, pred_teste), annot=True, cmap='Blues', fmt='')
    
    algoritmo = KNeighborsClassifier(n_neighbors=5)
    
    algoritmo.fit(X_0, y_0)
    
    pred_treino = algoritmo.predict(X_0)
    pred_teste = algoritmo.predict(X_1)
    
    print('K-Nearest Neighbors \n')
    
    print(classification_report(y_1, pred_teste))

    #sns.heatmap(confusion_matrix(y_1, pred_teste), annot=True, cmap='Blues', fmt='')
    
    algoritmo = DecisionTreeClassifier()
    algoritmo.fit(X_0, y_0)
    
    pred_treino = algoritmo.predict(X_0)
    pred_teste = algoritmo.predict(X_1)
    
    print('Árvore de Decisão \n')
    
    print(classification_report(y_1, pred_teste))

    #sns.heatmap(confusion_matrix(y_1, pred_teste), annot=True, cmap='Blues', fmt='')

In [None]:
df_a = pd.read_csv('dataset_alunos.csv', low_memory=False)

In [None]:
df_a.drop('Unnamed: 0', axis=1, inplace=True)

In [None]:
df_a.columns

##### Criação da característica faixa_etaria

In [None]:
df_a['faixa_etaria'] = df_a['idade'].apply(faixa_etaria)

##### Colunas que passarão pelo processo de one hot encoding

In [None]:
colunas_alunos = ['cod_curso', 'rendaf', 'rendai', 
                  'escolapai', 'escolamae', 'superior', 
                  'faixa_etaria', 'genero', 'unidadeensino', 'estado_civil',
                  'etnia', 'trajetoria', 'participacaof', 'atividadep']

In [None]:
df_alunos = pd.get_dummies(df_a[colunas_alunos], columns=colunas_alunos)

In [None]:
df_alunos['situacao'] = pd.Series(df_a['situacao'])
df_alunos['matricula'] = pd.Series(df_a['cod_matricula'])

##### Selecionando Evadidos e Não-Evadidos

In [None]:
df_formados = pd.DataFrame(df_alunos[(df_alunos['situacao'] == 'FO') | (df_alunos['situacao'] == 'FI')])
df_evadidos = pd.DataFrame(df_alunos[(df_alunos['situacao'] == 'CA')])

df_formados.drop('situacao', axis=1, inplace=True)
df_evadidos.drop('situacao', axis=1, inplace=True)
df_formados.drop('matricula', axis=1, inplace=True)
df_evadidos.drop('matricula', axis=1, inplace=True)

df_formados['label'] = 0
df_evadidos['label'] = 1

In [None]:
len(df_formados),len(df_evadidos)

##### Concatenando dataset com números iguais de amostras em ambas as classes

In [None]:
df = pd.concat([df_formados.sample(len(df_formados), random_state=0), df_evadidos.sample(len(df_formados), random_state=0)], axis=0)

In [None]:
x = df.drop('label', axis=1)
y = df['label']

In [None]:
del(df)

##### Eliminando características constantes

In [None]:
var_thr = VarianceThreshold(threshold = 0)
var_thr.fit(x)

In [None]:
concol = [column for column in x.columns if column not in x.columns[var_thr.get_support()]]

In [None]:
len(concol)/len(x.columns)*100 # Percentual de colunas constantes

In [None]:
x.drop(concol,axis=1, inplace=True)

In [None]:
len(x.columns)

##### Estudo de Caso 1 - teste com o Conjunto de Dados 1

In [None]:
preditor_teste(x,y)

### Carregando dados do histórico do estudante

In [None]:
df_historico = pd.read_csv('univesp/df_aluno_historico_full.csv', low_memory=False)

##### Remove as disciplinas que não possuem nota avaliativa - NaN(ex.: estágios)

In [None]:
df_h = pd.DataFrame(df_historico[~df_historico['mediafinal'].isna()])
df_h.reset_index(inplace=True)

##### Análise das disciplinas

In [None]:
df_h.columns, df_a.columns

In [None]:
df_ah = df_a.merge(df_h, left_on = 'cod_matricula', right_on = 'matricula', how='inner')

In [None]:
df_h.sit_disciplina.unique()

In [None]:
disciplinas_total = df_ah[['disciplina', 'curso', 'matricula']][(df_ah['periodo_grade'] == 1)].groupby(['disciplina','curso']).count().reset_index()

In [None]:
disciplinas_formados = df_ah[['disciplina', 'curso', 'matricula']][(df_ah['periodo_grade'] == 1) & ((df_ah['situacao'] == 'FO') | (df_ah['situacao'] == 'FI'))].groupby(['disciplina','curso']).count().reset_index()

In [None]:
disciplinas_evadidos = df_ah[['disciplina', 'curso', 'matricula']][(df_ah['periodo_grade'] == 1) & ((df_ah['situacao'] == 'CA'))].groupby(['disciplina','curso']).count().reset_index()

In [None]:
disciplinas_formados_reprovado = df_ah[['disciplina', 'curso', 'matricula']][(df_ah['periodo_grade'] == 1) & ((df_ah['situacao'] == 'FO') | (df_ah['situacao'] == 'FI')) & (df_ah['sit_disciplina'] == 'RE')].groupby(['disciplina','curso']).count().reset_index()

In [None]:
disciplinas_evadidos_reprovado = df_ah[['disciplina', 'curso', 'matricula']][(df_ah['periodo_grade'] == 1) & (df_ah['situacao'] == 'CA') & (df_ah['sit_disciplina'] == 'RE')].groupby(['disciplina','curso']).count().reset_index()

In [None]:
disciplinas_total.columns = ['disciplina', 'curso', 'total']

disciplinas_formados.columns = ['disciplina', 'curso', 'total_formados']
disciplinas_evadidos.columns = ['disciplina', 'curso', 'total_evadidos']

disciplinas_formados_reprovado.columns = ['disciplina', 'curso', 'total_formados_reprovados']
disciplinas_evadidos_reprovado.columns = ['disciplina', 'curso', 'total_evadidos_reprovados']

In [None]:
# disciplinas_evadidos_reprovado.sort_values('total_evadidos_reprovados', ascending=False).head(40)

In [None]:
df_disciplinas = disciplinas_total.merge(disciplinas_formados, how='outer', on=['disciplina', 'curso']).merge(disciplinas_evadidos, how='outer', on=['disciplina', 'curso']).fillna(0).merge(disciplinas_formados_reprovado, how='outer', on=['disciplina', 'curso']).fillna(0).merge(disciplinas_evadidos_reprovado, how='outer', on=['disciplina', 'curso']).fillna(0)

In [None]:
df_disciplinas['percentual_formados'] = df_disciplinas['total_formados']/df_disciplinas['total']
df_disciplinas['percentual_evadidos'] = df_disciplinas['total_evadidos']/df_disciplinas['total']

df_disciplinas['proporcao_evadidos'] = df_disciplinas['total_evadidos']/(df_disciplinas['total_evadidos'] + df_disciplinas['total_formados'])

In [None]:
# df_disciplinas

In [None]:
# df_disciplinas[['disciplina', 'curso', 'percentual_evadidos']].sort_values('percentual_evadidos', ascending=False).head(50)

In [None]:
# df_disciplinas[['disciplina', 'curso', '' 'proporcao_evadidos']][df_disciplinas['total_formados'] > 10].sort_values('proporcao_evadidos', ascending=False).head(40)

##### Colocar todas as médias e notas em um mesmo range (0 - 10)

In [None]:
media_medias = df_h['mediafinal'].mean()

In [None]:
df_h['mediafinal'] = df_h['mediafinal'].apply(lambda x: corrige_media(x, media_medias))

##### Conferindo

In [None]:
df_h[['matricula', 'disciplina', 'mediafinal', 'sit_matricula', 'sit_disciplina']][(df_h['mediafinal'] > 10)&(df_h['mediafinal'] <= 100000)]

##### Preenche com 0 para quem não possui pontos no processo seletivo

In [None]:
df_h['totalpontoprocseletivo'].fillna(0, inplace=True)

In [None]:
colunas_h = ['matricula', 'freguencia', 'mediafinal', 'totalpontoprocseletivo', 'cod_disciplina', 'disciplina']

In [None]:
del(df_historico)

##### Normalizando a Frequência

In [None]:
df_h['freguencia'][df_h['freguencia'] <= 100].mean()

In [None]:
def frequencia_media(f):
    if f > 100:
        return 99
    elif (f <= 10) & (f > 1):
        return f*10
    elif f == 1:
        return f*100
    else:
        return f

In [None]:
df_h['freguencia'] = df_h['freguencia'].apply(frequencia_media)

In [None]:
df_h[df_h['periodo_grade'].isnull()]

##### Selecionando somente o primeiro semestre do curso

In [None]:
df_historico = pd.DataFrame(df_h[colunas_h][(df_h['periodo_cursou'] <= 1) & (df_h['periodo_grade'] == 1)])

##### Conferir quantos registros sobram com esta seleção

In [None]:
df_a[['cod_matricula', 'situacao']][(df_a['situacao'] == 'FO') | (df_a['situacao'] == 'FI')].merge(df_historico, how='inner', left_on='cod_matricula', right_on='matricula').groupby('cod_matricula').count().sort_values('mediafinal', ascending=True).tail(10)

##### Selecionar apenas os dados das situações FO, FI e CA

In [None]:
df_historico.columns, df_a.columns

In [None]:
df_hn = pd.DataFrame(df_a[['cod_matricula', 'situacao']][(df_a['situacao'] == 'FO') | (df_a['situacao'] == 'FI') | (df_a['situacao'] == 'CA')].merge(df_historico, how='inner', left_on='cod_matricula', right_on='matricula')) #.groupby('matricula').count().sort_values('mediafinal', ascending=True).head(10)

In [None]:
len(df_h2)

In [None]:
colunas_agrupar = ['disciplina', 'freguencia', 'mediafinal']

In [None]:
colunas_todas = []
colunas_todas.append('matricula')
colunas_todas.append('totalpontoprocseletivo')
for i in colunas_agrupar:
    for x in range(1,9):
        colunas_todas.append(i + '_' + str(x))

In [None]:
len(colunas_todas)

In [None]:
df_h_full = pd.DataFrame(columns=colunas_todas)

In [None]:
matriculas = df_hn['matricula'].unique()

In [None]:
df_h_full.columns

#### Juntando as disciplinas por matrícula

In [None]:
novo_dataset = []
for m in matriculas:
    nova_linha = []
    # Primeiro registro -> matricula
    nova_linha.append(m)
    # Segundo registro -> total pontos no processo seletivo
    nova_linha.append(df_hn['totalpontoprocseletivo'][df_hn['matricula'] == m].mean())
    for c in colunas_agrupar:
        d = 0
        for i in df_hn[c][df_hn['matricula'] == m]:
            if(d < 8):
                nova_linha.append(i)
                d = d + 1
        #print(str(d) + '\n')
        if(d < 8):
            r = 8 - d
            for i in range(r):
                # Se a coluna atual for disciplina, completar com 0
                if c == 'disciplina':
                    nova_linha.append(0)
                    d = d + 1
                else:
                    nova_linha.append(int(df_hn[c][df_hn['matricula'] == m].mean()))
                    d = d + 1
        #print('\n' + str(d) + '\n')
    novo_dataset.append(nova_linha)

In [None]:
len(novo_dataset)

In [None]:
df_h_full = pd.DataFrame(novo_dataset, columns=colunas_todas)

In [None]:
df_h_full.columns

##### Factorize dos dados categóricos do dataset do histórico

In [None]:
disciplina = ['disciplina_1', 'disciplina_2', 'disciplina_3', 'disciplina_4', 'disciplina_5', 'disciplina_6', 'disciplina_7', 'disciplina_8']

In [None]:
for d in disciplina:
    n_fac, n_uniq = pd.factorize(df_h_full[d])
    n_fac = n_fac + 1
    df_h_full[d] = pd.Series(n_fac)

In [None]:
df_h_full

In [None]:
df_h_a_full = df_h_full.merge(df_a, how='inner', left_on='matricula', right_on='cod_matricula')

In [336]:
df_h_a_full.columns

Index(['matricula', 'totalpontoprocseletivo', 'disciplina_1', 'disciplina_2',
       'disciplina_3', 'disciplina_4', 'disciplina_5', 'disciplina_6',
       'disciplina_7', 'disciplina_8', 'freguencia_1', 'freguencia_2',
       'freguencia_3', 'freguencia_4', 'freguencia_5', 'freguencia_6',
       'freguencia_7', 'freguencia_8', 'mediafinal_1', 'mediafinal_2',
       'mediafinal_3', 'mediafinal_4', 'mediafinal_5', 'mediafinal_6',
       'mediafinal_7', 'mediafinal_8', 'cod_curso', 'cod_matricula', 'idade',
       'genero', 'estado_civil', 'cod_municipio', 'etnia', 'trajetoria',
       'unidadeensino', 'superior', 'atividadep', 'participacaof', 'rendai',
       'rendaf', 'escolapai', 'escolamae', 'curso', 'situacao', 'faixa_etaria',
       'disciplina_9', 'disciplina_10', 'disciplina_11', 'disciplina_12',
       'disciplina_13', 'disciplina_14', 'disciplina_15', 'disciplina_16',
       'disciplina_17', 'disciplina_18', 'disciplina_19', 'disciplina_20',
       'disciplina_21', 'disciplina

In [None]:
df_h_a_full.groupby('situacao').count()

##### OneHotEncoding

In [None]:
colunas_d = ['disciplina_1', 'disciplina_2', 'disciplina_3', 'disciplina_4', 
             'disciplina_5', 'disciplina_6', 'disciplina_7', 'disciplina_8']

In [None]:
disciplinas = pd.concat([df_h_a_full['disciplina_1'],df_h_a_full['disciplina_2'],df_h_a_full['disciplina_3'],
                         df_h_a_full['disciplina_4'],df_h_a_full['disciplina_5'],df_h_a_full['disciplina_6'],
                         df_h_a_full['disciplina_7'],df_h_a_full['disciplina_8'],],axis=0).unique()

In [None]:
dummies = []

for d in disciplinas:
    if int(d) > 0:
        dummies.append('disciplina_' + str(d))

In [None]:
def disciplina_dummies(x_array, d):
    if int(d.split('_')[1]) in x_array:
        return 1
    else:
        return 0

In [None]:
for d in dummies:
    df_h_a_full[d] = df_h_a_full.apply(lambda x: disciplina_dummies([x.disciplina_1, x.disciplina_2, x.disciplina_3,
                                                       x.disciplina_4,x.disciplina_5, x.disciplina_6,
                                                       x.disciplina_7, x.disciplina_8], d), axis=1)

In [None]:
df_a.columns

In [None]:
colunas_alunos = ['cod_curso', 'genero', 'estado_civil', 'etnia', 'trajetoria', 'unidadeensino', 'superior',
       'atividadep', 'participacaof', 'rendai', 'rendaf', 'escolapai', 'escolamae', 'faixa_etaria']

In [None]:
df_h_a_one = pd.get_dummies(df_h_a_full, columns=colunas_alunos)

In [None]:
df_h_a_one.drop('curso', axis=1, inplace=True)

In [340]:
def label(x):
    if x == 'CA':
        return 1
    else:
        return 0        

In [341]:
df_h_a_one['label'] = df_h_a_one['situacao'].apply(label)

In [342]:
df_h_a_one.drop('situacao', axis=1, inplace=True)

##### Divisão dos dados

In [343]:
df_formados = pd.DataFrame(df_h_a_one[(df_h_a_one['label'] == 0)])
df_evadidos = pd.DataFrame(df_h_a_one[(df_h_a_one['label'] == 1)])

In [344]:
len(df_formados), len(df_evadidos)

(10012, 38478)

In [358]:
df = pd.concat([df_formados.sample(len(df_formados), random_state=0), df_evadidos.sample(len(df_formados), random_state=0)], axis=0)
df = df.fillna(0)

In [359]:
x = df.drop('label', axis=1)
y = df['label']

##### Eliminando variáveis constantes

In [360]:
var_thr = VarianceThreshold(threshold = 0)
var_thr.fit(x)

concol = [column for column in x.columns if column not in x.columns[var_thr.get_support()]]

len(concol)/len(x.columns)*100

10.771992818671453

In [361]:
x.drop(concol,axis=1, inplace=True)

##### Colocar os dados em uma mesma escala

In [362]:
scaler_mm = MinMaxScaler().fit(x)

In [363]:
x_scaled = scaler_mm.transform(x)

##### Testando com os algoritmos e todas as features

In [364]:
preditor_teste(x_scaled, y)

  if is_sparse(data):


XGBoost

              precision    recall  f1-score   support

           0       0.86      0.95      0.91      2507
           1       0.95      0.85      0.90      2499

    accuracy                           0.90      5006
   macro avg       0.91      0.90      0.90      5006
weighted avg       0.91      0.90      0.90      5006

Regressão Logística

              precision    recall  f1-score   support

           0       0.84      0.93      0.89      2507
           1       0.92      0.83      0.87      2499

    accuracy                           0.88      5006
   macro avg       0.88      0.88      0.88      5006
weighted avg       0.88      0.88      0.88      5006

Random Forest

              precision    recall  f1-score   support

           0       0.79      0.97      0.87      2507
           1       0.96      0.75      0.84      2499

    accuracy                           0.86      5006
   macro avg       0.88      0.86      0.86      5006
weighted avg       0.88      

In [365]:
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

In [369]:
def best_features(n_features, x, y):
    
    melhores_features = SelectKBest(f_classif, k=n_features)
    melhores_features.fit(x, y)
    cols_idxs = melhores_features.get_feature_names_out(x.columns)
    scores = melhores_features.scores_
    
    df_x = pd.DataFrame(data=melhores_features.transform(x), columns=cols_idxs)
    
    X_0, X_1, y_0, y_1 = train_test_split(df_x, y, random_state=0, test_size=0.25)
    
    a = XGBClassifier(learning_rate=0.1, n_estimators=100, random_state=0)
    #a = DecisionTreeClassifier()
    #a = LogisticRegression(C=0.1, max_iter=10000)
    
    a.fit(X_0, y_0)

    pred_treino_x = a.predict(X_0)
    pred_teste_x = a.predict(X_1)

    result = precision_recall_fscore_support(pred_teste_x, y_1)

    recall_0, recall_1 = recallculator(pred_teste_x, y_1)
    
    f_importances = a.feature_importances_
    #f_importances = a.coef_
    n_features = n_features
    features_names = a.feature_names_in_
    score = a.score(X_1, y_1)
    
    return f_importances, n_features, features_names, score, recall_0, recall_1, pred_teste_x, y_1

In [375]:
def recallculator(predicao, y):
    vp = 0
    vn = 0
    fp = 0
    fn = 0
    y = list(y)
    if len(predicao) == len(y):
        for i in range(0,len(y)):
            if((predicao[i] == 1) & (y[i] == 1)):
                vp = vp + 1
            elif((predicao[i] == 0) & (y[i] == 1)):
                fn = fn + 1
            elif((predicao[i] == 1) & (y[i] == 0)):
                fp = fp + 1
            elif((predicao[i] == 0) & (y[i] == 0)):
                vn = vn + 1
        total_1 = sum(y)
        total_0 = len(y) - total_1
        recall_0 = vn / total_0
        recall_1 = vp / total_1

        return recall_0, recall_1

In [373]:
new_x = pd.DataFrame(x_scaled, columns=x.columns)

In [376]:
f_importances = []
n_features = []
features_names = []
score = []
recall_0 = []
recall_1 = []
predicao = []
x_teste = []

for i in range(10,len(new_x.columns)):
    
    r = best_features(i, new_x, y)
    f_importances.append(r[0])
    n_features.append(r[1])
    features_names.append([r[2]])
    score.append(r[3])
    recall_0.append(r[4])
    recall_1.append(r[5])
    predicao.append(r[6])
    x_teste.append(r[7])    
    
    print('Features: ' + str(r[1]) + ' - Score: ' + str(r[3].round(3)) + ' - Recall 0: ' + str(r[4]) + ' - Recall 1: ' + str(r[5]))

Features: 10 - Score: 0.856 - Recall 0: 0.9437574790586358 - Recall 1: 0.7671068427370948
Features: 11 - Score: 0.868 - Recall 0: 0.9429597128041484 - Recall 1: 0.7931172468987595
Features: 12 - Score: 0.87 - Recall 0: 0.9453530115676106 - Recall 1: 0.7943177270908364
Features: 13 - Score: 0.869 - Recall 0: 0.9433585959313922 - Recall 1: 0.7935174069627852
Features: 14 - Score: 0.869 - Recall 0: 0.9433585959313922 - Recall 1: 0.7935174069627852
Features: 15 - Score: 0.871 - Recall 0: 0.9401675309134424 - Recall 1: 0.801920768307323
Features: 16 - Score: 0.876 - Recall 0: 0.9485440765855604 - Recall 1: 0.8031212484993998
Features: 17 - Score: 0.876 - Recall 0: 0.9485440765855604 - Recall 1: 0.8031212484993998
Features: 18 - Score: 0.875 - Recall 0: 0.9445552453131233 - Recall 1: 0.8059223689475791
Features: 19 - Score: 0.874 - Recall 0: 0.9389708815317112 - Recall 1: 0.8091236494597839
Features: 20 - Score: 0.875 - Recall 0: 0.9441563621858795 - Recall 1: 0.8047218887555022
Features: 21