# (MultiFiT) Portuguese Text Classifier 
### MultiFiT configuration
- **Architecture: 4 QRNN layers with 1550 hidden units per layer, SentencePiece tokenizer (15 000 tokens)**
- **Hyperparameters and training method from the MultiFiT paper**

Notebook original:
- https://raw.githubusercontent.com/piegu/language-models/master/lm3-portuguese-classifier-TCU-jurisprudencia.ipynb


## Initialisation

In [None]:
!pip install sentencepiece

In [None]:
!pip install ninja

In [None]:
import torch
torch.cuda.current_device() # this function must be called first to avoid an error during CUDA initialisation
torch.cuda.set_device(0)

In [None]:
torch.cuda.is_available()

In [None]:
torch.backends.cudnn.enabled

In [None]:

%reload_ext autoreload
%autoreload 2
%matplotlib inline

from fastai import *
from fastai.text import *
from fastai.callbacks import *

import matplotlib.cm as cm

In [None]:
bs=16

In [None]:
from google.colab import drive
drive.mount('/content/drive')

In [None]:
#data_path = Config.data_path()
#import os
#data_path = os.getcwd()
#data_path

In [None]:
lang = 'pt'

In [None]:

path = f'/content/drive/My Drive/Colab Notebooks/Tweets_BC'


lm_fns3 = [f'{lang}_wt_sp15_multifit', f'{lang}_wt_vocab_sp15_multifit']
lm_fns3_bwd = [f'{lang}_wt_sp15_multifit_bwd', f'{lang}_wt_vocab_sp15_multifit_bwd']
print(path)
print(lm_fns3)
print(lm_fns3_bwd)

In [None]:
from sklearn.metrics import f1_score

@np_func
def f1(inp, targ):
    """Weighted-average F1 score of the arg-max predictions against the targets."""
    # Reduce the last axis of the model output to the index of the highest score.
    predicted = np.argmax(inp, axis=-1)
    return f1_score(targ, predicted, average='weighted')

In [None]:
# source: https://github.com/fastai/fastai/blob/master//fastai/layers.py#L300:7
# blog: https://bfarzin.github.io/Label-Smoothing/
class WeightedLabelSmoothingCrossEntropy(nn.Module):
    """Label-smoothing cross-entropy whose NLL term uses per-class weights.

    The loss is ``eps/c * smooth + (1 - eps) * nll`` where ``c`` is the size of
    the last dimension of the model output, ``smooth`` is the negated sum of
    the log-probabilities and ``nll`` is ``F.nll_loss`` called with ``weight``.
    Only the NLL term is weighted; the smoothing term is not.

    Args:
        weight: per-class weights forwarded to ``F.nll_loss``.
        eps: smoothing factor.
        reduction: 'mean', 'sum', or any other value for no reduction of the
            smoothing term; the same value is forwarded to ``F.nll_loss``.
    """

    def __init__(self, weight, eps:float=0.1, reduction='mean'):
        super().__init__()
        self.weight = weight
        self.eps = eps
        self.reduction = reduction

    def forward(self, output, target):
        """Return the smoothed, weighted loss of `output` scores against `target` indices."""
        n_classes = output.size()[-1]
        log_probs = F.log_softmax(output, dim=-1)

        # Smoothing term: negated log-probabilities summed over classes,
        # then reduced the same way as the NLL term below.
        if self.reduction == 'sum':
            smooth = -log_probs.sum()
        elif self.reduction == 'mean':
            smooth = (-log_probs.sum(dim=-1)).mean()
        else:
            smooth = -log_probs.sum(dim=-1)

        weighted_nll = F.nll_loss(log_probs, target, weight=self.weight, reduction=self.reduction)
        return smooth*self.eps/n_classes + (1-self.eps) * weighted_nll

In [None]:
import warnings
warnings.filterwarnings('ignore')  # valid actions: "error", "ignore", "always", "default", "module" or "once"

## Data

In [None]:
df = pd.read_pickle(path + '/df_processado.pkl')
df.info()
df2 = df[df['sent_manual'].fillna('nan').str.contains('N|E|S|C')].copy()

def corrige_label(label):
    """Map the labels 'S' and 'E' to 'N'; return any other label unchanged."""
    return 'N' if label in ('S', 'E') else label

In [None]:
df2['sent_manual'] = df2['sent_manual'].apply(corrige_label)
df2['sent_manual'].value_counts()
lista_index = df2.index.values.copy()
lista_texto = df2.tweet_limpo.to_list().copy()
lista_label = df2.sent_manual.to_list().copy()

# Lowercase every tweet into a new list (lista_texto itself is left untouched)
corpus = [texto.lower() for texto in lista_texto]

# Remove digit runs, then every character that is neither a word character
# nor whitespace (punctuation), then the ordinal indicator 'º'
corpus = [
    re.sub('º', '', re.sub(r'[^\w\s]', '', re.sub('[0-9]+', '', texto)))
    for texto in corpus
]

# create a dataframe using texts and lables
trainDF = pd.DataFrame()
trainDF['text'] = corpus
#trainDF['text'] = lista_texto
trainDF['label'] = lista_label

trainDF['label'].value_counts()

    

## Fine-tuning "forward LM"

In [None]:
trainDF.sample(5)

In [None]:
#dest = path/'corpus2_100'
#(dest/'tmp').ls()
dest = path + '/corpus2_100'
dest


### Databunch

In [None]:
%%time
data_lm = (TextList.from_df(trainDF, path, cols='text', processor=SPProcessor.load(dest)) # the SentencePiece files must be inside a folder named 'tmp' within dest
    .split_by_rand_pct(0.2, seed=100)
    .label_for_lm()           
    .databunch(bs=bs, num_workers=1))

In [None]:
data_lm.train_ds

In [None]:
data_lm.valid_ds

In [None]:
data_lm.save(f'{path}/{lang}_databunch_lm_tweets_bc_sp15_multifit_v2')

### Training

In [None]:
data_lm = load_data(path, f'{lang}_databunch_lm_tweets_bc_sp15_multifit_v2', bs=bs)

In [None]:
config = awd_lstm_lm_config.copy()
config['qrnn'] = True
config['n_hid'] = 1550 #default 1152
config['n_layers'] = 4 #default 3

In [None]:
perplexity = Perplexity()

In [None]:
%%time
learn_lm = language_model_learner(data_lm, AWD_LSTM, config=config, pretrained_fnames=lm_fns3, drop_mult=1., 
                                  metrics=[error_rate, accuracy, perplexity])#.to_fp16() #RuntimeError: "bernoulli_scalar_cpu_" not implemented for 'Half' when to_fp16() is used


In [None]:
#learn_lm.model

In [None]:
# number of model parameters
#sum([p.numel() for p in learn_lm.model.parameters()])

#### Change loss function

In [None]:
learn_lm.loss_func

In [None]:
learn_lm.loss_func = FlattenedLoss(LabelSmoothingCrossEntropy)

In [None]:
learn_lm.loss_func

#### Training

In [None]:
learn_lm.lr_find()

In [None]:
learn_lm.recorder.plot()

In [None]:
lr = 2e-2
lr *= bs/48

wd = 0.1

In [None]:
learn_lm.fit_one_cycle(2, lr*10, wd=wd, moms=(0.8,0.7))

In [None]:
learn_lm.save(f'{lang}fine_tuned1_tweets_bc_sp15_multifit_v2')
learn_lm.save_encoder(f'{lang}fine_tuned1_enc_tweets_bc_sp15_multifit_v2')

In [None]:
learn_lm.unfreeze()
#learn_lm.fit_one_cycle(18, lr, wd=wd, moms=(0.8,0.7), callbacks=[ShowGraph(learn_lm)]) 
learn_lm.fit_one_cycle(10, lr, wd=wd, moms=(0.8,0.7), callbacks=[ShowGraph(learn_lm)]) 
#learn_lm.fit_one_cycle(18, lr, wd=wd, moms=(0.8,0.7))

In [None]:
learn_lm.save(f'{lang}fine_tuned2_lenerbr_sp15_multifit_v2')
learn_lm.save_encoder(f'{lang}fine_tuned2_enc_lenerbr_sp15_multifit_v2')

In [None]:
#learn_lm.load(f'{lang}fine_tuned2_lenerbr_sp15_multifit_v2');

epoch	train_loss	valid_loss	error_rate	accuracy	perplexity	time <br>
0	4.026235	4.850301	0.656977	0.343023	56.712563	00:19 <br>
1	4.007512	4.827883	0.653260	0.346740	55.668045	00:19 <br>
2	4.040335	4.828910	0.655378	0.344622	56.004532	00:19 <br>
3	4.103630	4.861111	0.657413	0.342587	57.734848	00:19 <br>
4	4.121545	4.874278	0.658659	0.341341	58.080311	00:19 <br>
5	4.085582	4.857046	0.657786	0.342213	57.604557	00:19 <br>
6	4.024629	4.895518	0.659178	0.340822	59.595253	00:19 <br>
7	3.918815	4.880876	0.652595	0.347405	58.826031	00:19 <br>
8	3.836871	4.919249	0.653364	0.346636	61.336811	00:19 <br>
9	3.691151	4.924926	0.649813	0.350187	61.226910	00:19 <br>
10	3.573541	4.957286	0.648422	0.351578	63.443111	00:19 <br>
11	3.403976	4.980371	0.646968	0.353032	64.674278	00:19 <br>
12	3.242382	4.987718	0.646823	0.353177	65.596115	00:19 <br>
13	3.114132	5.038701	0.645204	0.354797	68.497879	00:19 <br>
14	2.989200	5.055654	0.644705	0.355295	70.223915	00:19 <br>
15	2.921864	5.071135	0.644622	0.355378	71.220886	00:19 <br>
16	2.854782	5.077914	0.644913	0.355087	71.853859	00:19 <br>
17	2.835677	5.080024	0.644871	0.355129	71.992210	00:19 <br>

In [None]:
#learn_lm.unfreeze()
#learn_lm.fit_one_cycle(18, lr, wd=wd, moms=(0.8,0.7), callbacks=[ShowGraph(learn_lm)]) #ShowGraph raises ImportError: cannot import name '_png'

Save best LM learner and its encoder

In [None]:
learn_lm.save(f'{lang}fine_tuned_tweets_bc_sp15_multifit_v2')
learn_lm.save_encoder(f'{lang}fine_tuned_enc_tweets_bc_sp15_multifit_v2')

## Fine-tuning "backward LM"

### Databunch

In [None]:
%%time
data_lm = (TextList.from_df(trainDF, path, cols='text', processor=SPProcessor.load(dest))
    .split_by_rand_pct(0.2, seed=100)
    .label_for_lm()           
    .databunch(bs=bs, num_workers=1, backwards=True))

In [None]:
data_lm.save(f'{path}/{lang}_databunch_lm_tweets_bc_sp15_multifit_bwd_v2')

### Training

In [None]:
%%time
data_lm = load_data(path, f'{lang}_databunch_lm_tweets_bc_sp15_multifit_bwd_v2', bs=bs, backwards=True)

In [None]:
config = awd_lstm_lm_config.copy()
config['qrnn'] = True
config['n_hid'] = 1550 #default 1152
config['n_layers'] = 4 #default 3

In [None]:
%%time
perplexity = Perplexity()
learn_lm = language_model_learner(data_lm, AWD_LSTM, config=config, pretrained_fnames=lm_fns3_bwd, drop_mult=1., 
                                  metrics=[error_rate, accuracy, perplexity])#.to_fp16()

#### Change loss function

In [None]:
learn_lm.loss_func

In [None]:
learn_lm.loss_func = FlattenedLoss(LabelSmoothingCrossEntropy)

In [None]:
learn_lm.loss_func

#### Training

In [None]:
learn_lm.lr_find()

In [None]:
learn_lm.recorder.plot()

In [None]:
lr = 2e-2
lr *= bs/48

wd = 0.1

In [None]:
learn_lm.fit_one_cycle(2, lr*10, wd=wd, moms=(0.8,0.7))

In [None]:
learn_lm.save(f'{lang}fine_tuned1_tweets_bc_sp15_multifit_bwd_v2')
learn_lm.save_encoder(f'{lang}fine_tuned1_enc_tweets_bc_sp15_multifit_bwd_v2')

epoch	train_loss	valid_loss	error_rate	accuracy	perplexity	time  <br>
0	6.302666	6.044674	0.826155	0.173845	221.182205	00:21 <br>
1	6.046424	5.839687	0.799895	0.200105	177.380646	00:21 <br>
2	5.769688	5.541620	0.755515	0.244485	128.012085	00:21 <br>
3	5.479696	5.314439	0.725683	0.274317	97.298874	00:21 <br>
4	5.247477	5.137167	0.701471	0.298529	80.597542	00:21 <br>
5	5.024149	5.031209	0.680777	0.319223	70.631744	00:21 <br>
6	4.859836	4.965009	0.674632	0.325368	65.067368	00:21 <br>
7	4.721925	4.890521	0.664653	0.335347	61.066811	00:21 <br>
8	4.551728	4.858989	0.659611	0.340389	58.249828	00:21 <br>
9	4.462481	4.832177	0.650945	0.349055	56.585121	00:21 <br>
10	4.295513	4.806365	0.644538	0.355462	54.819786	00:21 <br>
11	4.177893	4.800797	0.639968	0.360032	54.325031	00:21 <br>
12	4.050987	4.800620	0.638130	0.361870	53.666927	00:21 <br>
13	3.878635	4.797090	0.633298	0.366702	53.689964	00:21 <br>
14	3.804556	4.794771	0.630672	0.369328	53.636059	00:21 <br>
15	3.691840	4.814564	0.631092	0.368908	54.688404	00:21 <br>
16	3.636143	4.826870	0.631302	0.368697	55.196342	00:21 <br>
17	3.620973	4.828074	0.630935	0.369065	55.224644	00:21 <br>

In [None]:
learn_lm.unfreeze()
learn_lm.fit_one_cycle(18, lr, wd=wd, moms=(0.8,0.7), callbacks=[ShowGraph(learn_lm)])

In [None]:
learn_lm.save(f'{lang}fine_tuned2_tweets_bc_sp15_multifit_bwd_v2')
learn_lm.save_encoder(f'{lang}fine_tuned2_enc_tweets_bc_sp15_multifit_bwd_v2')

Save best LM learner and its encoder

In [None]:
learn_lm.save(f'{lang}fine_tuned_tweets_bc_sp15_multifit_bwd_v2')
learn_lm.save_encoder(f'{lang}fine_tuned_enc_tweets_bc_sp15_multifit_bwd_v2')

## Fine-tuning "forward Classifier"

In [None]:
bs = 18

### Databunch

In [None]:
%%time
data_lm = load_data(path, f'{lang}_databunch_lm_tweets_bc_sp15_multifit_v2', bs=bs)

In [None]:
%%time
data_clas = (TextList.from_df(trainDF, path, vocab=data_lm.vocab, cols='text', processor=SPProcessor.load(dest))
    .split_by_rand_pct(0.2, seed=100)
    .label_from_df(cols='label')
    .databunch(bs=bs, num_workers=1))

In [None]:
%%time
data_clas.save(f'{lang}_textlist_class_tweets_bc_sp15_multifit_v2')

### Get weights to penalize loss function of the majority class

In [None]:
%%time
data_clas = load_data(path, f'{lang}_textlist_class_tweets_bc_sp15_multifit_v2', bs=bs, num_workers=1)

In [None]:
num_trn = len(data_clas.train_ds.x)
num_val = len(data_clas.valid_ds.x)
num_trn, num_val, num_trn+num_val

In [None]:
trn_LabelCounts = np.unique(data_clas.train_ds.y.items, return_counts=True)[1]
val_LabelCounts = np.unique(data_clas.valid_ds.y.items, return_counts=True)[1]
trn_LabelCounts, val_LabelCounts

In [None]:
trn_weights = [1 - count/num_trn for count in trn_LabelCounts]
val_weights = [1 - count/num_val for count in val_LabelCounts]
trn_weights, val_weights

### Training (Loss = FlattenedLoss of weighted LabelSmoothingCrossEntropy)

In [None]:
%%time
data_clas = load_data(path, f'{lang}_textlist_class_tweets_bc_sp15_multifit_v2', bs=bs, num_workers=1)

In [None]:
config = awd_lstm_clas_config.copy()
config['qrnn'] = True
config['n_hid'] = 1550 #default 1152
config['n_layers'] = 4 #default 3

In [None]:
learn_c = text_classifier_learner(data_clas, AWD_LSTM, config=config, pretrained=False, drop_mult=0.3, 
                                  metrics=[accuracy,f1])#.to_fp16()
learn_c.load_encoder(path+'/models/'+f'{lang}fine_tuned_enc_tweets_bc_sp15_multifit_v2');

#### Change loss function

In [None]:
learn_c.loss_func

In [None]:
loss_weights = torch.FloatTensor(trn_weights).cuda()
learn_c.loss_func = FlattenedLoss(WeightedLabelSmoothingCrossEntropy, weight=loss_weights)

In [None]:
learn_c.loss_func

#### Training

In [None]:
learn_c.freeze()

In [None]:
learn_c.lr_find()

In [None]:
learn_c.recorder.plot()

In [None]:
lr = 2e-1
lr *= bs/48

wd = 0.1

In [None]:
learn_c.fit_one_cycle(2, lr, wd=wd, moms=(0.8,0.7))

In [None]:
learn_c.save(f'{lang}clas_tweets_bc_sp15_multifit_v2')

In [None]:
learn_c.fit_one_cycle(2, lr, wd=wd, moms=(0.8,0.7))

In [None]:
learn_c.save(f'{lang}clas_tweets_bc_sp15_multifit_v2')

In [None]:
learn_c.freeze_to(-2)
learn_c.fit_one_cycle(2, slice(lr/(2.6**4),lr), wd=wd, moms=(0.8,0.7))

In [None]:
learn_c.save(f'{lang}clas_tweets_bc_sp15_multifit_v2')

In [None]:
learn_c.freeze_to(-3)
learn_c.fit_one_cycle(2, slice(lr/2/(2.6**4),lr/2), wd=wd, moms=(0.8,0.7))

In [None]:
learn_c.save(f'{lang}clas_tweets_bc_sp15_multifit_v2')

In [None]:
learn_c.unfreeze()
learn_c.fit_one_cycle(4, slice(lr/10/(2.6**4),lr/10), wd=wd, moms=(0.8,0.7))

In [None]:
learn_c.save(f'{lang}clas_tweets_bc_sp15_multifit_v2')

In [None]:
learn_c.load(f'{lang}clas_tweets_bc_sp15_multifit_v2')
learn_c.fit_one_cycle(4, slice(lr/100/(2.6**4),lr/100), wd=wd, moms=(0.8,0.7))

In [None]:
learn_c.save(f'{lang}clas_tweets_bc_sp15_multifit_v2')

In [None]:
learn_c.load(f'{lang}clas_tweets_bc_sp15_multifit_v2')
learn_c.fit_one_cycle(2, slice(lr/1000/(2.6**4),lr/1000), wd=wd, moms=(0.8,0.7))

In [None]:
learn_c.save(f'{lang}clas_tweets_bc_sp15_multifit_v2')

### Confusion matrix

In [None]:
%%time
data_clas = load_data(path, f'{lang}_textlist_class_tweets_bc_sp15_multifit_v2', bs=bs, num_workers=1);

config = awd_lstm_clas_config.copy()
config['qrnn'] = True
config['n_hid'] = 1550 #default 1152
config['n_layers'] = 4 #default 3

learn_c = text_classifier_learner(data_clas, AWD_LSTM, config=config)

In [None]:
learn_c.load(f'{lang}clas_tweets_bc_sp15_multifit_v2', purge=False);

In [None]:
preds,y,losses = learn_c.get_preds(with_loss=True)
predictions = np.argmax(preds, axis = 1)

interp = ClassificationInterpretation(learn_c, preds, y, losses)
interp.plot_confusion_matrix()

In [None]:
from sklearn.metrics import confusion_matrix
# NOTE: `cm` rebinds the alias under which matplotlib.cm was imported earlier in this notebook.
cm = confusion_matrix(np.array(y), np.array(predictions))
print(cm)

## acc
# The trace is the number of correct predictions for any number of classes.
print(f'accuracy global: {np.trace(cm)/(cm.sum())}')

# per-class accuracy: diagonal entry divided by its row total (rows are the true labels)
for k in range(cm.shape[0]):
    print(f'accuracy on class {k}: {cm[k,k]/(cm.sum(1)[k])*100}')


In [None]:
from sklearn.metrics import f1_score, classification_report, cohen_kappa_score, accuracy_score, balanced_accuracy_score, roc_auc_score, recall_score, precision_score

print(classification_report(y_true=y,
                            y_pred=predictions))

print("Kappa score: ", cohen_kappa_score(y, predictions),"\n")
print("Accuracy score: ", accuracy_score(y, predictions),"\n")
print("f1 macro score: ", f1_score(y, predictions, average='macro'),"\n")
print("Balanced Accuracy score: ", balanced_accuracy_score(y, predictions),"\n")
# ROC AUC is computed from a continuous score; hard arg-max labels give only one
# operating point. Use the class-1 column of the scores returned by get_preds.
print("ROC AUC: ", roc_auc_score(y, preds[:, 1]),"\n")
print("Recall: ", recall_score(y, predictions, pos_label = 0, average='binary'),"\n")
print("Precision: ", precision_score(y, predictions, pos_label = 0, average='binary'),"\n")


In [None]:
learn_c.show_results()

### Predictions on some random sentences

In [None]:
# Get the prediction
test_text = "Compra de papéis podres dos bancos vai virar dívida pública. Fattorelli fala sobre a regulamentação que o Banco Central expediu para a operações de compra de papéis podres dos bancos Vídeo de 3 min, assistam compartilhem importante!".lower()
pred = learn_c.predict(test_text)
print(pred)

In [None]:
# The darker the word-shading in the below example, the more it contributes to the classification. 
txt_ci = TextClassificationInterpretation.from_learner(learn_c)
txt_ci.show_intrinsic_attention(test_text,cmap=plt.cm.Purples)

In [None]:
txt_ci.intrinsic_attention(test_text)[1]

In [None]:
# tabulation showing the first k texts in top_losses along with their prediction, actual,loss, and probability of actual class.
# max_len is the maximum number of tokens displayed. If max_len=None, it will display all tokens.
txt_ci.show_top_losses(5)

## Fine-tuning "backward Classifier"

In [None]:
import warnings
warnings.filterwarnings('ignore')  # valid actions: "error", "ignore", "always", "default", "module" or "once"

In [None]:
bs = 18

### Databunch

In [None]:
%%time
data_lm = load_data(path, f'{lang}_databunch_lm_tweets_bc_sp15_multifit_bwd_v2', bs=bs, backwards=True)

In [None]:
%%time
data_clas = (TextList.from_df(trainDF, path, cols='text', processor=SPProcessor.load(dest), vocab=data_lm.vocab)
    .split_by_rand_pct(0.2, seed=100)
    .label_from_df(cols='label')
    .databunch(bs=bs, num_workers=1, backwards=True))

In [None]:
%%time
data_clas.save(f'{lang}_textlist_class_tweets_bc_sp15_multifit_bwd_v2')

### Get weights to penalize loss function of the majority class

In [None]:
%%time
data_clas = load_data(path, f'{lang}_textlist_class_tweets_bc_sp15_multifit_bwd_v2', bs=bs, num_workers=1, backwards=True)

In [None]:
num_trn = len(data_clas.train_ds.x)
num_val = len(data_clas.valid_ds.x)
num_trn, num_val, num_trn+num_val

In [None]:
trn_LabelCounts = np.unique(data_clas.train_ds.y.items, return_counts=True)[1]
val_LabelCounts = np.unique(data_clas.valid_ds.y.items, return_counts=True)[1]
trn_LabelCounts, val_LabelCounts

In [None]:
trn_weights = [1 - count/num_trn for count in trn_LabelCounts]
val_weights = [1 - count/num_val for count in val_LabelCounts]
#trn_weights, val_weights

### Training (Loss = FlattenedLoss of weighted LabelSmoothingCrossEntropy)

In [None]:
%%time
data_clas = load_data(path, f'{lang}_textlist_class_tweets_bc_sp15_multifit_bwd_v2', bs=bs, num_workers=1, backwards=True)

In [None]:
config = awd_lstm_clas_config.copy()
config['qrnn'] = True
config['n_hid'] = 1550 #default 1152
config['n_layers'] = 4 #default 3

In [None]:
# pretrained=False, as for the forward classifier: the encoder weights come from the
# fine-tuned backward LM loaded on the next line, so the library's stock pretrained
# weights should not be requested for this custom QRNN configuration.
learn_c = text_classifier_learner(data_clas, AWD_LSTM, config=config, pretrained=False, drop_mult=0.3, metrics=[accuracy,f1]).to_fp16()
learn_c.load_encoder(f'{lang}fine_tuned_enc_tweets_bc_sp15_multifit_bwd_v2');

#### Change loss function

In [None]:
learn_c.loss_func

In [None]:
loss_weights = torch.FloatTensor(trn_weights).cuda()
learn_c.loss_func = FlattenedLoss(WeightedLabelSmoothingCrossEntropy, weight=loss_weights)

In [None]:
learn_c.loss_func

#### Training

In [None]:
learn_c.freeze()

In [None]:
learn_c.lr_find()

In [None]:
learn_c.recorder.plot()

In [None]:
lr = 2e-1
lr *= bs/48

wd = 0.1

In [None]:
learn_c.fit_one_cycle(2, lr, wd=wd, moms=(0.8,0.7))

In [None]:
learn_c.save(f'{lang}clas_tweets_bc_sp15_multifit_bwd_v2')

In [None]:
learn_c.fit_one_cycle(2, lr, wd=wd, moms=(0.8,0.7))

In [None]:
# Same checkpoint name as the other backward-classifier saves ('bc', not 'bco'),
# so this stage does not leave a stray file that is never loaded.
learn_c.save(f'{lang}clas_tweets_bc_sp15_multifit_bwd_v2')

In [None]:
learn_c.freeze_to(-2)
learn_c.fit_one_cycle(2, slice(lr/(2.6**4),lr), wd=wd, moms=(0.8,0.7))

In [None]:
learn_c.save(f'{lang}clas_tweets_bc_sp15_multifit_bwd_v2')

In [None]:
learn_c.freeze_to(-3)
learn_c.fit_one_cycle(2, slice(lr/2/(2.6**4),lr/2), wd=wd, moms=(0.8,0.7))

In [None]:
learn_c.save(f'{lang}clas_tweets_bc_sp15_multifit_bwd_v2')

In [None]:
learn_c.unfreeze()
learn_c.fit_one_cycle(4, slice(lr/10/(2.6**4),lr/10), wd=wd, moms=(0.8,0.7))

In [None]:
learn_c.save(f'{lang}clas_tweets_bc_sp15_multifit_bwd_v2')

In [None]:
learn_c.fit_one_cycle(4, slice(lr/100/(2.6**4),lr/100), wd=wd, moms=(0.8,0.7))

In [None]:
learn_c.save(f'{lang}clas_tweets_bc_sp15_multifit_bwd_v2')

In [None]:
learn_c.load(f'{lang}clas_tweets_bc_sp15_multifit_bwd_v2')
learn_c.fit_one_cycle(1, slice(lr/1000/(2.6**4),lr/1000), wd=wd, moms=(0.8,0.7))

In [None]:
learn_c.fit_one_cycle(1, slice(lr/1000/(2.6**4),lr/1000), wd=wd, moms=(0.8,0.7))

In [None]:
learn_c.save(f'{lang}clas_tweets_bc_sp15_multifit_bwd_v2')

In [None]:
learn_c.load(f'{lang}clas_tweets_bc_sp15_multifit_bwd_v2');
learn_c.to_fp32().export(f'{lang}_classifier_tweets_bc_sp15_multifit_bwd_v2')

### Confusion matrix

In [None]:
%%time
data_clas = load_data(path, f'{lang}_textlist_class_tweets_bc_sp15_multifit_bwd_v2', bs=bs, num_workers=1, backwards=True)

config = awd_lstm_clas_config.copy()
config['qrnn'] = True
config['n_hid'] = 1550 #default 1152
config['n_layers'] = 4 #default 3

learn_c = text_classifier_learner(data_clas, AWD_LSTM, config=config)

In [None]:
learn_c.load(f'{lang}clas_tweets_bc_sp15_multifit_bwd_v2', purge=False);

In [None]:
preds,y,losses = learn_c.get_preds(with_loss=True)
predictions = np.argmax(preds, axis = 1)

interp = ClassificationInterpretation(learn_c, preds, y, losses)
interp.plot_confusion_matrix()

In [None]:
from sklearn.metrics import confusion_matrix
# NOTE: `cm` rebinds the alias under which matplotlib.cm was imported earlier in this notebook.
cm = confusion_matrix(np.array(y), np.array(predictions))
print(cm)

## acc
# The trace is the number of correct predictions for any number of classes,
# so no line needs to be toggled when the label set changes.
print(f'accuracy global: {np.trace(cm)/(cm.sum())}')

# per-class accuracy: diagonal entry divided by its row total (rows are the true labels)
for k in range(cm.shape[0]):
    print(f'accuracy on class {k}: {cm[k,k]/(cm.sum(1)[k])*100}')

In [None]:
from sklearn.metrics import f1_score, classification_report, cohen_kappa_score, accuracy_score, balanced_accuracy_score, roc_auc_score, recall_score, precision_score

print(classification_report(y_true=y,
                            y_pred=predictions))

print("Kappa score: ", cohen_kappa_score(y, predictions),"\n")
print("Accuracy score: ", accuracy_score(y, predictions),"\n")
print("f1 macro score: ", f1_score(y, predictions, average='macro'),"\n")
print("Balanced Accuracy score: ", balanced_accuracy_score(y, predictions),"\n")
# ROC AUC is computed from a continuous score; hard arg-max labels give only one
# operating point. Use the class-1 column of the scores returned by get_preds.
print("ROC AUC: ", roc_auc_score(y, preds[:, 1]),"\n")
print("Recall: ", recall_score(y, predictions, pos_label = 0, average='binary'),"\n")
print("Precision: ", precision_score(y, predictions, pos_label = 0, average='binary'),"\n")


# Nova seção

In [None]:
learn_c.show_results()

### Predictions on some random sentences

In [None]:
# Get the prediction
test_text = "Já passou da hora do banco central atuar.".lower()
pred = learn_c.predict(test_text)
print(pred)

In [None]:
# The darker the word-shading in the below example, the more it contributes to the classification. 
txt_ci = TextClassificationInterpretation.from_learner(learn_c)
txt_ci.show_intrinsic_attention(test_text,cmap=plt.cm.Purples)

In [None]:
txt_ci.intrinsic_attention(test_text)[1]

In [None]:
# tabulation showing the first k texts in top_losses along with their prediction, actual,loss, and probability of actual class.
# max_len is the maximum number of tokens displayed. If max_len=None, it will display all tokens.
txt_ci.show_top_losses(5)

## Ensemble

In [None]:
bs = 18

In [None]:
config = awd_lstm_clas_config.copy()
config['qrnn'] = True
config['n_hid'] = 1550 #default 1152
config['n_layers'] = 4 #default 3

In [None]:
data_clas = load_data(path, f'{lang}_textlist_class_tweets_bc_sp15_multifit_v2', bs=bs, num_workers=1)
learn_c = text_classifier_learner(data_clas, AWD_LSTM, config=config, drop_mult=0.3, metrics=[accuracy,f1]).to_fp16()
learn_c.load(f'{lang}clas_tweets_bc_sp15_multifit_v2', purge=False);

In [None]:
preds,targs = learn_c.get_preds(ordered=True)
accuracy(preds,targs),f1(preds,targs)

In [None]:
data_clas_bwd = load_data(path, f'{lang}_textlist_class_tweets_bc_sp15_multifit_bwd_v2', bs=bs, num_workers=1, backwards=True)
learn_c_bwd = text_classifier_learner(data_clas_bwd, AWD_LSTM, config=config, drop_mult=0.3, metrics=[accuracy,f1]).to_fp16()
learn_c_bwd.load(f'{lang}clas_tweets_bc_sp15_multifit_bwd_v2', purge=False);

In [None]:
preds_b,targs_b = learn_c_bwd.get_preds(ordered=True)
accuracy(preds_b,targs_b),f1(preds_b,targs_b)

In [None]:
preds_avg = (preds+preds_b)/2

In [None]:
accuracy(preds_avg,targs_b),f1(preds_avg,targs_b)

In [None]:
from sklearn.metrics import confusion_matrix

# Hard class decisions of the ensemble: arg-max over the averaged scores
predictions = np.argmax(preds_avg, axis = 1)
# NOTE: `cm` rebinds the alias under which matplotlib.cm was imported earlier in this notebook.
cm = confusion_matrix(np.array(targs_b), np.array(predictions))
print(cm)

## acc
# The trace is the number of correct predictions for any number of classes.
print(f'accuracy global: {np.trace(cm)/(cm.sum())}')

# per-class accuracy: diagonal entry divided by its row total (rows are the true labels)
for k in range(cm.shape[0]):
    print(f'accuracy on class {k}: {cm[k,k]/(cm.sum(1)[k])*100}')


In [None]:
from sklearn.metrics import f1_score, classification_report, cohen_kappa_score, accuracy_score, balanced_accuracy_score, roc_auc_score, recall_score, precision_score

print(classification_report(y_true=targs_b,
                            y_pred=predictions))

print("Kappa score: ", cohen_kappa_score(targs_b, predictions),"\n")
print("Accuracy score: ", accuracy_score(targs_b, predictions),"\n")
print("f1 macro score: ", f1_score(targs_b, predictions, average='macro'),"\n")
print("Balanced Accuracy score: ", balanced_accuracy_score(targs_b, predictions),"\n")
# ROC AUC is computed from a continuous score; hard arg-max labels give only one
# operating point. Use the class-1 column of the averaged ensemble scores.
print("ROC AUC: ", roc_auc_score(targs_b, preds_avg[:, 1]),"\n")
print("Recall: ", recall_score(targs_b, predictions, pos_label = 0, average='binary'),"\n")
print("Precision: ", precision_score(targs_b, predictions, pos_label = 0, average='binary'),"\n")

