In [1]:
import json
import os

import pandas as pd
import spacy
from joblib import dump
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_selection import RFECV, SelectKBest, mutual_info_classif
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import FunctionTransformer
from spacy.pipeline.functions import merge_entities
from tqdm import tqdm

In [2]:
from train_documents_classification_model import *

  from pandas import Panel


In [3]:
# Echo the dataset location constant. It is not defined in this notebook, so it
# presumably comes from the star import of train_documents_classification_model.
DATASET_PATH

'data'

In [4]:
# Load the labelled corpus into a DataFrame.
# NOTE(review): load_dataset is not defined in this notebook; presumably it is
# provided by the star import of train_documents_classification_model.
df = load_dataset()

In [5]:
# Display the loaded frame; downstream cells read its `text` and `label` columns.
df

Unnamed: 0,text,label
0,PCAR \n \n \n \n \n \n PART 7 \n \n \n \...,1
1,LIQUOR AMENDMENT (24-HOUR ECONOMY) BILL 2020 \...,1
2,Subordinate Legislation Act 1989 No 146\nNew S...,1
3,GOVERNMENT GAZETTE – DD Month YYYY\nGovernment...,1
4,\nNew South Wales\nCOVID-19 Legislation Amend...,1
...,...,...
401,\nPublic Health Brief \n \nRichard O. Johnson...,0
402,\nPublic Health Brief \n \nRichard O. Johnson...,0
403,"Public Health Brief \n \nRichard O. Johnson, M...",0
404,THE ESTATE AGENTS (AMENDMENT) BILL (5 MAY 2020...,0


In [6]:
# One seed for the split and for every stochastic estimator configured in this
# cell, so a Restart & Run All yields the same selected features and model.
RANDOM_STATE = 123

# Hold-out split with the default test share; stratifying on the label keeps the
# class proportions the same in both parts.
train, test = train_test_split(df, stratify=df.label, random_state=RANDOM_STATE)

# Wrap the text-cleaning function so it can be used as a pipeline step.
# NOTE(review): preprocess_text is not defined in this notebook; presumably it
# comes from the star import of train_documents_classification_model.
text_preprocessing = FunctionTransformer(func=preprocess_text)

# Stage 1 of feature selection: keep the 10000 terms scoring highest on mutual
# information with the label.
mutual_info_selector = SelectKBest(mutual_info_classif, k=10000)

# Stage 2: cross-validated recursive feature elimination driven by an
# L1-penalised logistic regression, dropping 10 features per step and never
# going below 20 features.
recurse_importance_selector = RFECV(
    estimator=LogisticRegression(
        penalty="l1",
        solver="saga",
        # saga shuffles the samples while optimising; without a seed the set of
        # eliminated features (and the saved model) can differ between runs.
        random_state=RANDOM_STATE,
        # NOTE(review): max_iter is left at the library default; check stderr
        # for convergence warnings before trusting the feature ranking.
    ),
    min_features_to_select=20,
    n_jobs=-1,
    verbose=True,
    step=10,
)

# Final model fitted on the selected features.
classifier = LogisticRegression(penalty="l2")

In [7]:
# Training inputs: the raw document text and its label.
X_train = train["text"]
y_train = train["label"]

# End-to-end model: text cleaning -> token counts -> two-stage feature
# selection -> classifier. Step names are part of the saved artifact, so they
# are kept exactly as they are.
pipeline = Pipeline(
    steps=[
        ("preprocessing", Pipeline(steps=[
            ("text_processor", text_preprocessing),
            ("count_vectorizer", CountVectorizer()),
        ])),
        ("feature_selection", Pipeline(steps=[
            ("mutual_info_selector", mutual_info_selector),
            ("recurse_importance_selector", recurse_importance_selector),
        ])),
        ("classifier", classifier),
    ]
)

In [8]:
# Quick check of the container type of the held-out text column.
type(test.text)

pandas.core.series.Series

In [9]:
# Location of the persisted, fitted pipeline.
MODEL_PATH = "data/classification_pipeline.joblib"

# Make sure the output directory exists *before* training: the fit below is
# long-running (the recorded run spent minutes on preprocessing alone, followed
# by many RFECV rounds), and a missing directory would otherwise only surface
# at the final dump, losing the whole run.
os.makedirs(os.path.dirname(MODEL_PATH), exist_ok=True)

# Fit every stage (preprocessing, feature selection, classifier) on the
# training split only.
pipeline.fit(X_train, y_train)

# Accuracy on the data the model was fitted on (optimistic by construction).
y_pred = pipeline.predict(X_train)
print("Train acc:", accuracy_score(y_train, y_pred))

# Accuracy on the held-out split.
X_test = test.text
y_test = test.label
pred_test = pipeline.predict(X_test)
print("Test acc:", accuracy_score(y_test, pred_test))

# Persist the fitted pipeline.
dump(pipeline, MODEL_PATH)

100%|████████████████████████████████████████████████████████████████████████| 304/304 [07:51<00:00,  1.55s/it]
100%|████████████████████████████████████████████████████████████████████████| 304/304 [00:10<00:00, 27.97it/s]
100%|███████████████████████████████████████████████████████████████████████| 304/304 [00:00<00:00, 315.10it/s]


Fitting estimator with 10000 features.




Fitting estimator with 9990 features.




Fitting estimator with 9980 features.




Fitting estimator with 9970 features.




Fitting estimator with 9960 features.




Fitting estimator with 9950 features.




Fitting estimator with 9940 features.




Fitting estimator with 9930 features.




Fitting estimator with 9920 features.




Fitting estimator with 9910 features.




Fitting estimator with 9900 features.




Fitting estimator with 9890 features.




Fitting estimator with 9880 features.




Fitting estimator with 9870 features.




Fitting estimator with 9860 features.




Fitting estimator with 9850 features.




Fitting estimator with 9840 features.




Fitting estimator with 9830 features.




Fitting estimator with 9820 features.




Fitting estimator with 9810 features.




Fitting estimator with 9800 features.




Fitting estimator with 9790 features.




Fitting estimator with 9780 features.




Fitting estimator with 9770 features.




Fitting estimator with 9760 features.




Fitting estimator with 9750 features.




Fitting estimator with 9740 features.




Fitting estimator with 9730 features.




Fitting estimator with 9720 features.




Fitting estimator with 9710 features.




Fitting estimator with 9700 features.




Fitting estimator with 9690 features.




Fitting estimator with 9680 features.




Fitting estimator with 9670 features.




Fitting estimator with 9660 features.




Fitting estimator with 9650 features.




Fitting estimator with 9640 features.




Fitting estimator with 9630 features.




Fitting estimator with 9620 features.




Fitting estimator with 9610 features.




Fitting estimator with 9600 features.




Fitting estimator with 9590 features.




Fitting estimator with 9580 features.




Fitting estimator with 9570 features.




Fitting estimator with 9560 features.




Fitting estimator with 9550 features.




Fitting estimator with 9540 features.




Fitting estimator with 9530 features.




Fitting estimator with 9520 features.




Fitting estimator with 9510 features.




Fitting estimator with 9500 features.




Fitting estimator with 9490 features.




Fitting estimator with 9480 features.




Fitting estimator with 9470 features.




Fitting estimator with 9460 features.




Fitting estimator with 9450 features.




Fitting estimator with 9440 features.




Fitting estimator with 9430 features.




Fitting estimator with 9420 features.




Fitting estimator with 9410 features.




Fitting estimator with 9400 features.




Fitting estimator with 9390 features.




Fitting estimator with 9380 features.




Fitting estimator with 9370 features.




Fitting estimator with 9360 features.




Fitting estimator with 9350 features.




Fitting estimator with 9340 features.




Fitting estimator with 9330 features.




Fitting estimator with 9320 features.




Fitting estimator with 9310 features.




Fitting estimator with 9300 features.




Fitting estimator with 9290 features.




Fitting estimator with 9280 features.




Fitting estimator with 9270 features.




Fitting estimator with 9260 features.




Fitting estimator with 9250 features.




Fitting estimator with 9240 features.




Fitting estimator with 9230 features.




Fitting estimator with 9220 features.




Fitting estimator with 9210 features.




Fitting estimator with 9200 features.




Fitting estimator with 9190 features.




Fitting estimator with 9180 features.




Fitting estimator with 9170 features.




Fitting estimator with 9160 features.




Fitting estimator with 9150 features.




Fitting estimator with 9140 features.




Fitting estimator with 9130 features.




Fitting estimator with 9120 features.




Fitting estimator with 9110 features.




Fitting estimator with 9100 features.




Fitting estimator with 9090 features.




Fitting estimator with 9080 features.




Fitting estimator with 9070 features.




Fitting estimator with 9060 features.




Fitting estimator with 9050 features.




Fitting estimator with 9040 features.




Fitting estimator with 9030 features.




Fitting estimator with 9020 features.




Fitting estimator with 9010 features.




Fitting estimator with 9000 features.




Fitting estimator with 8990 features.




Fitting estimator with 8980 features.




Fitting estimator with 8970 features.




Fitting estimator with 8960 features.




Fitting estimator with 8950 features.




Fitting estimator with 8940 features.




Fitting estimator with 8930 features.




Fitting estimator with 8920 features.




Fitting estimator with 8910 features.




Fitting estimator with 8900 features.




Fitting estimator with 8890 features.




Fitting estimator with 8880 features.




Fitting estimator with 8870 features.




Fitting estimator with 8860 features.




Fitting estimator with 8850 features.




Fitting estimator with 8840 features.




Fitting estimator with 8830 features.




Fitting estimator with 8820 features.




Fitting estimator with 8810 features.




Fitting estimator with 8800 features.




Fitting estimator with 8790 features.




Fitting estimator with 8780 features.




Fitting estimator with 8770 features.




Fitting estimator with 8760 features.




Fitting estimator with 8750 features.




Fitting estimator with 8740 features.




Fitting estimator with 8730 features.




Fitting estimator with 8720 features.




Fitting estimator with 8710 features.




Fitting estimator with 8700 features.




Fitting estimator with 8690 features.




Fitting estimator with 8680 features.




Fitting estimator with 8670 features.




Fitting estimator with 8660 features.




Fitting estimator with 8650 features.




Fitting estimator with 8640 features.




Fitting estimator with 8630 features.




Fitting estimator with 8620 features.




Fitting estimator with 8610 features.




Fitting estimator with 8600 features.




Fitting estimator with 8590 features.




Fitting estimator with 8580 features.




Fitting estimator with 8570 features.




Fitting estimator with 8560 features.




Fitting estimator with 8550 features.




Fitting estimator with 8540 features.




Fitting estimator with 8530 features.




Fitting estimator with 8520 features.




Fitting estimator with 8510 features.




Fitting estimator with 8500 features.




Fitting estimator with 8490 features.




Fitting estimator with 8480 features.




Fitting estimator with 8470 features.




Fitting estimator with 8460 features.




Fitting estimator with 8450 features.




Fitting estimator with 8440 features.




Fitting estimator with 8430 features.




Fitting estimator with 8420 features.




Fitting estimator with 8410 features.




Fitting estimator with 8400 features.




Fitting estimator with 8390 features.




Fitting estimator with 8380 features.




Fitting estimator with 8370 features.




Fitting estimator with 8360 features.




Fitting estimator with 8350 features.




Fitting estimator with 8340 features.




Fitting estimator with 8330 features.




Fitting estimator with 8320 features.




Fitting estimator with 8310 features.




Fitting estimator with 8300 features.




Fitting estimator with 8290 features.




Fitting estimator with 8280 features.




Fitting estimator with 8270 features.




Fitting estimator with 8260 features.




Fitting estimator with 8250 features.




Fitting estimator with 8240 features.




Fitting estimator with 8230 features.




Fitting estimator with 8220 features.




Fitting estimator with 8210 features.




Fitting estimator with 8200 features.




Fitting estimator with 8190 features.




Fitting estimator with 8180 features.




Fitting estimator with 8170 features.




Fitting estimator with 8160 features.




Fitting estimator with 8150 features.




Fitting estimator with 8140 features.




Fitting estimator with 8130 features.




Fitting estimator with 8120 features.




Fitting estimator with 8110 features.




Fitting estimator with 8100 features.




Fitting estimator with 8090 features.




Fitting estimator with 8080 features.




Fitting estimator with 8070 features.




Fitting estimator with 8060 features.




Fitting estimator with 8050 features.




Fitting estimator with 8040 features.




Fitting estimator with 8030 features.




Fitting estimator with 8020 features.




Fitting estimator with 8010 features.




Fitting estimator with 8000 features.




Fitting estimator with 7990 features.




Fitting estimator with 7980 features.




Fitting estimator with 7970 features.




Fitting estimator with 7960 features.




Fitting estimator with 7950 features.




Fitting estimator with 7940 features.




Fitting estimator with 7930 features.




Fitting estimator with 7920 features.




Fitting estimator with 7910 features.




Fitting estimator with 7900 features.




Fitting estimator with 7890 features.




Fitting estimator with 7880 features.




Fitting estimator with 7870 features.




Fitting estimator with 7860 features.




Fitting estimator with 7850 features.




Fitting estimator with 7840 features.




Fitting estimator with 7830 features.




Fitting estimator with 7820 features.




Fitting estimator with 7810 features.




Fitting estimator with 7800 features.




Fitting estimator with 7790 features.




Fitting estimator with 7780 features.




Fitting estimator with 7770 features.




Fitting estimator with 7760 features.




Fitting estimator with 7750 features.




Fitting estimator with 7740 features.




Fitting estimator with 7730 features.




Fitting estimator with 7720 features.




Fitting estimator with 7710 features.




Fitting estimator with 7700 features.




Fitting estimator with 7690 features.




Fitting estimator with 7680 features.




Fitting estimator with 7670 features.




Fitting estimator with 7660 features.




Fitting estimator with 7650 features.




Fitting estimator with 7640 features.




Fitting estimator with 7630 features.




Fitting estimator with 7620 features.




Fitting estimator with 7610 features.




Fitting estimator with 7600 features.




Fitting estimator with 7590 features.




Fitting estimator with 7580 features.




Fitting estimator with 7570 features.




Fitting estimator with 7560 features.




Fitting estimator with 7550 features.




Fitting estimator with 7540 features.




Fitting estimator with 7530 features.




Fitting estimator with 7520 features.




Fitting estimator with 7510 features.




Fitting estimator with 7500 features.




Fitting estimator with 7490 features.




Fitting estimator with 7480 features.




Fitting estimator with 7470 features.




Fitting estimator with 7460 features.




Fitting estimator with 7450 features.




Fitting estimator with 7440 features.




Fitting estimator with 7430 features.




Fitting estimator with 7420 features.




Fitting estimator with 7410 features.




Fitting estimator with 7400 features.




Fitting estimator with 7390 features.




Fitting estimator with 7380 features.




Fitting estimator with 7370 features.




Fitting estimator with 7360 features.




Fitting estimator with 7350 features.




Fitting estimator with 7340 features.




Fitting estimator with 7330 features.




Fitting estimator with 7320 features.




Fitting estimator with 7310 features.




Fitting estimator with 7300 features.




Fitting estimator with 7290 features.




Fitting estimator with 7280 features.




Fitting estimator with 7270 features.




Fitting estimator with 7260 features.




Fitting estimator with 7250 features.




Fitting estimator with 7240 features.




Fitting estimator with 7230 features.




Fitting estimator with 7220 features.




Fitting estimator with 7210 features.




Fitting estimator with 7200 features.




Fitting estimator with 7190 features.




Fitting estimator with 7180 features.




Fitting estimator with 7170 features.




Fitting estimator with 7160 features.




Fitting estimator with 7150 features.




Fitting estimator with 7140 features.




Fitting estimator with 7130 features.




Fitting estimator with 7120 features.




Fitting estimator with 7110 features.




Fitting estimator with 7100 features.




Fitting estimator with 7090 features.




Fitting estimator with 7080 features.




Fitting estimator with 7070 features.




Fitting estimator with 7060 features.




Fitting estimator with 7050 features.




Fitting estimator with 7040 features.




Fitting estimator with 7030 features.




Fitting estimator with 7020 features.




Fitting estimator with 7010 features.




Fitting estimator with 7000 features.




Fitting estimator with 6990 features.




Fitting estimator with 6980 features.




Fitting estimator with 6970 features.




Fitting estimator with 6960 features.




Fitting estimator with 6950 features.




Fitting estimator with 6940 features.




Fitting estimator with 6930 features.




Fitting estimator with 6920 features.




Fitting estimator with 6910 features.




Fitting estimator with 6900 features.




Fitting estimator with 6890 features.




Fitting estimator with 6880 features.




Fitting estimator with 6870 features.




Fitting estimator with 6860 features.




Fitting estimator with 6850 features.




Fitting estimator with 6840 features.




Fitting estimator with 6830 features.




Fitting estimator with 6820 features.




Fitting estimator with 6810 features.




Fitting estimator with 6800 features.




Fitting estimator with 6790 features.




Fitting estimator with 6780 features.




Fitting estimator with 6770 features.




Fitting estimator with 6760 features.




Fitting estimator with 6750 features.




Fitting estimator with 6740 features.




Fitting estimator with 6730 features.




Fitting estimator with 6720 features.




Fitting estimator with 6710 features.




Fitting estimator with 6700 features.




Fitting estimator with 6690 features.




Fitting estimator with 6680 features.




Fitting estimator with 6670 features.




Fitting estimator with 6660 features.




Fitting estimator with 6650 features.




Fitting estimator with 6640 features.




Fitting estimator with 6630 features.




Fitting estimator with 6620 features.




Fitting estimator with 6610 features.




Fitting estimator with 6600 features.




Fitting estimator with 6590 features.




Fitting estimator with 6580 features.




Fitting estimator with 6570 features.




Fitting estimator with 6560 features.




Fitting estimator with 6550 features.




Fitting estimator with 6540 features.




Fitting estimator with 6530 features.




Fitting estimator with 6520 features.




Fitting estimator with 6510 features.




Fitting estimator with 6500 features.




Fitting estimator with 6490 features.




Fitting estimator with 6480 features.




Fitting estimator with 6470 features.




Fitting estimator with 6460 features.




Fitting estimator with 6450 features.




Fitting estimator with 6440 features.




Fitting estimator with 6430 features.




Fitting estimator with 6420 features.




Fitting estimator with 6410 features.




Fitting estimator with 6400 features.




Fitting estimator with 6390 features.




Fitting estimator with 6380 features.




Fitting estimator with 6370 features.




Fitting estimator with 6360 features.




Fitting estimator with 6350 features.




Fitting estimator with 6340 features.




Fitting estimator with 6330 features.




Fitting estimator with 6320 features.




Fitting estimator with 6310 features.




Fitting estimator with 6300 features.




Fitting estimator with 6290 features.




Fitting estimator with 6280 features.




Fitting estimator with 6270 features.




Fitting estimator with 6260 features.




Fitting estimator with 6250 features.




Fitting estimator with 6240 features.




Fitting estimator with 6230 features.




Fitting estimator with 6220 features.




Fitting estimator with 6210 features.




Fitting estimator with 6200 features.




Fitting estimator with 6190 features.




Fitting estimator with 6180 features.




Fitting estimator with 6170 features.




Fitting estimator with 6160 features.




Fitting estimator with 6150 features.




Fitting estimator with 6140 features.




Fitting estimator with 6130 features.




Fitting estimator with 6120 features.




Fitting estimator with 6110 features.




Fitting estimator with 6100 features.




Fitting estimator with 6090 features.




Fitting estimator with 6080 features.




Fitting estimator with 6070 features.




Fitting estimator with 6060 features.




Fitting estimator with 6050 features.




Fitting estimator with 6040 features.




Fitting estimator with 6030 features.




Fitting estimator with 6020 features.




Fitting estimator with 6010 features.




Fitting estimator with 6000 features.




Fitting estimator with 5990 features.




Fitting estimator with 5980 features.




Fitting estimator with 5970 features.




Fitting estimator with 5960 features.




Fitting estimator with 5950 features.




Fitting estimator with 5940 features.




Fitting estimator with 5930 features.




Fitting estimator with 5920 features.




Fitting estimator with 5910 features.




Fitting estimator with 5900 features.




Fitting estimator with 5890 features.




Fitting estimator with 5880 features.




Fitting estimator with 5870 features.




Fitting estimator with 5860 features.




Fitting estimator with 5850 features.




Fitting estimator with 5840 features.




Fitting estimator with 5830 features.




Fitting estimator with 5820 features.




Fitting estimator with 5810 features.




Fitting estimator with 5800 features.




Fitting estimator with 5790 features.




Fitting estimator with 5780 features.




Fitting estimator with 5770 features.




Fitting estimator with 5760 features.




Fitting estimator with 5750 features.




Fitting estimator with 5740 features.




Fitting estimator with 5730 features.




Fitting estimator with 5720 features.




Fitting estimator with 5710 features.




Fitting estimator with 5700 features.




Fitting estimator with 5690 features.




Fitting estimator with 5680 features.




Fitting estimator with 5670 features.




Fitting estimator with 5660 features.




Fitting estimator with 5650 features.




Fitting estimator with 5640 features.




Fitting estimator with 5630 features.




Fitting estimator with 5620 features.




Fitting estimator with 5610 features.




Fitting estimator with 5600 features.




Fitting estimator with 5590 features.




Fitting estimator with 5580 features.




Fitting estimator with 5570 features.




Fitting estimator with 5560 features.




Fitting estimator with 5550 features.




Fitting estimator with 5540 features.




Fitting estimator with 5530 features.




Fitting estimator with 5520 features.




Fitting estimator with 5510 features.




Fitting estimator with 5500 features.




Fitting estimator with 5490 features.




Fitting estimator with 5480 features.




Fitting estimator with 5470 features.




Fitting estimator with 5460 features.




Fitting estimator with 5450 features.




Fitting estimator with 5440 features.




Fitting estimator with 5430 features.




Fitting estimator with 5420 features.




Fitting estimator with 5410 features.




Fitting estimator with 5400 features.




Fitting estimator with 5390 features.




Fitting estimator with 5380 features.




Fitting estimator with 5370 features.




Fitting estimator with 5360 features.




Fitting estimator with 5350 features.




Fitting estimator with 5340 features.




Fitting estimator with 5330 features.




Fitting estimator with 5320 features.




Fitting estimator with 5310 features.




Fitting estimator with 5300 features.




Fitting estimator with 5290 features.




Fitting estimator with 5280 features.




Fitting estimator with 5270 features.




Fitting estimator with 5260 features.




Fitting estimator with 5250 features.




Fitting estimator with 5240 features.




Fitting estimator with 5230 features.




Fitting estimator with 5220 features.




Fitting estimator with 5210 features.




Fitting estimator with 5200 features.




Fitting estimator with 5190 features.




Fitting estimator with 5180 features.




Fitting estimator with 5170 features.




Fitting estimator with 5160 features.




Fitting estimator with 5150 features.




Fitting estimator with 5140 features.




Fitting estimator with 5130 features.




Fitting estimator with 5120 features.




Fitting estimator with 5110 features.




Fitting estimator with 5100 features.




Fitting estimator with 5090 features.




Fitting estimator with 5080 features.




Fitting estimator with 5070 features.




Fitting estimator with 5060 features.




Fitting estimator with 5050 features.




Fitting estimator with 5040 features.




Fitting estimator with 5030 features.




Fitting estimator with 5020 features.




Fitting estimator with 5010 features.




Fitting estimator with 5000 features.




Fitting estimator with 4990 features.




Fitting estimator with 4980 features.




Fitting estimator with 4970 features.




Fitting estimator with 4960 features.




Fitting estimator with 4950 features.




Fitting estimator with 4940 features.




Fitting estimator with 4930 features.




Fitting estimator with 4920 features.




Fitting estimator with 4910 features.




Fitting estimator with 4900 features.




Fitting estimator with 4890 features.




Fitting estimator with 4880 features.




Fitting estimator with 4870 features.




Fitting estimator with 4860 features.




Fitting estimator with 4850 features.




Fitting estimator with 4840 features.




Fitting estimator with 4830 features.




Fitting estimator with 4820 features.




Fitting estimator with 4810 features.




Fitting estimator with 4800 features.




Fitting estimator with 4790 features.




Fitting estimator with 4780 features.




Fitting estimator with 4770 features.




Fitting estimator with 4760 features.




Fitting estimator with 4750 features.




Fitting estimator with 4740 features.




Fitting estimator with 4730 features.




Fitting estimator with 4720 features.




Fitting estimator with 4710 features.




Fitting estimator with 4700 features.




Fitting estimator with 4690 features.




Fitting estimator with 4680 features.




Fitting estimator with 4670 features.




Fitting estimator with 4660 features.




Fitting estimator with 4650 features.




Fitting estimator with 4640 features.




Fitting estimator with 4630 features.




Fitting estimator with 4620 features.




Fitting estimator with 4610 features.




Fitting estimator with 4600 features.




Fitting estimator with 4590 features.




Fitting estimator with 4580 features.




Fitting estimator with 4570 features.




Fitting estimator with 4560 features.




Fitting estimator with 4550 features.




Fitting estimator with 4540 features.




Fitting estimator with 4530 features.




Fitting estimator with 4520 features.




Fitting estimator with 4510 features.




Fitting estimator with 4500 features.




Fitting estimator with 4490 features.




Fitting estimator with 4480 features.




Fitting estimator with 4470 features.




Fitting estimator with 4460 features.




Fitting estimator with 4450 features.




Fitting estimator with 4440 features.




Fitting estimator with 4430 features.




Fitting estimator with 4420 features.




Fitting estimator with 4410 features.




Fitting estimator with 4400 features.




Fitting estimator with 4390 features.




Fitting estimator with 4380 features.




Fitting estimator with 4370 features.




Fitting estimator with 4360 features.




Fitting estimator with 4350 features.




Fitting estimator with 4340 features.




Fitting estimator with 4330 features.




Fitting estimator with 4320 features.




Fitting estimator with 4310 features.




Fitting estimator with 4300 features.




Fitting estimator with 4290 features.




Fitting estimator with 4280 features.




Fitting estimator with 4270 features.




Fitting estimator with 4260 features.




Fitting estimator with 4250 features.




Fitting estimator with 4240 features.




Fitting estimator with 4230 features.




Fitting estimator with 4220 features.




Fitting estimator with 4210 features.




Fitting estimator with 4200 features.




Fitting estimator with 4190 features.




Fitting estimator with 4180 features.




Fitting estimator with 4170 features.




Fitting estimator with 4160 features.




Fitting estimator with 4150 features.




Fitting estimator with 4140 features.




Fitting estimator with 4130 features.




Fitting estimator with 4120 features.




Fitting estimator with 4110 features.




Fitting estimator with 4100 features.




Fitting estimator with 4090 features.




Fitting estimator with 4080 features.




Fitting estimator with 4070 features.




Fitting estimator with 4060 features.




Fitting estimator with 4050 features.




Fitting estimator with 4040 features.




Fitting estimator with 4030 features.




Fitting estimator with 4020 features.




Fitting estimator with 4010 features.




Fitting estimator with 4000 features.




Fitting estimator with 3990 features.




Fitting estimator with 3980 features.




Fitting estimator with 3970 features.




Fitting estimator with 3960 features.
Fitting estimator with 3950 features.




Fitting estimator with 3940 features.




Fitting estimator with 3930 features.




Fitting estimator with 3920 features.




Fitting estimator with 3910 features.




Fitting estimator with 3900 features.




Fitting estimator with 3890 features.




Fitting estimator with 3880 features.




Fitting estimator with 3870 features.




Fitting estimator with 3860 features.




Fitting estimator with 3850 features.




Fitting estimator with 3840 features.




Fitting estimator with 3830 features.




Fitting estimator with 3820 features.




Fitting estimator with 3810 features.




Fitting estimator with 3800 features.




Fitting estimator with 3790 features.




Fitting estimator with 3780 features.




Fitting estimator with 3770 features.




Fitting estimator with 3760 features.




Fitting estimator with 3750 features.




Fitting estimator with 3740 features.




Fitting estimator with 3730 features.




Fitting estimator with 3720 features.




Fitting estimator with 3710 features.




Fitting estimator with 3700 features.




Fitting estimator with 3690 features.




Fitting estimator with 3680 features.




Fitting estimator with 3670 features.




Fitting estimator with 3660 features.




Fitting estimator with 3650 features.




Fitting estimator with 3640 features.




Fitting estimator with 3630 features.




Fitting estimator with 3620 features.




Fitting estimator with 3610 features.




Fitting estimator with 3600 features.




Fitting estimator with 3590 features.




Fitting estimator with 3580 features.




Fitting estimator with 3570 features.




Fitting estimator with 3560 features.




Fitting estimator with 3550 features.




Fitting estimator with 3540 features.




Fitting estimator with 3530 features.




Fitting estimator with 3520 features.




Fitting estimator with 3510 features.




Fitting estimator with 3500 features.




Fitting estimator with 3490 features.




Fitting estimator with 3480 features.




Fitting estimator with 3470 features.




Fitting estimator with 3460 features.




Fitting estimator with 3450 features.




Fitting estimator with 3440 features.




Fitting estimator with 3430 features.




Fitting estimator with 3420 features.




Fitting estimator with 3410 features.




Fitting estimator with 3400 features.




Fitting estimator with 3390 features.




Fitting estimator with 3380 features.




Fitting estimator with 3370 features.




Fitting estimator with 3360 features.




Fitting estimator with 3350 features.




Fitting estimator with 3340 features.




Fitting estimator with 3330 features.




Fitting estimator with 3320 features.




Fitting estimator with 3310 features.




Fitting estimator with 3300 features.




Fitting estimator with 3290 features.




Fitting estimator with 3280 features.




Fitting estimator with 3270 features.




Fitting estimator with 3260 features.




Fitting estimator with 3250 features.




Fitting estimator with 3240 features.




Fitting estimator with 3230 features.




Fitting estimator with 3220 features.




Fitting estimator with 3210 features.




Fitting estimator with 3200 features.




Fitting estimator with 3190 features.




Fitting estimator with 3180 features.




Fitting estimator with 3170 features.




Fitting estimator with 3160 features.




Fitting estimator with 3150 features.




Fitting estimator with 3140 features.




Fitting estimator with 3130 features.




Fitting estimator with 3120 features.




Fitting estimator with 3110 features.




Fitting estimator with 3100 features.




Fitting estimator with 3090 features.




Fitting estimator with 3080 features.




Fitting estimator with 3070 features.




Fitting estimator with 3060 features.




Fitting estimator with 3050 features.




Fitting estimator with 3040 features.




Fitting estimator with 3030 features.




Fitting estimator with 3020 features.




Fitting estimator with 3010 features.




Fitting estimator with 3000 features.




Fitting estimator with 2990 features.




Fitting estimator with 2980 features.




Fitting estimator with 2970 features.




Fitting estimator with 2960 features.




Fitting estimator with 2950 features.




Fitting estimator with 2940 features.




Fitting estimator with 2930 features.




Fitting estimator with 2920 features.




Fitting estimator with 2910 features.




Fitting estimator with 2900 features.




Fitting estimator with 2890 features.




Fitting estimator with 2880 features.




Fitting estimator with 2870 features.




Fitting estimator with 2860 features.




Fitting estimator with 2850 features.




Fitting estimator with 2840 features.




Fitting estimator with 2830 features.




Fitting estimator with 2820 features.




Fitting estimator with 2810 features.




Fitting estimator with 2800 features.




Fitting estimator with 2790 features.




Fitting estimator with 2780 features.




Fitting estimator with 2770 features.




Fitting estimator with 2760 features.




Fitting estimator with 2750 features.




Fitting estimator with 2740 features.




Fitting estimator with 2730 features.




Fitting estimator with 2720 features.




Fitting estimator with 2710 features.




Fitting estimator with 2700 features.




Fitting estimator with 2690 features.




Fitting estimator with 2680 features.




Fitting estimator with 2670 features.




Fitting estimator with 2660 features.




Fitting estimator with 2650 features.




Fitting estimator with 2640 features.




Fitting estimator with 2630 features.




Fitting estimator with 2620 features.




Fitting estimator with 2610 features.




Fitting estimator with 2600 features.




Fitting estimator with 2590 features.




Fitting estimator with 2580 features.




Fitting estimator with 2570 features.




Fitting estimator with 2560 features.




Fitting estimator with 2550 features.




Fitting estimator with 2540 features.




Fitting estimator with 2530 features.




Fitting estimator with 2520 features.




Fitting estimator with 2510 features.




Fitting estimator with 2500 features.




Fitting estimator with 2490 features.




Fitting estimator with 2480 features.




Fitting estimator with 2470 features.




Fitting estimator with 2460 features.




Fitting estimator with 2450 features.




Fitting estimator with 2440 features.




Fitting estimator with 2430 features.




Fitting estimator with 2420 features.




Fitting estimator with 2410 features.




Fitting estimator with 2400 features.




Fitting estimator with 2390 features.




Fitting estimator with 2380 features.




Fitting estimator with 2370 features.




Fitting estimator with 2360 features.




Fitting estimator with 2350 features.




Fitting estimator with 2340 features.




Fitting estimator with 2330 features.




Fitting estimator with 2320 features.




Fitting estimator with 2310 features.




Fitting estimator with 2300 features.




Fitting estimator with 2290 features.




Fitting estimator with 2280 features.




Fitting estimator with 2270 features.




Fitting estimator with 2260 features.




Fitting estimator with 2250 features.




Fitting estimator with 2240 features.




Fitting estimator with 2230 features.




Fitting estimator with 2220 features.




Fitting estimator with 2210 features.




Fitting estimator with 2200 features.




Fitting estimator with 2190 features.




Fitting estimator with 2180 features.




Fitting estimator with 2170 features.




Fitting estimator with 2160 features.




Fitting estimator with 2150 features.




Fitting estimator with 2140 features.




Fitting estimator with 2130 features.




Fitting estimator with 2120 features.




Fitting estimator with 2110 features.




Fitting estimator with 2100 features.




Fitting estimator with 2090 features.




Fitting estimator with 2080 features.




Fitting estimator with 2070 features.




Fitting estimator with 2060 features.




Fitting estimator with 2050 features.




Fitting estimator with 2040 features.




Fitting estimator with 2030 features.




Fitting estimator with 2020 features.




Fitting estimator with 2010 features.




Fitting estimator with 2000 features.




Fitting estimator with 1990 features.




Fitting estimator with 1980 features.




Fitting estimator with 1970 features.




Fitting estimator with 1960 features.




Fitting estimator with 1950 features.




Fitting estimator with 1940 features.




Fitting estimator with 1930 features.




Fitting estimator with 1920 features.




Fitting estimator with 1910 features.




Fitting estimator with 1900 features.




Fitting estimator with 1890 features.




Fitting estimator with 1880 features.




Fitting estimator with 1870 features.




Fitting estimator with 1860 features.




Fitting estimator with 1850 features.




Fitting estimator with 1840 features.




Fitting estimator with 1830 features.




Fitting estimator with 1820 features.




Fitting estimator with 1810 features.




Fitting estimator with 1800 features.




Fitting estimator with 1790 features.




Fitting estimator with 1780 features.




Fitting estimator with 1770 features.




Fitting estimator with 1760 features.




Fitting estimator with 1750 features.




Fitting estimator with 1740 features.




Fitting estimator with 1730 features.




Fitting estimator with 1720 features.




Fitting estimator with 1710 features.




Fitting estimator with 1700 features.




Fitting estimator with 1690 features.




Fitting estimator with 1680 features.




Fitting estimator with 1670 features.




Fitting estimator with 1660 features.




Fitting estimator with 1650 features.




Fitting estimator with 1640 features.




Fitting estimator with 1630 features.




Fitting estimator with 1620 features.




Fitting estimator with 1610 features.




Fitting estimator with 1600 features.




Fitting estimator with 1590 features.




Fitting estimator with 1580 features.




Fitting estimator with 1570 features.




Fitting estimator with 1560 features.




Fitting estimator with 1550 features.




Fitting estimator with 1540 features.




Fitting estimator with 1530 features.




Fitting estimator with 1520 features.




Fitting estimator with 1510 features.




Fitting estimator with 1500 features.




Fitting estimator with 1490 features.




Fitting estimator with 1480 features.




Fitting estimator with 1470 features.




Fitting estimator with 1460 features.




Fitting estimator with 1450 features.




Fitting estimator with 1440 features.




Fitting estimator with 1430 features.




Fitting estimator with 1420 features.




Fitting estimator with 1410 features.




Fitting estimator with 1400 features.




Fitting estimator with 1390 features.




Fitting estimator with 1380 features.




Fitting estimator with 1370 features.




Fitting estimator with 1360 features.




Fitting estimator with 1350 features.




Fitting estimator with 1340 features.




Fitting estimator with 1330 features.




Fitting estimator with 1320 features.




Fitting estimator with 1310 features.




Fitting estimator with 1300 features.




Fitting estimator with 1290 features.




Fitting estimator with 1280 features.




Fitting estimator with 1270 features.




Fitting estimator with 1260 features.




Fitting estimator with 1250 features.




Fitting estimator with 1240 features.




Fitting estimator with 1230 features.




Fitting estimator with 1220 features.




Fitting estimator with 1210 features.




Fitting estimator with 1200 features.




Fitting estimator with 1190 features.




Fitting estimator with 1180 features.




Fitting estimator with 1170 features.




Fitting estimator with 1160 features.




Fitting estimator with 1150 features.




Fitting estimator with 1140 features.




Fitting estimator with 1130 features.




Fitting estimator with 1120 features.




Fitting estimator with 1110 features.




Fitting estimator with 1100 features.




Fitting estimator with 1090 features.




Fitting estimator with 1080 features.




Fitting estimator with 1070 features.




Fitting estimator with 1060 features.




Fitting estimator with 1050 features.




Fitting estimator with 1040 features.




Fitting estimator with 1030 features.




Fitting estimator with 1020 features.




Fitting estimator with 1010 features.




Fitting estimator with 1000 features.




Fitting estimator with 990 features.




Fitting estimator with 980 features.




Fitting estimator with 970 features.




Fitting estimator with 960 features.




Fitting estimator with 950 features.




Fitting estimator with 940 features.




Fitting estimator with 930 features.




Fitting estimator with 920 features.




Fitting estimator with 910 features.




Fitting estimator with 900 features.




Fitting estimator with 890 features.




Fitting estimator with 880 features.




Fitting estimator with 870 features.




Fitting estimator with 860 features.




Fitting estimator with 850 features.




Fitting estimator with 840 features.




Fitting estimator with 830 features.




Fitting estimator with 820 features.




Fitting estimator with 810 features.




Fitting estimator with 800 features.




Fitting estimator with 790 features.




Fitting estimator with 780 features.




Fitting estimator with 770 features.




Fitting estimator with 760 features.




Fitting estimator with 750 features.




Fitting estimator with 740 features.




Fitting estimator with 730 features.




Fitting estimator with 720 features.




Fitting estimator with 710 features.
Fitting estimator with 700 features.




Fitting estimator with 690 features.
Fitting estimator with 680 features.




Fitting estimator with 670 features.
Fitting estimator with 660 features.




Fitting estimator with 650 features.
Fitting estimator with 640 features.




Fitting estimator with 630 features.
Fitting estimator with 620 features.




Fitting estimator with 610 features.
Fitting estimator with 600 features.
Fitting estimator with 590 features.




Fitting estimator with 580 features.
Fitting estimator with 570 features.




Fitting estimator with 560 features.
Fitting estimator with 550 features.




Fitting estimator with 540 features.
Fitting estimator with 530 features.




Fitting estimator with 520 features.
Fitting estimator with 510 features.




Fitting estimator with 500 features.
Fitting estimator with 490 features.




Fitting estimator with 480 features.
Fitting estimator with 470 features.




Fitting estimator with 460 features.
Fitting estimator with 450 features.




Fitting estimator with 440 features.
Fitting estimator with 430 features.




Fitting estimator with 420 features.
Fitting estimator with 410 features.




Fitting estimator with 400 features.
Fitting estimator with 390 features.




Fitting estimator with 380 features.
Fitting estimator with 370 features.




Fitting estimator with 360 features.
Fitting estimator with 350 features.




Fitting estimator with 340 features.
Fitting estimator with 330 features.




Fitting estimator with 320 features.
Fitting estimator with 310 features.




Fitting estimator with 300 features.
Fitting estimator with 290 features.




Fitting estimator with 280 features.
Fitting estimator with 270 features.




Fitting estimator with 260 features.
Fitting estimator with 250 features.
Fitting estimator with 240 features.
Fitting estimator with 230 features.




Fitting estimator with 220 features.
Fitting estimator with 210 features.
Fitting estimator with 200 features.




Fitting estimator with 190 features.
Fitting estimator with 180 features.
Fitting estimator with 170 features.




Fitting estimator with 160 features.
Fitting estimator with 150 features.
Fitting estimator with 140 features.




Fitting estimator with 130 features.
Fitting estimator with 120 features.
Fitting estimator with 110 features.
Fitting estimator with 100 features.
Fitting estimator with 90 features.
Fitting estimator with 80 features.


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
100%|████████████████████████████████████████████████████████████████████████| 304/304 [06:46<00:00,  1.34s/it]
100%|████████████████████████████████████████████████████████████████████████| 304/304 [00:07<00:00, 40.88it/s]
100%|███████████████████████████████████████████████████████████████████████| 304/304 [00:00<00:00, 431.72it/s]
  0%|                                                                                  | 0/102 [00:00<?, ?it/s]

Train acc: 1.0


100%|████████████████████████████████████████████████████████████████████████| 102/102 [02:13<00:00,  1.31s/it]
100%|████████████████████████████████████████████████████████████████████████| 102/102 [00:03<00:00, 33.72it/s]
100%|███████████████████████████████████████████████████████████████████████| 102/102 [00:00<00:00, 393.71it/s]


Test acc: 0.9803921568627451


['data/classification_pipeline.joblib']

In [10]:
# Quick size check of the label vector used for evaluation; it should match
# the number of documents scored in the test run above (102 in the recorded output).
y_test.shape

(102,)

In [11]:
from sklearn.metrics import f1_score

# Report F1 alongside the accuracy printed by the training cell; f1_score is
# called with its defaults on the same labels/predictions pair.
test_f1 = f1_score(y_test, pred_test)
print("Test f1:", test_f1)

Test f1: 0.9666666666666666


In [None]:
import numpy as np

In [None]:
# Disabled export: when enabled, writes the `coef_` array of the third pipeline
# step (index 2, presumably the final LogisticRegression; verify against the
# pipeline definition) to 'lr_coefs.txt' as plain text.
# np.savetxt('lr_coefs.txt', pipeline.steps[2][1].coef_)

In [None]:
# Disabled on purpose: the training cell's recorded output already shows
# ['data/classification_pipeline.joblib'], i.e. the fitted pipeline was saved there.
# Re-enable only to overwrite that file with the current in-memory `pipeline`.
# dump(pipeline, "data/classification_pipeline.joblib")