In [134]:
import pandas as pd
import numpy as np

import plotly.express as px
from sklearn.linear_model import LogisticRegression
from plotly.graph_objects import *
from plotly.offline import init_notebook_mode,iplot
init_notebook_mode(connected=True)
from sklearn.datasets import load_wine

In [135]:
wine = load_wine() #wine
print(wine['DESCR'])

.. _wine_dataset:

Wine recognition dataset
------------------------

**Data Set Characteristics:**

    :Number of Instances: 178 (50 in each of three classes)
    :Number of Attributes: 13 numeric, predictive attributes and the class
    :Attribute Information:
 		- Alcohol
 		- Malic acid
 		- Ash
		- Alcalinity of ash  
 		- Magnesium
		- Total phenols
 		- Flavanoids
 		- Nonflavanoid phenols
 		- Proanthocyanins
		- Color intensity
 		- Hue
 		- OD280/OD315 of diluted wines
 		- Proline

    - class:
            - class_0
            - class_1
            - class_2
		
    :Summary Statistics:
    
                                   Min   Max   Mean     SD
    Alcohol:                      11.0  14.8    13.0   0.8
    Malic Acid:                   0.74  5.80    2.34  1.12
    Ash:                          1.36  3.23    2.36  0.27
    Alcalinity of Ash:            10.6  30.0    19.5   3.3
    Magnesium:                    70.0 162.0    99.7  14.3
    Total Phenols:                0

In [136]:
df = pd.DataFrame(data = np.c_[wine['data'],wine['target']],
                 columns=wine['feature_names'] + ['target'])
df.head()

Unnamed: 0,alcohol,malic_acid,ash,alcalinity_of_ash,magnesium,total_phenols,flavanoids,nonflavanoid_phenols,proanthocyanins,color_intensity,hue,od280/od315_of_diluted_wines,proline,target
0,14.23,1.71,2.43,15.6,127.0,2.8,3.06,0.28,2.29,5.64,1.04,3.92,1065.0,0.0
1,13.2,1.78,2.14,11.2,100.0,2.65,2.76,0.26,1.28,4.38,1.05,3.4,1050.0,0.0
2,13.16,2.36,2.67,18.6,101.0,2.8,3.24,0.3,2.81,5.68,1.03,3.17,1185.0,0.0
3,14.37,1.95,2.5,16.8,113.0,3.85,3.49,0.24,2.18,7.8,0.86,3.45,1480.0,0.0
4,13.24,2.59,2.87,21.0,118.0,2.8,2.69,0.39,1.82,4.32,1.04,2.93,735.0,0.0


In [137]:
df.isnull().sum()

alcohol                         0
malic_acid                      0
ash                             0
alcalinity_of_ash               0
magnesium                       0
total_phenols                   0
flavanoids                      0
nonflavanoid_phenols            0
proanthocyanins                 0
color_intensity                 0
hue                             0
od280/od315_of_diluted_wines    0
proline                         0
target                          0
dtype: int64

In [138]:
df.select_dtypes(np.number).columns

Index(['alcohol', 'malic_acid', 'ash', 'alcalinity_of_ash', 'magnesium',
       'total_phenols', 'flavanoids', 'nonflavanoid_phenols',
       'proanthocyanins', 'color_intensity', 'hue',
       'od280/od315_of_diluted_wines', 'proline', 'target'],
      dtype='object')

In [139]:
corr=df.corr()

In [140]:
corr[((corr>0.7)|(corr<-0.7)) & (corr != 1)] #Висока кореляція між факторами flavanoids,total_phenols; total_phenols та flavanoids

Unnamed: 0,alcohol,malic_acid,ash,alcalinity_of_ash,magnesium,total_phenols,flavanoids,nonflavanoid_phenols,proanthocyanins,color_intensity,hue,od280/od315_of_diluted_wines,proline,target
alcohol,,,,,,,,,,,,,,
malic_acid,,,,,,,,,,,,,,
ash,,,,,,,,,,,,,,
alcalinity_of_ash,,,,,,,,,,,,,,
magnesium,,,,,,,,,,,,,,
total_phenols,,,,,,,0.864564,,,,,,,-0.719163
flavanoids,,,,,,0.864564,,,,,,0.787194,,-0.847498
nonflavanoid_phenols,,,,,,,,,,,,,,
proanthocyanins,,,,,,,,,,,,,,
color_intensity,,,,,,,,,,,,,,


In [141]:
df.select_dtypes(include=np.number).var() #дисперсія

alcohol                             0.659062
malic_acid                          1.248015
ash                                 0.075265
alcalinity_of_ash                  11.152686
magnesium                         203.989335
total_phenols                       0.391690
flavanoids                          0.997719
nonflavanoid_phenols                0.015489
proanthocyanins                     0.327595
color_intensity                     5.374449
hue                                 0.052245
od280/od315_of_diluted_wines        0.504086
proline                         99166.717355
target                              0.600679
dtype: float64

In [142]:
x = df[df.columns.drop(['target','flavanoids'])] #видаляємо 
y = df['target']

In [143]:
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x,y,
                                                   test_size=0.2)

In [144]:
x_train.describe() 

Unnamed: 0,alcohol,malic_acid,ash,alcalinity_of_ash,magnesium,total_phenols,nonflavanoid_phenols,proanthocyanins,color_intensity,hue,od280/od315_of_diluted_wines,proline
count,142.0,142.0,142.0,142.0,142.0,142.0,142.0,142.0,142.0,142.0,142.0,142.0
mean,13.002465,2.363662,2.359859,19.576056,99.704225,2.289577,0.361408,1.578239,5.105563,0.946873,2.608944,732.760563
std,0.788808,1.107685,0.283301,3.129746,14.544521,0.638622,0.122182,0.56815,2.388166,0.229204,0.711633,295.942824
min,11.03,0.9,1.36,10.6,70.0,0.98,0.13,0.42,1.28,0.48,1.29,278.0
25%,12.37,1.5775,2.2025,17.85,88.0,1.7425,0.27,1.1675,3.05,0.7725,1.9225,504.0
50%,13.04,1.9,2.355,19.5,98.0,2.38,0.34,1.545,4.85,0.955,2.775,660.0
75%,13.635,3.115,2.555,21.5,107.75,2.8,0.445,1.95,6.2,1.09,3.1775,918.75
max,14.83,5.8,3.23,28.5,162.0,3.88,0.63,3.58,13.0,1.71,3.92,1515.0


--LogisticRegression

In [145]:
#LogisticRegression()
log = LogisticRegression()
log.fit(x_train,y_train)


lbfgs failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression



LogisticRegression()

In [147]:
log.coef_,log.intercept_

(array([[-0.1571872 ,  0.31305138,  0.1779559 , -0.29195819, -0.01592591,
          0.2080829 , -0.01845433,  0.11069189, -0.08882928, -0.01791595,
          0.35041983,  0.01029402],
        [ 0.41653305, -0.90664309, -0.17549814,  0.27177357, -0.00719688,
          0.4385997 , -0.0211404 ,  0.44454062, -1.38317617,  0.28153747,
          0.59959339, -0.00638603],
        [-0.25934585,  0.59359171, -0.00245776,  0.02018462,  0.02312278,
         -0.64668259,  0.03959473, -0.55523251,  1.47200546, -0.26362153,
         -0.95001322, -0.00390799]]),
 array([-0.04749397,  0.09479841, -0.04730444]))

In [148]:
preds = log.predict(x_test)
preds

array([1., 1., 1., 2., 0., 1., 0., 2., 2., 0., 1., 1., 0., 1., 0., 1., 0.,
       2., 0., 1., 1., 2., 1., 0., 1., 0., 0., 0., 0., 0., 1., 1., 2., 2.,
       0., 0.])

In [149]:
#ймовірності p
predict_prob=log.predict_proba(x_test)
predict_prob[0]

array([3.76033629e-03, 9.95895994e-01, 3.43669938e-04])

In [150]:
from scipy.special import softmax

In [154]:
#заносимо дані у таблицю, 
df_1=pd.DataFrame(x_test)
df_1['y']=pd.DataFrame(y_test)
df_1['pred_cl']=log.predict(x_test) #наше передбачення
df_1['pred_prob']=softmax(x_test.index) #передбачення ймовірності

df_1.head()

Unnamed: 0,alcohol,malic_acid,ash,alcalinity_of_ash,magnesium,total_phenols,nonflavanoid_phenols,proanthocyanins,color_intensity,hue,od280/od315_of_diluted_wines,proline,y,pred_cl,pred_prob
84,11.84,0.89,2.58,18.0,94.0,2.2,0.22,2.35,3.05,0.79,3.08,520.0,1.0,1.0,3.9798489999999997e-41
128,12.37,1.63,2.3,24.5,88.0,2.22,0.4,1.9,2.12,0.89,2.78,342.0,1.0,1.0,5.114743000000001e-22
87,11.65,1.67,2.62,26.0,88.0,1.92,0.4,1.34,2.6,1.36,3.21,562.0,1.0,1.0,7.993741e-40
172,14.16,2.51,2.48,20.0,91.0,1.68,0.44,1.24,9.7,0.62,1.71,660.0,2.0,2.0,0.006573263
37,13.05,1.65,2.55,18.0,98.0,2.45,0.29,1.44,4.25,1.12,2.51,1105.0,0.0,0.0,1.5417930000000001e-61


In [155]:
#метрика
(df_1['y'] == df_1['pred_cl']).mean()

0.9166666666666666

In [156]:
#метрика 
log.score(x_test, y_test)

0.9166666666666666

In [157]:
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
from sklearn.metrics import roc_auc_score

In [158]:
precision_score(df_1['y'],df_1['pred_cl'],average='macro')

0.8825396825396825

In [159]:
recall_score(df_1['y'],df_1['pred_cl'], average='macro')

0.9027777777777778

In [160]:
f1_score(df_1['y'],df_1['pred_cl'], average='macro')

0.8893604479811376

In [161]:
roc_auc_score(y_test, log.predict_proba(x_test), multi_class='ovr')

0.9894675925925926

--DecisionTreeClassifier

In [272]:
#DecisionTreeClassifier
from sklearn.tree import DecisionTreeClassifier
import matplotlib.pyplot as plt
import seaborn as sns

In [273]:
tree = DecisionTreeClassifier(random_state=1)
tree.fit(x_train, y_train)
preds = tree.predict(x_test)


from sklearn.metrics import r2_score, mean_absolute_error
print('test_r2',r2_score(y_test, preds))
print('test_MAE',mean_absolute_error(y_test,preds))

preds_train = tree.predict(x_train)
print('train_r2',r2_score(y_train, preds_train))
print('train_MAE',mean_absolute_error(y_train,preds_train))

test_r2 0.5609756097560976
test_MAE 0.16666666666666666
train_r2 1.0
train_MAE 0.0


In [274]:
from sklearn.model_selection import GridSearchCV

In [275]:
params = {'max_depth': [2,3,5,10,20], 'max_leaf_nodes':[0,200,300,400,500], 'min_samples_leaf':[5,10,20,30,50]}
tree_grid = GridSearchCV(tree,
                        params,
                        verbose=3,
                        cv = 5)
tree_grid.fit(x_train,y_train)

Fitting 5 folds for each of 125 candidates, totalling 625 fits
[CV 1/5] END max_depth=2, max_leaf_nodes=0, min_samples_leaf=5;, score=nan total time=   0.0s
[CV 2/5] END max_depth=2, max_leaf_nodes=0, min_samples_leaf=5;, score=nan total time=   0.0s
[CV 3/5] END max_depth=2, max_leaf_nodes=0, min_samples_leaf=5;, score=nan total time=   0.0s
[CV 4/5] END max_depth=2, max_leaf_nodes=0, min_samples_leaf=5;, score=nan total time=   0.0s
[CV 5/5] END max_depth=2, max_leaf_nodes=0, min_samples_leaf=5;, score=nan total time=   0.0s
[CV 1/5] END max_depth=2, max_leaf_nodes=0, min_samples_leaf=10;, score=nan total time=   0.0s
[CV 2/5] END max_depth=2, max_leaf_nodes=0, min_samples_leaf=10;, score=nan total time=   0.0s
[CV 3/5] END max_depth=2, max_leaf_nodes=0, min_samples_leaf=10;, score=nan total time=   0.0s
[CV 4/5] END max_depth=2, max_leaf_nodes=0, min_samples_leaf=10;, score=nan total time=   0.0s
[CV 5/5] END max_depth=2, max_leaf_nodes=0, min_samples_leaf=10;, score=nan total time=

[CV 3/5] END max_depth=2, max_leaf_nodes=500, min_samples_leaf=50;, score=0.536 total time=   0.0s
[CV 4/5] END max_depth=2, max_leaf_nodes=500, min_samples_leaf=50;, score=0.643 total time=   0.0s
[CV 5/5] END max_depth=2, max_leaf_nodes=500, min_samples_leaf=50;, score=0.643 total time=   0.0s
[CV 1/5] END max_depth=3, max_leaf_nodes=0, min_samples_leaf=5;, score=nan total time=   0.0s
[CV 2/5] END max_depth=3, max_leaf_nodes=0, min_samples_leaf=5;, score=nan total time=   0.0s
[CV 3/5] END max_depth=3, max_leaf_nodes=0, min_samples_leaf=5;, score=nan total time=   0.0s
[CV 4/5] END max_depth=3, max_leaf_nodes=0, min_samples_leaf=5;, score=nan total time=   0.0s
[CV 5/5] END max_depth=3, max_leaf_nodes=0, min_samples_leaf=5;, score=nan total time=   0.0s
[CV 1/5] END max_depth=3, max_leaf_nodes=0, min_samples_leaf=10;, score=nan total time=   0.0s
[CV 2/5] END max_depth=3, max_leaf_nodes=0, min_samples_leaf=10;, score=nan total time=   0.0s
[CV 3/5] END max_depth=3, max_leaf_nodes=0,

[CV 4/5] END max_depth=3, max_leaf_nodes=400, min_samples_leaf=20;, score=0.929 total time=   0.0s
[CV 5/5] END max_depth=3, max_leaf_nodes=400, min_samples_leaf=20;, score=0.964 total time=   0.0s
[CV 1/5] END max_depth=3, max_leaf_nodes=400, min_samples_leaf=30;, score=0.862 total time=   0.0s
[CV 2/5] END max_depth=3, max_leaf_nodes=400, min_samples_leaf=30;, score=0.897 total time=   0.0s
[CV 3/5] END max_depth=3, max_leaf_nodes=400, min_samples_leaf=30;, score=0.857 total time=   0.0s
[CV 4/5] END max_depth=3, max_leaf_nodes=400, min_samples_leaf=30;, score=0.929 total time=   0.0s
[CV 5/5] END max_depth=3, max_leaf_nodes=400, min_samples_leaf=30;, score=0.964 total time=   0.0s
[CV 1/5] END max_depth=3, max_leaf_nodes=400, min_samples_leaf=50;, score=0.621 total time=   0.0s
[CV 2/5] END max_depth=3, max_leaf_nodes=400, min_samples_leaf=50;, score=0.655 total time=   0.0s
[CV 3/5] END max_depth=3, max_leaf_nodes=400, min_samples_leaf=50;, score=0.536 total time=   0.0s
[CV 4/5] E

[CV 2/5] END max_depth=5, max_leaf_nodes=300, min_samples_leaf=20;, score=0.897 total time=   0.0s
[CV 3/5] END max_depth=5, max_leaf_nodes=300, min_samples_leaf=20;, score=0.857 total time=   0.0s
[CV 4/5] END max_depth=5, max_leaf_nodes=300, min_samples_leaf=20;, score=0.929 total time=   0.0s
[CV 5/5] END max_depth=5, max_leaf_nodes=300, min_samples_leaf=20;, score=0.964 total time=   0.0s
[CV 1/5] END max_depth=5, max_leaf_nodes=300, min_samples_leaf=30;, score=0.862 total time=   0.0s
[CV 2/5] END max_depth=5, max_leaf_nodes=300, min_samples_leaf=30;, score=0.897 total time=   0.0s
[CV 3/5] END max_depth=5, max_leaf_nodes=300, min_samples_leaf=30;, score=0.857 total time=   0.0s
[CV 4/5] END max_depth=5, max_leaf_nodes=300, min_samples_leaf=30;, score=0.929 total time=   0.0s
[CV 5/5] END max_depth=5, max_leaf_nodes=300, min_samples_leaf=30;, score=0.964 total time=   0.0s
[CV 1/5] END max_depth=5, max_leaf_nodes=300, min_samples_leaf=50;, score=0.621 total time=   0.0s
[CV 2/5] E

[CV 5/5] END max_depth=10, max_leaf_nodes=200, min_samples_leaf=10;, score=0.964 total time=   0.0s
[CV 1/5] END max_depth=10, max_leaf_nodes=200, min_samples_leaf=20;, score=0.862 total time=   0.0s
[CV 2/5] END max_depth=10, max_leaf_nodes=200, min_samples_leaf=20;, score=0.897 total time=   0.0s
[CV 3/5] END max_depth=10, max_leaf_nodes=200, min_samples_leaf=20;, score=0.857 total time=   0.0s
[CV 4/5] END max_depth=10, max_leaf_nodes=200, min_samples_leaf=20;, score=0.929 total time=   0.0s
[CV 5/5] END max_depth=10, max_leaf_nodes=200, min_samples_leaf=20;, score=0.964 total time=   0.0s
[CV 1/5] END max_depth=10, max_leaf_nodes=200, min_samples_leaf=30;, score=0.862 total time=   0.0s
[CV 2/5] END max_depth=10, max_leaf_nodes=200, min_samples_leaf=30;, score=0.897 total time=   0.0s
[CV 3/5] END max_depth=10, max_leaf_nodes=200, min_samples_leaf=30;, score=0.857 total time=   0.0s
[CV 4/5] END max_depth=10, max_leaf_nodes=200, min_samples_leaf=30;, score=0.929 total time=   0.0s


[CV 5/5] END max_depth=20, max_leaf_nodes=200, min_samples_leaf=20;, score=0.964 total time=   0.0s
[CV 1/5] END max_depth=20, max_leaf_nodes=200, min_samples_leaf=30;, score=0.862 total time=   0.0s
[CV 2/5] END max_depth=20, max_leaf_nodes=200, min_samples_leaf=30;, score=0.897 total time=   0.0s
[CV 3/5] END max_depth=20, max_leaf_nodes=200, min_samples_leaf=30;, score=0.857 total time=   0.0s
[CV 4/5] END max_depth=20, max_leaf_nodes=200, min_samples_leaf=30;, score=0.929 total time=   0.0s
[CV 5/5] END max_depth=20, max_leaf_nodes=200, min_samples_leaf=30;, score=0.964 total time=   0.0s
[CV 1/5] END max_depth=20, max_leaf_nodes=200, min_samples_leaf=50;, score=0.621 total time=   0.0s
[CV 2/5] END max_depth=20, max_leaf_nodes=200, min_samples_leaf=50;, score=0.655 total time=   0.0s
[CV 3/5] END max_depth=20, max_leaf_nodes=200, min_samples_leaf=50;, score=0.536 total time=   0.0s
[CV 4/5] END max_depth=20, max_leaf_nodes=200, min_samples_leaf=50;, score=0.643 total time=   0.0s


[CV 5/5] END max_depth=20, max_leaf_nodes=500, min_samples_leaf=50;, score=0.643 total time=   0.0s




125 fits failed out of a total of 625.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
125 fits failed with the following error:
Traceback (most recent call last):
  File "C:\Users\Dinamicka Laptop\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 680, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Dinamicka Laptop\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 937, in fit
    super().fit(
  File "C:\Users\Dinamicka Laptop\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 314, in fit
    raise ValueError(
ValueError: max_leaf_nodes 0 must be either None or larger than 1



One or more of the test scores are non-finite: [       nan        nan        nan

GridSearchCV(cv=5, estimator=DecisionTreeClassifier(random_state=1),
             param_grid={'max_depth': [2, 3, 5, 10, 20],
                         'max_leaf_nodes': [0, 200, 300, 400, 500],
                         'min_samples_leaf': [5, 10, 20, 30, 50]},
             verbose=3)

In [276]:
tree_grid.best_estimator_

DecisionTreeClassifier(max_depth=3, max_leaf_nodes=200, min_samples_leaf=5,
                       random_state=1)

In [277]:
from sklearn.metrics import r2_score

In [278]:
r2_score(y_test,tree_grid.predict(x_test))

0.6707317073170731

---SVC

In [187]:
#SVC
from sklearn.svm import SVC

In [188]:
def train_test_model(x_train, y_train, x_test, y_test,mod,
                    score = accuracy_score):
    mod.fit(x_train,y_train)
    preds_train = mod.predict(x_train)
    preds_test = mod.predict(x_test)
    
    print('Train score')
    print(np.round(score(y_train,preds_train),2))
    print('Test score') 
    print(np.round(score(y_test,preds_test),2))

In [189]:
svc =SVC(kernel='poly')

In [201]:
params = {'degree': [2,3,4,5]}

In [202]:
svc.grid = GridSearchCV(svc, 
                       params,
                       cv = 5,
                       verbose=4,
                       scoring='accuracy')

In [203]:
svc.grid.fit(x_train,y_train)

Fitting 5 folds for each of 4 candidates, totalling 20 fits
[CV 1/5] END ..........................degree=2;, score=0.621 total time=   0.0s
[CV 2/5] END ..........................degree=2;, score=0.621 total time=   0.0s
[CV 3/5] END ..........................degree=2;, score=0.607 total time=   0.0s
[CV 4/5] END ..........................degree=2;, score=0.643 total time=   0.0s
[CV 5/5] END ..........................degree=2;, score=0.679 total time=   0.0s
[CV 1/5] END ..........................degree=3;, score=0.586 total time=   0.0s
[CV 2/5] END ..........................degree=3;, score=0.552 total time=   0.0s
[CV 3/5] END ..........................degree=3;, score=0.679 total time=   0.0s
[CV 4/5] END ..........................degree=3;, score=0.679 total time=   0.0s
[CV 5/5] END ..........................degree=3;, score=0.679 total time=   0.0s
[CV 1/5] END ..........................degree=4;, score=0.586 total time=   0.0s
[CV 2/5] END ..........................degree=4;,

GridSearchCV(cv=5, estimator=SVC(kernel='poly'),
             param_grid={'degree': [2, 3, 4, 5]}, scoring='accuracy',
             verbose=4)

In [204]:
svc.grid.best_estimator_

SVC(degree=5, kernel='poly')

--Кросс валідація

In [279]:
from sklearn.model_selection import cross_validate
def base_models(X, y, scoring="roc_auc"):
    classifiers = [('LR', LogisticRegression()),
                   ("TREE",DecisionTreeClassifier(max_depth=3, max_leaf_nodes=200, min_samples_leaf=5,
                       random_state=1)),
                   ("SVC",SVC(degree=5, kernel='poly'))]

    for name, classifier in classifiers:
        cv_results = cross_validate(classifier, X, y, cv=3, scoring=scoring)
        print(f"{scoring}: {round(cv_results['test_score'].mean(), 4)} ({name}) ")

In [280]:
base_models(x_train, y_train, scoring='accuracy') 

accuracy: 0.9295 (LR) 
accuracy: 0.9087 (TREE) 
accuracy: 0.6482 (SVC) 



lbfgs failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


lbfgs failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


lbfgs failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to th

In [281]:
base_models(x_test, y_test, scoring='accuracy') 

accuracy: 0.8889 (LR) 
accuracy: 0.7222 (TREE) 
accuracy: 0.75 (SVC) 



lbfgs failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


lbfgs failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


lbfgs failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to th