# Chapter 13. Introduction to Modeling Libraries in Python


In [1]:
import pandas as pd
import numpy as np
import patsy
# McKinney, Wes. Python for Data Analysis (p. 573). O'Reilly Media. Kindle Edition. 

In [3]:
data = pd.DataFrame({ 'x0': [1, 2, 3, 4, 5], 
                     'x1': [0.01, -0.01, 0.25, -4.1, 0.], 
                     'y': [-1.5, 0., 3.6, 1.3, -2.]})


In [4]:
data.head()

Unnamed: 0,x0,x1,y
0,1,0.01,-1.5
1,2,-0.01,0.0
2,3,0.25,3.6
3,4,-4.1,1.3
4,5,0.0,-2.0


In [5]:
data.columns

Index(['x0', 'x1', 'y'], dtype='object')

In [6]:
data.values

array([[ 1.  ,  0.01, -1.5 ],
       [ 2.  , -0.01,  0.  ],
       [ 3.  ,  0.25,  3.6 ],
       [ 4.  , -4.1 ,  1.3 ],
       [ 5.  ,  0.  , -2.  ]])

In [7]:
df2=data.copy()

In [8]:
df2['strings']=[i for i in 'abcde']
df2

Unnamed: 0,x0,x1,y,strings
0,1,0.01,-1.5,a
1,2,-0.01,0.0,b
2,3,0.25,3.6,c
3,4,-4.1,1.3,d
4,5,0.0,-2.0,e


In [9]:
df2.values

array([[1, 0.01, -1.5, 'a'],
       [2, -0.01, 0.0, 'b'],
       [3, 0.25, 3.6, 'c'],
       [4, -4.1, 1.3, 'd'],
       [5, 0.0, -2.0, 'e']], dtype=object)

In [10]:
model_cols=['x0','x1']
data.loc[:,model_cols].values

array([[ 1.  ,  0.01],
       [ 2.  , -0.01],
       [ 3.  ,  0.25],
       [ 4.  , -4.1 ],
       [ 5.  ,  0.  ]])

In [11]:
data['category'] = pd.Categorical(['a', 'b', 'a', 'a', 'b'],categories =['a', 'b'])
data

Unnamed: 0,x0,x1,y,category
0,1,0.01,-1.5,a
1,2,-0.01,0.0,b
2,3,0.25,3.6,a
3,4,-4.1,1.3,a
4,5,0.0,-2.0,b


In [12]:
dummies = pd.get_dummies( data.category, prefix ='category')
dummies

Unnamed: 0,category_a,category_b
0,1,0
1,0,1
2,1,0
3,1,0
4,0,1


In [13]:
data_with_dummies = data.drop('category', axis = 1).join(dummies)


In [14]:
data_with_dummies


Unnamed: 0,x0,x1,y,category_a,category_b
0,1,0.01,-1.5,1,0
1,2,-0.01,0.0,0,1
2,3,0.25,3.6,1,0
3,4,-4.1,1.3,1,0
4,5,0.0,-2.0,0,1


In [2]:
data = pd.DataFrame({ 'x0': [1, 2, 3, 4, 5], 'x1': [0.01, -0.01, 0.25, -4.1, 0.], 'y': [-1.5, 0., 3.6, 1.3, -2.]})
data

Unnamed: 0,x0,x1,y
0,1,0.01,-1.5
1,2,-0.01,0.0
2,3,0.25,3.6
3,4,-4.1,1.3
4,5,0.0,-2.0


In [3]:
y,X = patsy.dmatrices('y ~ x0 + x1',data)

In [4]:
y

DesignMatrix with shape (5, 1)
     y
  -1.5
   0.0
   3.6
   1.3
  -2.0
  Terms:
    'y' (column 0)

In [5]:
X

DesignMatrix with shape (5, 3)
  Intercept  x0     x1
          1   1   0.01
          1   2  -0.01
          1   3   0.25
          1   4  -4.10
          1   5   0.00
  Terms:
    'Intercept' (column 0)
    'x0' (column 1)
    'x1' (column 2)

In [6]:
np.asarray(y)

array([[-1.5],
       [ 0. ],
       [ 3.6],
       [ 1.3],
       [-2. ]])

In [7]:
np.asarray(X)

array([[ 1.  ,  1.  ,  0.01],
       [ 1.  ,  2.  , -0.01],
       [ 1.  ,  3.  ,  0.25],
       [ 1.  ,  4.  , -4.1 ],
       [ 1.  ,  5.  ,  0.  ]])

In [8]:
patsy.dmatrices('y ~ x0 + x1 +0',data)

(DesignMatrix with shape (5, 1)
      y
   -1.5
    0.0
    3.6
    1.3
   -2.0
   Terms:
     'y' (column 0),
 DesignMatrix with shape (5, 2)
   x0     x1
    1   0.01
    2  -0.01
    3   0.25
    4  -4.10
    5   0.00
   Terms:
     'x0' (column 0)
     'x1' (column 1))

In [9]:
coef,resid,_,_=np.linalg.lstsq(X,y)

  coef,resid,_,_=np.linalg.lstsq(X,y)


In [10]:
coef

array([[ 0.31290976],
       [-0.07910564],
       [-0.26546384]])

In [11]:
coef=pd.Series(coef.squeeze(),index=X.design_info.column_names)
coef

Intercept    0.312910
x0          -0.079106
x1          -0.265464
dtype: float64

# Data Transformations in Patsy Formulas


In [12]:
y, X = patsy.dmatrices('y ~ x0 + np.log(np.abs(x1) + 1)',data)
X

DesignMatrix with shape (5, 3)
  Intercept  x0  np.log(np.abs(x1) + 1)
          1   1                 0.00995
          1   2                 0.00995
          1   3                 0.22314
          1   4                 1.62924
          1   5                 0.00000
  Terms:
    'Intercept' (column 0)
    'x0' (column 1)
    'np.log(np.abs(x1) + 1)' (column 2)

In [13]:
y, X = patsy.dmatrices('y ~ standardize(x0) + center(x1)', data)
X

DesignMatrix with shape (5, 3)
  Intercept  standardize(x0)  center(x1)
          1         -1.41421        0.78
          1         -0.70711        0.76
          1          0.00000        1.02
          1          0.70711       -3.33
          1          1.41421        0.77
  Terms:
    'Intercept' (column 0)
    'standardize(x0)' (column 1)
    'center(x1)' (column 2)

In [14]:
new_data = pd.DataFrame({ 'x0': [6, 7, 8, 9], 'x1': [3.1, -0.5, 0, 2.3], 'y': [1, 2, 3, 4]})


In [15]:
new_X = patsy.build_design_matrices([X.design_info], new_data)
new_X

[DesignMatrix with shape (4, 3)
   Intercept  standardize(x0)  center(x1)
           1          2.12132        3.87
           1          2.82843        0.27
           1          3.53553        0.77
           1          4.24264        3.07
   Terms:
     'Intercept' (column 0)
     'standardize(x0)' (column 1)
     'center(x1)' (column 2)]

In [16]:
data

Unnamed: 0,x0,x1,y
0,1,0.01,-1.5
1,2,-0.01,0.0
2,3,0.25,3.6
3,4,-4.1,1.3
4,5,0.0,-2.0


In [17]:
y,X=patsy.dmatrices('y ~ I(x0+x1)',data)

X

DesignMatrix with shape (5, 2)
  Intercept  I(x0 + x1)
          1        1.01
          1        1.99
          1        3.25
          1       -0.10
          1        5.00
  Terms:
    'Intercept' (column 0)
    'I(x0 + x1)' (column 1)

In [18]:
dir(patsy.builtins)

['C',
 'ContrastMatrix',
 'Diff',
 'Helmert',
 'I',
 'Poly',
 'Q',
 'Sum',
 'Treatment',
 '__all__',
 '__builtins__',
 '__cached__',
 '__doc__',
 '__file__',
 '__loader__',
 '__name__',
 '__package__',
 '__spec__',
 'bs',
 'cc',
 'center',
 'cr',
 'scale',
 'standardize',
 'te',
 'test_I',
 'test_Q']

# Categorical Data and Patsy

In [19]:
data = pd.DataFrame({ 'key1': ['a', 'a', 'b', 'b', 'a', 'b', 'a', 'b'],
                     'key2': [0, 1, 0, 1, 0, 1, 0, 0], 
                     'v1': [1, 2, 3, 4, 5, 6, 7, 8], 
                     'v2': [-1, 0, 2.5, -0.5, 4.0, -1.2, 0.2, -1.7] 
                    })
data

Unnamed: 0,key1,key2,v1,v2
0,a,0,1,-1.0
1,a,1,2,0.0
2,b,0,3,2.5
3,b,1,4,-0.5
4,a,0,5,4.0
5,b,1,6,-1.2
6,a,0,7,0.2
7,b,0,8,-1.7


In [20]:
y,X=patsy.dmatrices('v2~key1',data)
X

DesignMatrix with shape (8, 2)
  Intercept  key1[T.b]
          1          0
          1          0
          1          1
          1          1
          1          0
          1          1
          1          0
          1          1
  Terms:
    'Intercept' (column 0)
    'key1' (column 1)

In [21]:
y

DesignMatrix with shape (8, 1)
    v2
  -1.0
   0.0
   2.5
  -0.5
   4.0
  -1.2
   0.2
  -1.7
  Terms:
    'v2' (column 0)

In [22]:
y,X=patsy.dmatrices('v2~key1+0',data)
X

DesignMatrix with shape (8, 2)
  key1[a]  key1[b]
        1        0
        1        0
        0        1
        0        1
        1        0
        0        1
        1        0
        0        1
  Terms:
    'key1' (columns 0:2)

In [23]:
y, X = patsy.dmatrices('v2 ~ C(key2)', data)
X

DesignMatrix with shape (8, 2)
  Intercept  C(key2)[T.1]
          1             0
          1             1
          1             0
          1             1
          1             0
          1             1
          1             0
          1             0
  Terms:
    'Intercept' (column 0)
    'C(key2)' (column 1)

In [25]:
np.asarray(X)

array([[1., 0.],
       [1., 1.],
       [1., 0.],
       [1., 1.],
       [1., 0.],
       [1., 1.],
       [1., 0.],
       [1., 0.]])

In [26]:
np.asarray(y)

array([[-1. ],
       [ 0. ],
       [ 2.5],
       [-0.5],
       [ 4. ],
       [-1.2],
       [ 0.2],
       [-1.7]])

# design_info attributes

In [46]:
X.design_info

DesignInfo(['Intercept', 'C(key2)[T.1]'],
           factor_infos={EvalFactor('C(key2)'): FactorInfo(factor=EvalFactor('C(key2)'),
                                    type='categorical',
                                    state=<factor state>,
                                    categories=(0, 1))},
           term_codings=OrderedDict([(Term([]),
                                      [SubtermInfo(factors=(),
                                                   contrast_matrices={},
                                                   num_columns=1)]),
                                     (Term([EvalFactor('C(key2)')]),
                                      [SubtermInfo(factors=(EvalFactor('C(key2)'),),
                                                   contrast_matrices={EvalFactor('C(key2)'): ContrastMatrix(array([[0.],
                                                                                            [1.]]),
                                                                      

In [47]:
X.design_info.column_name_indexes

OrderedDict([('Intercept', 0), ('C(key2)[T.1]', 1)])

In [48]:
X.design_info.column_names

['Intercept', 'C(key2)[T.1]']

In [49]:
X.design_info.factor_infos

{EvalFactor('C(key2)'): FactorInfo(factor=EvalFactor('C(key2)'),
            type='categorical',
            state=<factor state>,
            categories=(0, 1))}

In [50]:
X.design_info.linear_constraint

<bound method DesignInfo.linear_constraint of DesignInfo(['Intercept', 'C(key2)[T.1]'],
           factor_infos={EvalFactor('C(key2)'): FactorInfo(factor=EvalFactor('C(key2)'),
                                    type='categorical',
                                    state=<factor state>,
                                    categories=(0, 1))},
           term_codings=OrderedDict([(Term([]),
                                      [SubtermInfo(factors=(),
                                                   contrast_matrices={},
                                                   num_columns=1)]),
                                     (Term([EvalFactor('C(key2)')]),
                                      [SubtermInfo(factors=(EvalFactor('C(key2)'),),
                                                   contrast_matrices={EvalFactor('C(key2)'): ContrastMatrix(array([[0.],
                                                                                            [1.]]),
                        

In [51]:
X.design_info.describe

<bound method DesignInfo.describe of DesignInfo(['Intercept', 'C(key2)[T.1]'],
           factor_infos={EvalFactor('C(key2)'): FactorInfo(factor=EvalFactor('C(key2)'),
                                    type='categorical',
                                    state=<factor state>,
                                    categories=(0, 1))},
           term_codings=OrderedDict([(Term([]),
                                      [SubtermInfo(factors=(),
                                                   contrast_matrices={},
                                                   num_columns=1)]),
                                     (Term([EvalFactor('C(key2)')]),
                                      [SubtermInfo(factors=(EvalFactor('C(key2)'),),
                                                   contrast_matrices={EvalFactor('C(key2)'): ContrastMatrix(array([[0.],
                                                                                            [1.]]),
                                 

In [52]:
X.design_info.terms

[Term([]), Term([EvalFactor('C(key2)')])]

In [27]:
data['key2'] = data['key2'].map({ 0: 'zero', 1: 'one'})
data

Unnamed: 0,key1,key2,v1,v2
0,a,zero,1,-1.0
1,a,one,2,0.0
2,b,zero,3,2.5
3,b,one,4,-0.5
4,a,zero,5,4.0
5,b,one,6,-1.2
6,a,zero,7,0.2
7,b,zero,8,-1.7


In [28]:
y,X=patsy.dmatrices('v2 ~ key1+ key2',data)
X

DesignMatrix with shape (8, 3)
  Intercept  key1[T.b]  key2[T.zero]
          1          0             1
          1          0             0
          1          1             1
          1          1             0
          1          0             1
          1          1             0
          1          0             1
          1          1             1
  Terms:
    'Intercept' (column 0)
    'key1' (column 1)
    'key2' (column 2)

In [29]:
y,X=patsy.dmatrices('v2 ~ key1+ key2+ key1:key2',data)
X

DesignMatrix with shape (8, 4)
  Intercept  key1[T.b]  key2[T.zero]  key1[T.b]:key2[T.zero]
          1          0             1                       0
          1          0             0                       0
          1          1             1                       1
          1          1             0                       0
          1          0             1                       0
          1          1             0                       0
          1          0             1                       0
          1          1             1                       1
  Terms:
    'Intercept' (column 0)
    'key1' (column 1)
    'key2' (column 2)
    'key1:key2' (column 3)

# 13.3 Introduction to statsmodels

> McKinney, Wes. Python for Data Analysis (p. 586). O'Reilly Media. Kindle Edition. 

In [31]:
import statsmodels.api as sm
import statsmodels.formula.api as smf

In [32]:
np.random.seed(12345)

In [33]:
# Linear model from random data
def dnorm(mean,variance,size=1):
    if isinstance(size,int):
        size=size,
    return mean+np.sqrt(variance)+np.random.randn(*size)

In [34]:
N = 100 
X = np.c_[dnorm( 0, 0.4, size = N),
          dnorm( 0, 0.6, size = N),
          dnorm( 0, 0.2, size = N)] 
eps = dnorm( 0, 0.1, size = N)
beta = [0.1, 0.3, 0.5] 
y = np.dot( X, beta) + eps


In [35]:
X[:5]

array([[ 0.42774787, -0.79106062,  1.5746947 ],
       [ 1.11139887,  0.21205648, -0.12114985],
       [ 0.11301682,  0.74193253,  0.75657576],
       [ 0.07672523, -0.15440953, -0.13017188],
       [ 2.5982361 ,  0.29202402, -0.72142048]])

In [36]:
y[:5]

array([ 2.65626531, -0.97983647,  0.55015556, -0.13332918,  0.68301081])

In [38]:
X.shape

(100, 3)

In [39]:
X_model=sm.add_constant(X)
X_model[:5]

array([[ 1.        ,  0.42774787, -0.79106062,  1.5746947 ],
       [ 1.        ,  1.11139887,  0.21205648, -0.12114985],
       [ 1.        ,  0.11301682,  0.74193253,  0.75657576],
       [ 1.        ,  0.07672523, -0.15440953, -0.13017188],
       [ 1.        ,  2.5982361 ,  0.29202402, -0.72142048]])

In [40]:
model=sm.OLS(y,X)

In [41]:
results=model.fit()

In [42]:
results.params

array([0.39004368, 0.29561838, 0.5841267 ])

In [43]:
print(results.summary())

                                 OLS Regression Results                                
Dep. Variable:                      y   R-squared (uncentered):                   0.437
Model:                            OLS   Adj. R-squared (uncentered):              0.420
Method:                 Least Squares   F-statistic:                              25.14
Date:                Wed, 14 Oct 2020   Prob (F-statistic):                    4.05e-12
Time:                        19:55:00   Log-Likelihood:                         -153.12
No. Observations:                 100   AIC:                                      312.2
Df Residuals:                      97   BIC:                                      320.0
Df Model:                           3                                                  
Covariance Type:            nonrobust                                                  
                 coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------

In [44]:
data = pd.DataFrame( X, columns =['col0', 'col1', 'col2'])
data

Unnamed: 0,col0,col1,col2
0,0.427748,-0.791061,1.574695
1,1.111399,0.212056,-0.121150
2,0.113017,0.741933,0.756576
3,0.076725,-0.154410,-0.130172
4,2.598236,0.292024,-0.721420
...,...,...,...
95,1.427709,1.685579,0.221690
96,0.750565,-0.246306,1.796940
97,-0.116076,-0.638819,1.797513
98,1.217425,2.071205,0.060560


In [45]:
data['y']=y
data[:5]

Unnamed: 0,col0,col1,col2,y
0,0.427748,-0.791061,1.574695,2.656265
1,1.111399,0.212056,-0.12115,-0.979836
2,0.113017,0.741933,0.756576,0.550156
3,0.076725,-0.15441,-0.130172,-0.133329
4,2.598236,0.292024,-0.72142,0.683011


In [46]:
results=smf.ols('y~col0+col1+col2',data=data).fit()


In [47]:
results.params

Intercept    0.459295
col0         0.252297
col1         0.115862
col2         0.520942
dtype: float64

In [48]:
results.tvalues

Intercept    2.882883
col0         2.377438
col1         1.020530
col2         4.510690
dtype: float64

In [49]:
results.predict(data[:5])

0    1.295885
1    0.701156
2    0.967903
3    0.392951
4    0.772840
dtype: float64

In [51]:
y[:5]

array([ 2.65626531, -0.97983647,  0.55015556, -0.13332918,  0.68301081])

# Estimating Time Series Processes


In [53]:
init_x=4

In [54]:
import random

In [56]:
values=[init_x,init_x]
N=1000

b0=.8
b1=-0.4

noise=dnorm(0,0.1,N)

for i in range(N):
    new_x=values[-1]*b0+values[-2]*b1+noise[i]
    values.append(new_x)

In [57]:
MAXLAGS = 5
model = sm.tsa.AR(values)
results = model.fit(MAXLAGS)
results.params

statsmodels.tsa.AR has been deprecated in favor of statsmodels.tsa.AutoReg and
statsmodels.tsa.SARIMAX.

AutoReg adds the ability to specify exogenous variables, include time trends,
and add seasonal dummies. The AutoReg API differs from AR since the model is
treated as immutable, and so the entire specification including the lag
length must be specified when creating the model. This change is too
substantial to incorporate into the existing AR api. The function
ar_select_order performs lag length selection for AutoReg models.

AutoReg only estimates parameters using conditional MLE (OLS). Use SARIMAX to
estimate ARX and related models using full MLE via the Kalman Filter.





array([ 0.30290074,  0.78501265, -0.4086365 , -0.01340513,  0.01965317,
        0.00653243])

# 13.4 Introduction to scikit-learn


In [59]:
from sklearn import datasets
from sklearn import set_config


In [60]:

set_config(print_changed_only=False)

In [64]:
folder='data_sets/titanic/'
train = pd.read_csv(folder+'train.csv')
test = pd.read_csv(folder+'test.csv')

In [63]:
train.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


In [65]:
test.head()

Unnamed: 0,PassengerId,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,892,3,"Kelly, Mr. James",male,34.5,0,0,330911,7.8292,,Q
1,893,3,"Wilkes, Mrs. James (Ellen Needs)",female,47.0,1,0,363272,7.0,,S
2,894,2,"Myles, Mr. Thomas Francis",male,62.0,0,0,240276,9.6875,,Q
3,895,3,"Wirz, Mr. Albert",male,27.0,0,0,315154,8.6625,,S
4,896,3,"Hirvonen, Mrs. Alexander (Helga E Lindqvist)",female,22.0,1,1,3101298,12.2875,,S


In [66]:
train.isnull().sum()

PassengerId      0
Survived         0
Pclass           0
Name             0
Sex              0
Age            177
SibSp            0
Parch            0
Ticket           0
Fare             0
Cabin          687
Embarked         2
dtype: int64

In [67]:
test.isnull().sum()

PassengerId      0
Pclass           0
Name             0
Sex              0
Age             86
SibSp            0
Parch            0
Ticket           0
Fare             1
Cabin          327
Embarked         0
dtype: int64

In [68]:
impute_value= train['Age'].median() # median of Age column
impute_value

28.0

In [69]:
train['Age']=train['Age'].fillna(impute_value)
test['Age']=test['Age'].fillna(impute_value)

In [70]:
train['IsFemale']=(train['Sex']=='female').astype(int)
test['IsFemale']=(test['Sex']=='female').astype(int)

In [71]:
train.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked,IsFemale
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S,0
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C,1
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S,1
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S,1
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S,0


In [72]:
test.head()

Unnamed: 0,PassengerId,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked,IsFemale
0,892,3,"Kelly, Mr. James",male,34.5,0,0,330911,7.8292,,Q,0
1,893,3,"Wilkes, Mrs. James (Ellen Needs)",female,47.0,1,0,363272,7.0,,S,1
2,894,2,"Myles, Mr. Thomas Francis",male,62.0,0,0,240276,9.6875,,Q,0
3,895,3,"Wirz, Mr. Albert",male,27.0,0,0,315154,8.6625,,S,0
4,896,3,"Hirvonen, Mrs. Alexander (Helga E Lindqvist)",female,22.0,1,1,3101298,12.2875,,S,1


In [73]:
predictors=['Pclass','IsFemale','Age']
X_train=train[predictors].values
X_test=test[predictors].values
y_train=train['Survived'].values

In [74]:
X_train[:5]

array([[ 3.,  0., 22.],
       [ 1.,  1., 38.],
       [ 3.,  1., 26.],
       [ 1.,  1., 35.],
       [ 3.,  0., 35.]])

In [75]:
y_train[:5]

array([0, 1, 1, 1, 0], dtype=int64)

In [76]:
from sklearn.linear_model import LogisticRegression

In [77]:
model=LogisticRegression()
model

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=100,
                   multi_class='auto', n_jobs=None, penalty='l2',
                   random_state=None, solver='lbfgs', tol=0.0001, verbose=0,
                   warm_start=False)

In [78]:
model.fit(X_train,y_train)

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=100,
                   multi_class='auto', n_jobs=None, penalty='l2',
                   random_state=None, solver='lbfgs', tol=0.0001, verbose=0,
                   warm_start=False)

In [79]:
y_pred=model.predict(X_test)
y_pred[:10]

array([0, 0, 0, 0, 1, 0, 1, 0, 1, 0], dtype=int64)

In [80]:
from sklearn.linear_model import LogisticRegressionCV

In [81]:
model=LogisticRegressionCV(10)
model



LogisticRegressionCV(Cs=10, class_weight=None, cv=None, dual=False,
                     fit_intercept=True, intercept_scaling=1.0, l1_ratios=None,
                     max_iter=100, multi_class='auto', n_jobs=None,
                     penalty='l2', random_state=None, refit=True, scoring=None,
                     solver='lbfgs', tol=0.0001, verbose=0)

In [83]:
model.fit(X_train,y_train)

LogisticRegressionCV(Cs=10, class_weight=None, cv=None, dual=False,
                     fit_intercept=True, intercept_scaling=1.0, l1_ratios=None,
                     max_iter=100, multi_class='auto', n_jobs=None,
                     penalty='l2', random_state=None, refit=True, scoring=None,
                     solver='lbfgs', tol=0.0001, verbose=0)

In [82]:
from sklearn.model_selection import cross_val_score

In [84]:
scores=cross_val_score(model,X_train,y_train,cv=4)

In [85]:
scores

array([0.71748879, 0.79820628, 0.77130045, 0.78828829])