# Logistic Regression Code Appendix

Reference: [scikit-learn `LogisticRegression` API documentation](https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegression.html)

Python Code:

```python
# Import the estimator class
from sklearn.linear_model import LogisticRegression

# Create the logistic regression object with its default settings
log = LogisticRegression()

# Train the model on features X and class labels y (both must already be defined)
clf = log.fit(X, y)

# Predict the class label for every row of X (for two classes: the class whose p > 0.5)
clf.predict(X)

# Predict the per-class probabilities for every row of X (here: the training data)
clf.predict_proba(X)

# Score the model on (X, y); for classifiers this is the mean accuracy
# (see the scikit-learn documentation linked above)
clf.score(X, y)
```

In [1]:
import pandas as pd
import numpy as numpy
from sklearn.linear_model import LogisticRegression

In [2]:
# Fetch the fake pizza dataset over HTTP and preview the first rows
PIZZA_DATA_URL = 'https://jaredlander.com/data/Fake%20Pizza%20Data.csv'
pizza = pd.read_csv(PIZZA_DATA_URL)
pizza.head()

Unnamed: 0,Rating,CostPerSlice,HeatSource,BrickOven,Neighborhood
0,0.03,1.75,Gas,False,LittleItaly
1,4.89,2.75,Coal,True,SoHo
2,4.73,4.0,Wood,True,LittleItaly
3,0.13,1.75,Gas,False,LittleItaly
4,2.45,2.25,Wood,True,Chinatown


In [3]:
from sklearn.preprocessing import LabelEncoder

In [4]:
# Turn the BrickOven column into integer class labels and store them back
# on the frame (the preview below shows False -> 0, True -> 1).
enc = LabelEncoder()
brick_oven_codes = enc.fit_transform(pizza["BrickOven"])
pizza["BrickOven"] = brick_oven_codes

In [5]:
pizza.head()

Unnamed: 0,Rating,CostPerSlice,HeatSource,BrickOven,Neighborhood
0,0.03,1.75,Gas,0,LittleItaly
1,4.89,2.75,Coal,1,SoHo
2,4.73,4.0,Wood,1,LittleItaly
3,0.13,1.75,Gas,0,LittleItaly
4,2.45,2.25,Wood,1,Chinatown


In [6]:
# Remove every row that contains at least one missing value; `pizza` is modified in place.
# NOTE(review): this runs after BrickOven was label-encoded in an earlier cell, so a
# missing BrickOven value would presumably no longer be NaN at this point — confirm the
# column has no missing values, or move this step ahead of the encoding.
pizza.dropna(axis=0, how='any', inplace=True)

In [7]:
# Build the model inputs from the cleaned frame.
# Double brackets keep X as a two-dimensional, single-column DataFrame.
X = pizza[['Rating']]
# The target is the encoded BrickOven flag (a Series).
y = pizza['BrickOven']

In [17]:
# Sanity check: the feature matrix should be two-dimensional, (n_rows, n_features)
X.shape


(200, 1)

In [9]:
from sklearn.model_selection import train_test_split, cross_validate

In [10]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=27,
)

In [11]:
from sklearn.preprocessing import StandardScaler

# Learn the scaling parameters from the training split only, then apply the
# same transformation to both splits so no test-set statistics leak into training.
ss = StandardScaler()
ss.fit(X_train)
X_train = ss.transform(X_train)
X_test = ss.transform(X_test)

In [12]:
from sklearn.linear_model import LogisticRegression

In [13]:
# Fit an L1-penalised logistic regression on the standardised training data.
# The liblinear solver supports the L1 penalty but shuffles the data internally,
# so random_state is pinned (same seed as the train/test split) to make the
# fitted model reproducible across kernel restarts.
log = LogisticRegression(penalty='l1', solver='liblinear', random_state=27)
log.fit(X_train, y_train)


In [14]:
# Predicted class label for every training row (displayed as the cell output)
y_train_pred = log.predict(X_train)
y_train_pred

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0])

In [15]:
# Predicted class probabilities for every training row: one row per sample,
# one column per class (displayed as the cell output)
y_train_proba = log.predict_proba(X_train)
y_train_proba

array([[0.64109063, 0.35890937],
       [0.6291411 , 0.3708589 ],
       [0.63358177, 0.36641823],
       [0.6269728 , 0.3730272 ],
       [0.64203929, 0.35796071],
       [0.67177803, 0.32822197],
       [0.67709958, 0.32290042],
       [0.66065576, 0.33934424],
       [0.67064001, 0.32935999],
       [0.63823803, 0.36176197],
       [0.62419476, 0.37580524],
       [0.62225737, 0.37774263],
       [0.67721231, 0.32278769],
       [0.62310547, 0.37689453],
       [0.62395281, 0.37604719],
       [0.632144  , 0.367856  ],
       [0.67211904, 0.32788096],
       [0.62225737, 0.37774263],
       [0.67450104, 0.32549896],
       [0.62649026, 0.37350974],
       [0.62225737, 0.37774263],
       [0.62286323, 0.37713677],
       [0.63046362, 0.36953638],
       [0.67325444, 0.32674556],
       [0.62213615, 0.37786385],
       [0.66434534, 0.33565466],
       [0.63621151, 0.36378849],
       [0.67302752, 0.32697248],
       [0.67382138, 0.32617862],
       [0.67461424, 0.32538576],
       [0.