# Logistic and Linear regression
Linear regression is used for regression problems, whereas logistic regression is used for classification problems.
Linear regression produces a continuous output, whereas logistic regression produces a discrete output (a class label).
Linear regression finds the best-fitting line, whereas logistic regression goes one step further and passes the line's values through the sigmoid curve to turn them into class probabilities.
Linear regression is fitted by minimizing the mean squared error, whereas logistic regression is fitted by maximum likelihood estimation (equivalently, by minimizing the log loss).

## Logistic Regression (Classification Problem)

In [1]:
from sklearn import datasets
import numpy as np

# Toy datasets bundled with scikit-learn (the two loads are independent of each other).
# diabetes -> continuous target, used in the linear-regression section.
diabetes = datasets.load_diabetes()
# iris -> class labels, used in the logistic-regression section.
iris = datasets.load_iris()

In [2]:
# Train/test split for iris: shuffle the row indices and hold out the last 10
# shuffled rows for testing; the remaining rows are used for training.
# The generator is seeded so the shuffle -- and with it the split and every
# prediction printed further down -- is the same on each Restart & Run All.
indices = np.random.default_rng(seed=42).permutation(len(iris.data))

# Slice the shuffled indices once so inputs and targets are guaranteed to use
# exactly the same rows.
train_idx, test_idx = indices[:-10], indices[-10:]

train_in = iris.data[train_idx]
train_out = iris.target[train_idx]

test_in = iris.data[test_idx]
test_out = iris.target[test_idx]

In [3]:
from sklearn import linear_model

# Fit a logistic-regression classifier on the iris training split and compare
# its predictions on the 10 held-out rows with the true labels.
#
# C is the inverse regularization strength, so C=1e5 leaves the model almost
# unregularized.
# `multi_class` is intentionally not passed: "auto" is already the default
# (so the fitted model is the same) and the parameter is deprecated in current
# scikit-learn, where passing it only triggers a FutureWarning.
# NOTE(review): newer scikit-learn releases also appear to warn about using
# liblinear on multiclass targets -- confirm against the installed version and
# consider wrapping the estimator in OneVsRestClassifier if so.
logistic = linear_model.LogisticRegression(C=1e5, solver="liblinear")
logistic = logistic.fit(train_in, train_out)
print(logistic)

print("Valores previstos: " , logistic.predict(test_in))
print("Valores reais: " , test_out)

LogisticRegression(C=100000.0, solver='liblinear')
Valores previstos:  [2 1 2 0 1 1 0 2 2 0]
Valores reais:  [2 1 2 0 1 1 0 2 2 0]


## Linear Regression (Regression Problem)

In [4]:
# Diabetes hold-out split (no shuffling): the final 20 rows are kept for
# testing and everything before them is used for fitting.
X_train, X_test = diabetes.data[:-20], diabetes.data[-20:]
y_train, y_test = diabetes.target[:-20], diabetes.target[-20:]

In [5]:
from sklearn import linear_model
# Plain least-squares baseline: fit on the training rows, then show the
# predictions for the 20 held-out rows next to their true targets.
regr_model = linear_model.LinearRegression().fit(X_train, y_train)
print(regr_model)

regr_predictions = regr_model.predict(X_test)
print("Valores previstos: " , regr_predictions)
print("Valores reais: " , y_test)


LinearRegression()
Valores previstos:  [197.61846908 155.43979328 172.88665147 111.53537279 164.80054784
 131.06954875 259.12237761 100.47935157 117.0601052  124.30503555
 218.36632793  61.19831284 132.25046751 120.3332925   52.54458691
 194.03798088 102.57139702 123.56604987 211.0346317   52.60335674]
Valores reais:  [233.  91. 111. 152. 120.  67. 310.  94. 183.  66. 173.  72.  49.  64.
  48. 178. 104. 132. 220.  57.]


In [6]:
# Ridge regression on the same diabetes split, with penalty strength alpha=0.1;
# prints its predictions for the held-out rows.
ridge = linear_model.Ridge(alpha=.1).fit(X_train, y_train)
ridge_predictions = ridge.predict(X_test)
print("Valores previstos: " , ridge_predictions)

Valores previstos:  [195.01911216 153.17149511 169.39544476 106.01296055 166.5133624
 133.75708257 257.43812783 103.063527   117.87817227 123.59757023
 216.24272342  66.88652254 136.95957319 122.35356866  57.84501965
 190.51188353 110.6371237  129.5404851  203.47164445  52.50623249]


In [7]:
# Lasso regression with the library's default settings on the same diabetes
# split; prints its predictions for the held-out rows.
lasso = linear_model.Lasso().fit(X_train, y_train)
lasso_predictions = lasso.predict(X_test)
print("Valores previstos: " , lasso_predictions)

Valores previstos:  [194.0266481  144.06218451 164.21351573 131.44370138 152.24238072
 141.21304705 206.03485031 133.08806649 129.18988752 137.73743129
 189.84297027 115.59492356 139.83064096 132.53551415 107.35741725
 170.39555034 141.13149163 133.32409459 180.73179803 124.79153565]


In [8]:
# Load the digits dataset and hold out one third of the rows for testing.
digits = datasets.load_digits()
dig_n_samples = len(digits.data)
# Integer division gives the test-set size directly (no float round-trip).
numtst = dig_n_samples // 3
print("Numero de exemplos para teste: ", numtst)

# Seeded shuffle so the split -- and the accuracy / confusion matrix computed
# from it -- is reproducible on each Restart & Run All.
indices_dig = np.random.default_rng(seed=42).permutation(dig_n_samples)

# Split on an explicit position rather than with negative slices: `[:-k]` and
# `[-k:]` swap meaning when k == 0 (empty train set, everything in test).
dig_split_at = dig_n_samples - numtst
dig_train_idx = indices_dig[:dig_split_at]
dig_test_idx = indices_dig[dig_split_at:]

dig_train_in = digits.data[dig_train_idx]
dig_train_out = digits.target[dig_train_idx]

dig_test_in  = digits.data[dig_test_idx]
dig_test_out = digits.target[dig_test_idx]

# Sanity check: train and test together must account for every row.
assert len(dig_train_in) + len(dig_test_in) == dig_n_samples

dig_train_in.shape, dig_train_out.shape, dig_test_in.shape, dig_test_out.shape

Numero de exemplos para teste:  599


((1198, 64), (1198,), (599, 64), (599,))

In [9]:
# Logistic regression on the digits training split; predictions are made for
# the held-out third and scored in the following cells.
#
# C is the inverse regularization strength, so C=1e5 leaves the model almost
# unregularized.
# `multi_class` is intentionally not passed: "auto" is already the default
# (so the fitted model is the same) and the parameter is deprecated in current
# scikit-learn, where passing it only triggers a FutureWarning.
dig_logr = linear_model.LogisticRegression(C=1e5, solver="liblinear")
dig_logr.fit(dig_train_in, dig_train_out)
prev_logr = dig_logr.predict(dig_test_in)

In [10]:
from sklearn.metrics import accuracy_score
# Share of held-out digits whose predicted label matches the true label.
pecc = accuracy_score(dig_test_out, prev_logr)
print("PECC (accuracy):", pecc)

PECC (accuracy): 0.9432387312186978


In [11]:
from sklearn.metrics import confusion_matrix

# True labels are passed first, so rows are the true digits and columns the
# predicted ones; off-diagonal entries are the misclassifications.
dig_confusion = confusion_matrix(dig_test_out, prev_logr)
dig_confusion

array([[56,  0,  0,  0,  0,  0,  0,  0,  0,  0],
       [ 0, 55,  2,  0,  0,  0,  1,  0,  3,  0],
       [ 0,  1, 51,  1,  0,  0,  0,  0,  1,  0],
       [ 0,  0,  0, 60,  0,  2,  0,  1,  4,  0],
       [ 0,  0,  0,  0, 58,  0,  0,  0,  0,  2],
       [ 0,  0,  0,  0,  0, 73,  0,  0,  0,  1],
       [ 0,  1,  0,  0,  0,  0, 52,  0,  1,  0],
       [ 0,  0,  0,  0,  0,  0,  0, 58,  0,  1],
       [ 0,  3,  0,  1,  1,  0,  0,  0, 45,  0],
       [ 0,  0,  1,  1,  0,  1,  0,  0,  4, 57]])