In [1]:
import pandas as pd  
import numpy as np

from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, confusion_matrix
from sklearn.datasets import load_boston

In [2]:
# Load the Boston housing dataset.
# NOTE(review): load_boston is deprecated/removed in newer scikit-learn
# releases -- confirm the pinned version before re-running.
boston_data = load_boston()

# Keep two explanatory features and attach the regression target:
#   RM    = number of rooms
#   LSTAT = proportion of lower-status population
#   MEDV  = median value of owner-occupied homes in $1000s
boston = (
    pd.DataFrame(boston_data.data, columns=boston_data.feature_names)
    .loc[:, ["RM", "LSTAT"]]
    .assign(MEDV=boston_data.target)
)

# The median price is the cut-off between "inexpensive" and "expensive".
price_threshold = boston["MEDV"].median()

# Binary label used as Y for the classification below:
# 0 when MEDV is strictly below the median, 1 otherwise.
below_median = boston["MEDV"] < price_threshold
boston["Expensive"] = (~below_median).astype("int64")
boston.head(10)

Unnamed: 0,RM,LSTAT,MEDV,Expensive
0,6.575,4.98,24.0,1
1,6.421,9.14,21.6,1
2,7.185,4.03,34.7,1
3,6.998,2.94,33.4,1
4,7.147,5.33,36.2,1
5,6.43,5.21,28.7,1
6,6.012,12.43,22.9,1
7,6.172,19.15,27.1,1
8,5.631,29.93,16.5,0
9,6.004,17.1,18.9,0


In [3]:
# Fit a logistic regression that predicts the binary "Expensive" label
# from the two features "RM" and "LSTAT".
logistic = LogisticRegression()
logistic.fit(boston[["RM", "LSTAT"]], boston["Expensive"])

# Print the fitted model as a formula. coef_[0, 0] / coef_[0, 1] are the
# weights for RM / LSTAT (same order as the columns passed to fit) and
# intercept_[0] is the bias term; ":+" forces an explicit sign so the terms
# read as a sum. The linear score is wrapped in sigmoid(...) -- the previous
# string contained a stray "confusion_matrix" and an unmatched ")".
print(f'P(Expensive=1) = sigmoid({logistic.coef_[0,0]}*RM {logistic.coef_[0,1]:+}*LSTAT {logistic.intercept_[0]:+})')

Expensive = confusion_matrix1.1334804350032337*RM -0.3488846128592283*LSTAT -2.889956663218535)


In [43]:
# Predict the 0/1 label for every row. The DataFrame is passed directly
# (not .values) so the column names match the ones the model was fitted
# with; passing a bare ndarray makes scikit-learn emit a feature-name
# mismatch warning.
logistic_pred = logistic.predict(boston[["RM", "LSTAT"]])  # ndarray of 0|1

# Confusion matrix cells are [0,0], [0,1], [1,0], [1,1] with
# row i = ground truth and column j = predicted label.
print(confusion_matrix(boston["Expensive"], logistic_pred))
print(f'Accuracy Score: {accuracy_score(boston["Expensive"], logistic_pred)}')
print(f'Precision Score: {precision_score(boston["Expensive"], logistic_pred)}')
print(f'Recall_score Score: {recall_score(boston["Expensive"], logistic_pred)}')
print(f'F1 Score: {f1_score(boston["Expensive"], logistic_pred)}')


[[205  46]
 [ 43 212]]
Accuracy Score: 0.8241106719367589
Precision Score: 0.8217054263565892
Recall_score Score: 0.8313725490196079
F1 Score: 0.8265107212475634


In [9]:
# Predicted probabilities; column 1 of predict_proba is kept as the score
# for the positive ("Expensive" == 1) label. The DataFrame is passed
# directly (not .values) so the column names match the ones used in fit()
# and scikit-learn does not warn about missing feature names.
logistic_pred_proba = logistic.predict_proba(boston[["RM", "LSTAT"]])[:, 1]

# Raise the decision threshold (stricter criterion): precision goes up.
threshold_pred = logistic_pred_proba > 0.8
print(confusion_matrix(boston["Expensive"], threshold_pred))
print(f'Accuracy Score: {accuracy_score(boston["Expensive"], threshold_pred)}')
print(f'Precision Score: {precision_score(boston["Expensive"], threshold_pred)}')
print(f'Recall_score Score: {recall_score(boston["Expensive"], threshold_pred)}')
print(f'F1 Score: {f1_score(boston["Expensive"], threshold_pred)}')

[[248   3]
 [ 88 167]]
Accuracy Score: 0.8201581027667985
Precision Score: 0.9823529411764705
Recall_score Score: 0.6549019607843137
F1 Score: 0.7858823529411765


In [7]:
# Lower the decision threshold (more lenient criterion): recall goes up.
threshold_pred = logistic_pred_proba > 0.3

# Emit the confusion matrix and the four scores, one per line.
print(
    confusion_matrix(boston["Expensive"], threshold_pred),
    f'Accuracy Score: {accuracy_score(boston["Expensive"], threshold_pred)}',
    # precision score drops at this threshold
    f'Precision Score: {precision_score(boston["Expensive"], threshold_pred)}',
    # recall score rises at this threshold
    f'Recall_score Score: {recall_score(boston["Expensive"], threshold_pred)}',
    f'F1 Score: {f1_score(boston["Expensive"], threshold_pred)}',
    sep="\n",
)

[[177  74]
 [ 22 233]]
Accuracy Score: 0.8102766798418972
Precision Score: 0.758957654723127
Recall_score Score: 0.9137254901960784
F1 Score: 0.8291814946619217


In [8]:
# Re-evaluate with a 0.7 probability cut-off on the positive-class scores.
threshold_pred = logistic_pred_proba > 0.7

# Emit the confusion matrix and the four scores, one per line.
print(
    confusion_matrix(boston["Expensive"], threshold_pred),
    f'Accuracy Score: {accuracy_score(boston["Expensive"], threshold_pred)}',
    f'Precision Score: {precision_score(boston["Expensive"], threshold_pred)}',
    f'Recall_score Score: {recall_score(boston["Expensive"], threshold_pred)}',
    f'F1 Score: {f1_score(boston["Expensive"], threshold_pred)}',
    sep="\n",
)

[[238  13]
 [ 68 187]]
Accuracy Score: 0.8399209486166008
Precision Score: 0.935
Recall_score Score: 0.7333333333333333
F1 Score: 0.8219780219780219
