In [0]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from mlxtend.plotting import plot_decision_regions

In [0]:
# Location of the UCI Wine data file and the column names to attach to it.
# The file is read with header=None, i.e. it is treated as having no header row.
WINE_URL = 'https://archive.ics.uci.edu/ml/machine-learning-databases/wine/wine.data'
WINE_COLUMNS = [
    'Class label',
    'Alcohol',
    'Malic acid',
    'Ash',
    'Alcalinity of ash',
    'Magnesium',
    'Total phenols',
    'Flavanoids',
    'Nonflavanoid phenols',
    'Proanthocyanins',
    'Color intensity',
    'Hue',
    'OD280/OD315 of diluted wines',
    'Proline',
]

# Load the data and name the columns in a single step.
df = pd.read_csv(WINE_URL, header=None, names=WINE_COLUMNS)

# Preview the first rows (rendered as the cell output).
df.head()

In [0]:
# Report the (rows, columns) dimensions of the loaded frame.
frame_dims = df.shape
print('df shape', frame_dims)

In [0]:
# Positional indices of the two features used throughout the notebook:
# column 10 is 'Color intensity' and column 13 is 'Proline'.
FEATURE_COLUMN_INDICES = [10, 13]
LABEL_COLUMN_INDEX = 0

# Feature matrix restricted to the two selected columns.
X = df.iloc[:, FEATURE_COLUMN_INDICES].to_numpy()

# Target labels, shifted down by one so that they start from zero.
raw_class_labels = df.iloc[:, LABEL_COLUMN_INDEX].to_numpy()
y = raw_class_labels - 1

# Show the first five samples and labels as the cell output.
(X[:5], y[:5])

In [0]:
# Display the raw (unshifted) class labels stored in the first column.
class_label_column = df.iloc[:, 0]
class_label_column.values

In [0]:
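# Alternative (disabled): load the Wine data from scikit-learn's bundled copy instead of the CSV.
# NOTE(review): wine.data presumably holds only the feature columns (no class-label column),
# so indices 9 and 12 here would correspond to columns 10 and 13 of `df` — confirm before enabling.
#from sklearn import datasets
#wine = datasets.load_wine()
#X = wine.data[:,[9,12]]
#y = wine.target
#X[:5], y[:5]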

In [0]:
# Hold out 20% of the samples for testing; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

# Print the shape of each of the four resulting arrays.
print(
    'X_trainの形状：', X_train.shape,
    ' y_trainの形状：', y_train.shape,
    ' X_testの形状：', X_test.shape,
    ' y_testの形状：', y_test.shape,
)

In [0]:
# Standardization: the scaler is fitted on the training split only, and the
# same fitted scaler is then applied to both the training and the test split.
sc = StandardScaler()
sc.fit(X_train)
X_train_std = sc.transform(X_train)
X_test_std = sc.transform(X_test)

In [0]:
# Build and fit the logistic regression model on the standardized training data.
#
# `multi_class='ovr'` is intentionally no longer passed: with solver='liblinear'
# scikit-learn already uses the one-vs-rest scheme by default, and passing
# `multi_class` explicitly is deprecated as of scikit-learn 1.5. The redundant
# `l1_ratio=None` (only relevant for elastic-net penalties) is dropped as well.
#
# NOTE(review): recent scikit-learn releases also warn about using liblinear
# for multiclass targets; wrapping the estimator in OneVsRestClassifier is the
# suggested long-term path, but it changes how coef_/intercept_ are accessed —
# confirm against the installed version before migrating.
model = LogisticRegression(
    solver='liblinear',
    penalty='l2',
    C=1.0,
    max_iter=100,
    random_state=0,
)
model.fit(X_train_std, y_train)

In [0]:
# Accuracy of `model` on the standardized training split.
y_train_pred = model.predict(X_train_std)
ac_score = accuracy_score(y_true=y_train, y_pred=y_train_pred)
print('正解率 = %.2f' % ac_score)

In [0]:
# Accuracy of `model` on the standardized held-out test split.
y_test_pred = model.predict(X_test_std)
ac_score = accuracy_score(y_true=y_test, y_pred=y_test_pred)
print('正解率 = %.2f' % ac_score)

In [0]:
# Decision regions of the logistic regression model over the training data.
# An explicit Axes is used so the figure carries a title and axis labels and
# can be understood on its own. Labels are ASCII because the default matplotlib
# fonts may not contain Japanese glyphs.
fig, ax = plt.subplots(figsize=(8, 4))
plot_decision_regions(X_train_std, y_train, clf=model, ax=ax)
ax.set_xlabel('Color intensity (standardized)')
ax.set_ylabel('Proline (standardized)')
ax.set_title('Logistic regression (one-vs-rest): training data')
# Render the figure without echoing the Axes repr as cell output.
plt.show()

In [0]:
# Decision regions of the logistic regression model over the test data.
# An explicit Axes is used so the figure carries a title and axis labels and
# can be understood on its own. Labels are ASCII because the default matplotlib
# fonts may not contain Japanese glyphs.
fig, ax = plt.subplots(figsize=(8, 4))
plot_decision_regions(X_test_std, y_test, clf=model, ax=ax)
ax.set_xlabel('Color intensity (standardized)')
ax.set_ylabel('Proline (standardized)')
ax.set_title('Logistic regression (one-vs-rest): test data')
# Render the figure without echoing the Axes repr as cell output.
plt.show()

In [0]:
# Print the fitted weights first, then the fitted intercepts, of `model`.
for fitted_param in (model.coef_, model.intercept_):
    print(fitted_param)

In [0]:
# A single made-up sample used as "unseen" input. It is passed to the model
# as-is, i.e. it is expressed in the standardized feature space.
new_data = [[0.1, -0.1]]

print('ロジスティック回帰')
# Report, in order: predicted class, decision scores, class probabilities.
for caption, infer in (
    ('予測', model.predict),
    ('スコア', model.decision_function),
    ('確率', model.predict_proba),
):
    print(caption, infer(new_data))

In [0]:
# Map the unseen sample back through the fitted scaler to obtain its
# color-intensity and proline values before standardization.
new_data_original_scale = sc.inverse_transform(new_data)
new_data_original_scale

In [0]:
# Build and fit the softmax (multinomial logistic) regression model on the
# standardized training data.
#
# `multi_class='multinomial'` is intentionally no longer passed: with
# solver='lbfgs' and more than two classes, scikit-learn (>= 0.22) fits the
# multinomial formulation by default, and passing `multi_class` explicitly is
# deprecated as of scikit-learn 1.5. The redundant `l1_ratio=None` (only
# relevant for elastic-net penalties) is dropped as well.
model2 = LogisticRegression(
    solver='lbfgs',
    penalty='l2',
    C=1.0,
    max_iter=100,
    random_state=0,
)

model2.fit(X_train_std, y_train)

In [0]:
# Accuracy of `model2` on the standardized training split.
y_train_pred = model2.predict(X_train_std)
ac_score = accuracy_score(y_true=y_train, y_pred=y_train_pred)
print('正解率 = %.2f' % ac_score)

In [0]:
# Accuracy of `model2` on the standardized held-out test split.
y_test_pred = model2.predict(X_test_std)
ac_score = accuracy_score(y_true=y_test, y_pred=y_test_pred)
print('正解率 = %.2f' % ac_score)

In [0]:
print('ソフトマックス回帰')
# Report, in order: predicted class, decision scores, class probabilities
# of `model2` for the unseen sample.
for caption, infer in (
    ('予測', model2.predict),
    ('スコア', model2.decision_function),
    ('確率', model2.predict_proba),
):
    print(caption, infer(new_data))

In [0]:
# Decision regions of the softmax regression model over the training data.
# An explicit Axes is used so the figure carries a title and axis labels and
# can be understood on its own. Labels are ASCII because the default matplotlib
# fonts may not contain Japanese glyphs.
fig, ax = plt.subplots(figsize=(8, 4))
plot_decision_regions(X_train_std, y_train, clf=model2, ax=ax)
ax.set_xlabel('Color intensity (standardized)')
ax.set_ylabel('Proline (standardized)')
ax.set_title('Softmax regression: training data')
# Render the figure without echoing the Axes repr as cell output.
plt.show()

In [0]:
# Decision regions of the softmax regression model over the test data.
# An explicit Axes is used so the figure carries a title and axis labels and
# can be understood on its own. Labels are ASCII because the default matplotlib
# fonts may not contain Japanese glyphs.
fig, ax = plt.subplots(figsize=(8, 4))
plot_decision_regions(X_test_std, y_test, clf=model2, ax=ax)
ax.set_xlabel('Color intensity (standardized)')
ax.set_ylabel('Proline (standardized)')
ax.set_title('Softmax regression: test data')
# Render the figure without echoing the Axes repr as cell output.
plt.show()

In [0]:
# Print the fitted weights first, then the fitted intercepts, of `model2`.
for fitted_param in (model2.coef_, model2.intercept_):
    print(fitted_param)