# データサイエンス(Python)勉強会用 2021/07/30 (金)
## 必要なライブラリのインストール

In [None]:
!pip3 install seaborn

## 使用するライブラリのインポート

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from sklearn.datasets import load_iris
from sklearn import svm
from sklearn.metrics import confusion_matrix
import seaborn as sns

## データの傾向を確認

In [None]:
# Load the iris dataset and build a lookup from integer class label to name.
ds = load_iris()
name_map = dict(enumerate(ds['target_names']))

# One row per sample: the feature columns, the integer label, and its name.
iris_df = pd.DataFrame(ds.data, columns=ds.feature_names)
iris_df['target'] = ds['target']
# Series.map accepts the dict directly, so no lambda wrapper is needed.
iris_df['target_names'] = iris_df['target'].map(name_map)
iris_df.head(5)

In [None]:
# Display summary statistics for the columns of iris_df.
iris_df.describe()

## irisデータをペアプロット

In [None]:
# Pairwise plots of the columns of iris_df, colored by the integer class label.
sns.pairplot(data=iris_df, hue="target")

In [None]:
# Columns of ds.data:
#   0: sepal length (cm)
#   1: sepal width (cm)
#   2: petal length (cm)
#   3: petal width (cm)

# Keep only sepal length and petal length so the classifiers work in 2-D.
X = ds.data[:, [0, 2]]
y = ds.target

h = 0.02  # step size in the mesh

# Bounds of the plotting area: the data range of each feature, widened by 0.5.
x_min, y_min = X.min(axis=0) - 0.5
x_max, y_max = X.max(axis=0) + 0.5

# Grid of points covering that area, spaced h apart along both axes.
xx, yy = np.meshgrid(
    np.arange(x_min, x_max, h),
    np.arange(y_min, y_max, h),
)

def decision_boundary(clf, X, y, ax, title, h=0.02, margin=0.5):
    """Fit ``clf`` on two features and draw its decision regions on ``ax``.

    Args:
        clf: Estimator exposing ``fit(X, y)`` and ``predict(points)``.
        X: 2-D array whose column 0 is drawn on the x-axis and column 1 on
            the y-axis.
        y: One class label per row of ``X``; used for fitting and for
            coloring the scattered samples.
        ax: Axes-like object to draw on.
        title: Title given to ``ax``.
        h: Step size of the mesh on which ``clf`` is evaluated.
        margin: Amount added around the data range on every side.

    The axis labels are fixed to 'sepal length' / 'petal length'.
    """
    clf.fit(X, y)

    # Build the mesh from the X that was passed in rather than from
    # module-level variables, so the regions always match the plotted data.
    x_min, x_max = X[:, 0].min() - margin, X[:, 0].max() + margin
    y_min, y_max = X[:, 1].min() - margin, X[:, 1].max() + margin
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))

    # Predict a class for every mesh point and fold the result back into
    # the shape of the mesh.
    Z = clf.predict(np.c_[xx.ravel(), yy.ravel()]).reshape(xx.shape)

    # The colormap is referenced by name; the same map colors both the
    # regions and the samples.
    ax.pcolormesh(xx, yy, Z, cmap="Paired")
    ax.scatter(X[:, 0], X[:, 1], c=y, edgecolors='k', cmap="Paired")

    ax.set_title(title)
    ax.set_xlabel('sepal length')
    ax.set_ylabel('petal length')

## k近傍法を適用する

#### k = x
#### x（近傍点の数 k）の値を 1〜150（学習サンプル数以下）の範囲で変更して、実行した結果を見てみましょう。

In [None]:
from sklearn.neighbors import KNeighborsClassifier

# Number of neighbors handed to the classifier; change it and re-run.
k = 100

# A single large square panel for the decision regions.
fig, axes = plt.subplots(nrows=1, ncols=1, figsize=(10, 10))

clf = KNeighborsClassifier(n_neighbors=k)
title = "{0} neighbor(s)".format(k)
decision_boundary(clf, X, y, axes, title)

## 線形SVMを適用

#### C = x
#### x の値を変更して、実行した結果を見てみましょう。

In [None]:
from sklearn.svm import LinearSVC

# Value passed as LinearSVC's C parameter; try values between 0.01 and 100.
C = 0.01

fig, axes = plt.subplots(1, 1, figsize=(10, 10))

title = "C=%s" % C
# dual=False solves the primal problem, which scikit-learn recommends when
# there are more samples than features (as here); it avoids convergence
# warnings from the dual solver at the larger C values.
clf = LinearSVC(C=C, dual=False)
decision_boundary(clf, X, y, axes, title)

In [None]:
# Feature matrix with one row per sample and one column per measurement.
# Selecting the columns directly avoids the list round trip and transpose.
iris_arr = iris_df[["sepal length (cm)",
                    'sepal width (cm)',
                    'petal length (cm)',
                    'petal width (cm)']].values
iris_arr

## k-meansの適用

In [None]:
clusters = 3

# random_state is fixed so the cluster ids are the same on every run;
# n_init is given explicitly because its default differs between
# scikit-learn releases.
pred = KMeans(n_clusters=clusters, n_init=10, random_state=0).fit_predict(iris_arr)
iris_df['cluster_id'] = pred
pred  # show the cluster id assigned to every sample

In [None]:
# Number of samples assigned to each cluster id.
iris_df['cluster_id'].value_counts()

In [None]:
cluster_ids = sorted(iris_df['cluster_id'].unique())

sepal_len = 'sepal length (cm)'
sepal_wid = 'sepal width (cm)'
petal_len = 'petal length (cm)'
petal_wid = 'petal width (cm)'

fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(12, 12))

# (axes, x column, y column) for each of the four panels.
panels = [
    (ax1, sepal_len, sepal_wid),
    (ax2, petal_len, petal_wid),
    (ax3, sepal_len, petal_len),
    (ax4, sepal_wid, petal_wid),
]

# Titles do not depend on the cluster, so set them once instead of per loop.
for panel_ax, x_col, y_col in panels:
    panel_ax.set_title('x = {0} : y = {1}'.format(x_col, y_col), loc='center')

# One scatter call per cluster and panel; each call on the same axes takes
# the next color, so clusters are distinguishable.
for i in cluster_ids:
    # Filter the rows of this cluster once and reuse them for all panels.
    members = iris_df[iris_df['cluster_id'] == i]
    for panel_ax, x_col, y_col in panels:
        panel_ax.scatter(members[x_col].values, members[y_col])
plt.show()

In [None]:
# Importing Axes3D registers the '3d' projection on older Matplotlib versions.
from mpl_toolkits.mplot3d import Axes3D

fig = plt.figure()
# Create the 3-D axes through the figure: newer Matplotlib releases no longer
# attach Axes3D(fig) to the figure automatically, which leaves the plot blank.
ax = fig.add_subplot(111, projection='3d')

# Label the X, Y and Z axes once; the labels do not depend on the cluster.
ax.set_xlabel("sepal length (cm)")
ax.set_ylabel("petal length (cm)")
ax.set_zlabel("petal width (cm)")

for i in cluster_ids:
    # Use local names here: rebinding the notebook-level X would break
    # re-running the classifier cells that pass X to decision_boundary.
    members = iris_df[iris_df['cluster_id'] == i]
    xs = members['sepal length (cm)'].values
    ys = members['petal length (cm)'].values
    zs = members['petal width (cm)'].values

    # Markers only (no connecting line), one plot call per cluster.
    ax.plot(xs, ys, zs, marker="o", linestyle='None')

plt.show()