## 練習一：回歸問題(Regression)

#### 1.1 導入需要的套件與函式

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import sklearn.ensemble

from sklearn.metrics import mean_squared_error, confusion_matrix
from sklearn.metrics import mean_absolute_error,mean_squared_error,mean_absolute_percentage_error
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression

#### 1.2查看資料樣態

In [None]:
# Load the tips dataset from the CSV file located next to this notebook.
tips_csv_path = r'./tips.csv'
tips = pd.read_csv(tips_csv_path)
tips.head()  # preview the first rows

#### 1.3資料前處理

In [None]:
# One-hot encode the tips table with pandas; the encoded frame is kept as `data`.
data = pd.get_dummies(data=tips)
data.head()  # preview the encoded columns

In [None]:
# Check for missing values: evaluates to True if any cell of `data` is null.
data.isna().values.any()

#### 1.4資料集切分

In [None]:
# Label to predict is the total_bill column; every remaining column is a feature.
label = data['total_bill']
feature = data.drop(columns='total_bill')

# Split into training and test sets; random_state=0 makes the shuffle repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    feature,
    label,
    random_state=0,
)

In [None]:
# Build the model: fit a linear regression on the training split,
# then predict total_bill for the held-out test rows.
model = LinearRegression().fit(X_train, y_train)
y_pred = model.predict(X_test)

In [None]:
# Visualise the result: real vs. predicted total_bill, plotted by position in
# the test set (0..n-1) rather than by the original row index.
plt.figure(figsize=(10, 4))
for values, legend_name in ((y_test, "Real"), (y_pred, "Pred")):
    plt.plot(range(len(values)), values, 'o-', label=legend_name)
plt.legend()
plt.ylabel("total_bill")
plt.show()

In [None]:
# Regression error metrics on the test split.
# Note: every metric takes the true values first and the predictions second.
mse = mean_squared_error(y_test, y_pred)
scores = {
    'MAE:': mean_absolute_error(y_test, y_pred),             # mean absolute error
    'MSE:': mse,                                              # mean squared error
    'RMSE:': np.sqrt(mse),                                    # root of the MSE
    'MAPE:': mean_absolute_percentage_error(y_test, y_pred),  # mean absolute percentage error
}
for tag, value in scores.items():
    print(tag, value)

## 練習二：分類問題(Classification)

In [None]:
# 載入需要的套件
from sklearn.datasets import load_iris
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.preprocessing import MinMaxScaler

In [None]:
# Load the iris dataset once and reuse the returned object, instead of calling
# load_iris() again for every field that is needed.
iris = load_iris()

# Store the dataset as a DataFrame: one column per feature name, plus the
# class label in a 'Target' column.
df = pd.DataFrame(data=iris.data, columns=iris.feature_names)
df['Target'] = iris.target
df

In [None]:
# Separate the target column from the feature columns.
y = df['Target']
X = df.drop(columns='Target')

In [None]:
# 做正規化 (不是標準化!!!!!!)
X = MinMaxScaler().fit_transform(X)

In [None]:
# 切分資料集
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [None]:
# Train the decision tree classifier on the training split.
# random_state is fixed because scikit-learn permutes the features randomly at
# each split, so repeated runs could otherwise yield different trees; 42 is the
# same seed the train/test split uses.
cart = DecisionTreeClassifier(random_state=42)
cart.fit(X_train, y_train)

In [None]:
# Predict class labels for the test split with the trained decision tree.
y_pred = cart.predict(X_test)

In [None]:
# Evaluation metrics on the test split (true labels first, predictions second).
acc = accuracy_score(y_test, y_pred)
cm = confusion_matrix(y_test, y_pred)
report = classification_report(y_test, y_pred)

print('ACC: ', acc)
print('Confusion matrix: \n', cm)
print('Report : \n', report)

## 練習三：聚類問題(Clustering)

In [None]:
from sklearn.cluster import KMeans

In [None]:
# Cluster the samples into 3 groups. n_init and random_state are set explicitly
# so the cluster assignment (and therefore the plot colours) is reproducible and
# does not depend on the installed scikit-learn version's default number of
# initialisations.
km = KMeans(n_clusters=3, n_init=10, random_state=42)
# Reuses the iris feature array X from the classification exercise above.
km.fit(X)

# Scatter the first two feature columns, coloured by assigned cluster.
plt.scatter(X[:, 0], X[:, 1], c=km.labels_)
# The red "x" markers are the cluster centres (same two feature columns).
plt.scatter(km.cluster_centers_[:, 0], km.cluster_centers_[:, 1], marker='x', s=100, color='red')
plt.show()