### 這堂課目標
<li>Deep Neural Network (DNN)</li>

- <a href="https://drive.google.com/open?id=1io6A-ZRL9KPw1PJ3fjJNij2Pl7RLjOWA">投影片1 : 深度學習類型介紹</a>

- <a href="https://drive.google.com/open?id=1inmvcLopAGhe8h5Zircsb4ToBEgehhxC">投影片2 : 名詞解釋</a>

In [1]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
import torch.nn.functional as F
import sklearn.model_selection

#### 1.載入資料

In [2]:
df = pd.read_csv('StudentsPerformance.csv')
#data source:https://www.kaggle.com/spscientist/students-performance-in-exams

In [3]:
df.head()

Unnamed: 0,gender,race/ethnicity,parental level of education,lunch,test preparation course,math score,reading score,writing score
0,female,group B,bachelor's degree,standard,none,72,72,74
1,female,group C,some college,standard,completed,69,90,88
2,female,group B,master's degree,standard,none,90,95,93
3,male,group A,associate's degree,free/reduced,none,47,57,44
4,male,group C,some college,standard,none,76,78,75


In [4]:
df.shape

(1000, 8)

#### 2.清洗資料（data cleaning / Feature engineering）

In [5]:
target = df['math score'] #建立預測目標
one_hot_data = df.drop(['writing score','math score', 'reading score'],axis=1) #準備做one hot encoding
data = pd.get_dummies(one_hot_data) #one hot encoding
data = pd.concat([data,df[['writing score','reading score']]],axis=1) #把數值資料補回去df內

In [6]:
print(data.shape)
data.head()

(1000, 19)


Unnamed: 0,gender_female,gender_male,race/ethnicity_group A,race/ethnicity_group B,race/ethnicity_group C,race/ethnicity_group D,race/ethnicity_group E,parental level of education_associate's degree,parental level of education_bachelor's degree,parental level of education_high school,parental level of education_master's degree,parental level of education_some college,parental level of education_some high school,lunch_free/reduced,lunch_standard,test preparation course_completed,test preparation course_none,writing score,reading score
0,1,0,0,1,0,0,0,0,1,0,0,0,0,0,1,0,1,74,72
1,1,0,0,0,1,0,0,0,0,0,0,1,0,0,1,1,0,88,90
2,1,0,0,1,0,0,0,0,0,0,1,0,0,0,1,0,1,93,95
3,0,1,1,0,0,0,0,1,0,0,0,0,0,1,0,0,1,44,57
4,0,1,0,0,1,0,0,0,0,0,0,1,0,0,1,0,1,75,78


#### 3.分訓練集跟測試集（train test split）

In [7]:
class Dataset(torch.utils.data.Dataset):
    def __init__(self, X, y):
        if not torch.is_tensor(X) and not torch.is_tensor(y):
            self.X = torch.from_numpy(X)
            self.y = torch.from_numpy(y)

    def __length__(self):
        return len(self.X)

    def __getitem__(self, i):
        return self.X[i], self.y[i]

In [8]:
input_data = Dataset(data.to_numpy(),target.to_numpy())

In [9]:
train_indices, test_indices, _, _ = sklearn.model_selection.train_test_split(range(len(data)),input_data.y,test_size=0.1)

In [10]:
train_split = torch.utils.data.Subset(input_data, train_indices)
test_split = torch.utils.data.Subset(input_data, test_indices)

In [11]:
train_batches = torch.utils.data.DataLoader(train_split, batch_size=100)
test_batches = torch.utils.data.DataLoader(test_split, batch_size=100)

## DNN

#### 4.定義模型

In [12]:
# Create a neural net class
class Net(nn.Module):
    # Constructor
    def __init__(self, num_classes=1):
        super(Net, self).__init__()
        
        # We'll apply max pooling with a kernel size of 2
        self.fc1 = nn.Linear( in_features=input_data.X.shape[1],out_features=100)
        
        self.fc2 = nn.Linear( in_features=100,out_features=100)
        
        self.fc3 = nn.Linear( in_features=100,out_features=1)
        
    def forward(self, x):
        # Use a relu activation function after layer 1 (convolution 1 and pool)
        x = F.relu(self.fc1(x))
      
        # Use a relu activation function after layer 2 (convolution 2 and pool)
        x = F.relu(self.fc2(x))
    
        x = self.fc3(x)
        return x
    
print("DNN model class defined!")

DNN model class defined!


#### 5.訓練模型

In [13]:
model = Net()
optimizer = optim.Adam(model.parameters(), lr=0.001)
loss_function = nn.MSELoss()

In [35]:
 # Run the training loop
for epoch in range(0, 10): # 5 epochs at maximum
    # Print epoch
    print(f'Starting epoch {epoch+1}')

    # Set current loss value
    current_loss = []

    # Iterate over the DataLoader for training data
    for i, train_data in enumerate(train_batches, 0):

      # Get and prepare inputs
        inputs, targets = train_data
        inputs, targets = inputs.float(), targets.float()
        targets = targets.reshape((targets.shape[0], 1))

      # Zero the gradients
        optimizer.zero_grad()

        # Perform forward pass
        outputs = model(inputs)

        # Compute loss
        loss = loss_function(outputs, targets)

        # Perform backward pass
        loss.backward()

        # Perform optimization
        optimizer.step()

        # Print statistics
        current_loss.append(loss.item()**0.5)
    print('rmse after epoch:' + str(np.round(np.mean(current_loss),3)))
    current_loss = []

# Process is complete.
print('Training process has finished.')

Starting epoch 1
rmse after epoch:8.597
Starting epoch 2
rmse after epoch:8.556
Starting epoch 3
rmse after epoch:8.513
Starting epoch 4
rmse after epoch:8.469
Starting epoch 5
rmse after epoch:8.423
Starting epoch 6
rmse after epoch:8.376
Starting epoch 7
rmse after epoch:8.327
Starting epoch 8
rmse after epoch:8.277
Starting epoch 9
rmse after epoch:8.226
Starting epoch 10
rmse after epoch:8.173
Training process has finished.


In [26]:
for i, test_data in enumerate(test_batches, 0):

      # Get and prepare inputs
    inputs, targets = test_data
    inputs, targets = inputs.float(), targets.float()
    outputs = model(inputs)

In [27]:
print(outputs.reshape(100,)[:5])
print(targets[:5])

tensor([71.6355, 86.8580, 67.9805, 66.2267, 69.1549], grad_fn=<SliceBackward0>)
tensor([77., 79., 63., 58., 74.])


#### 6.評估模型表現

In [28]:
dnn_pred = outputs.detach().numpy()

In [29]:
gt = targets.reshape(100,1).detach().numpy()

In [30]:
df = pd.DataFrame(np.concatenate((gt,dnn_pred),axis=1),columns=['gt','dnn_pred'])
df.head(10)

Unnamed: 0,gt,dnn_pred
0,77.0,71.635536
1,79.0,86.857964
2,63.0,67.980537
3,58.0,66.226707
4,74.0,69.154945
5,75.0,73.178017
6,74.0,71.606285
7,60.0,62.197605
8,62.0,60.376976
9,63.0,65.185852


#### 7.調整參數（重複5,6）

## XGB 比較

In [31]:
import xgboost as xgb
from sklearn import metrics

def auc(m, train, test): 
    return (metrics.roc_auc_score(y_train,m.predict_proba(train)[:,1]),
                            metrics.roc_auc_score(y_test,m.predict_proba(test)[:,1]))
xgbr = xgb.XGBRegressor(max_depth=50, min_child_weight=1, n_estimators=200,verbosity=1,learning_rate=0.16,)



In [32]:
eval_set = [(data.iloc[test_indices], target.iloc[test_indices])]
xgbr.fit(data.iloc[train_indices], target.iloc[train_indices], eval_metric="rmse", eval_set=eval_set, verbose=True,early_stopping_rounds=2)

[0]	validation_0-rmse:56.48219
[1]	validation_0-rmse:47.61301
[2]	validation_0-rmse:40.17093
[3]	validation_0-rmse:33.91412
[4]	validation_0-rmse:28.68399
[5]	validation_0-rmse:24.26470
[6]	validation_0-rmse:20.59868
[7]	validation_0-rmse:17.57337
[8]	validation_0-rmse:15.07395
[9]	validation_0-rmse:13.08186
[10]	validation_0-rmse:11.41258
[11]	validation_0-rmse:10.04908
[12]	validation_0-rmse:9.02143
[13]	validation_0-rmse:8.24560
[14]	validation_0-rmse:7.68376
[15]	validation_0-rmse:7.11584
[16]	validation_0-rmse:6.77168
[17]	validation_0-rmse:6.46634
[18]	validation_0-rmse:6.25602
[19]	validation_0-rmse:6.12996
[20]	validation_0-rmse:6.06185
[21]	validation_0-rmse:6.05015
[22]	validation_0-rmse:6.04269
[23]	validation_0-rmse:6.05634


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):


XGBRegressor(base_score=0.5, booster='gbtree', colsample_bylevel=1,
             colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
             gamma=0, gpu_id=-1, importance_type=None,
             interaction_constraints='', learning_rate=0.16, max_delta_step=0,
             max_depth=50, min_child_weight=1, missing=nan,
             monotone_constraints='()', n_estimators=200, n_jobs=8,
             num_parallel_tree=1, predictor='auto', random_state=0, reg_alpha=0,
             reg_lambda=1, scale_pos_weight=1, subsample=1, tree_method='exact',
             validate_parameters=1, verbosity=1)

In [33]:
xgb_pred = xgbr.predict(data.iloc[test_indices])

In [34]:
df['xgb_pred']=xgb_pred
df.head(10)

Unnamed: 0,gt,dnn_pred,xgb_pred
0,77.0,71.635536,66.530502
1,79.0,86.857964,89.74543
2,63.0,67.980537,60.826881
3,58.0,66.226707,56.810635
4,74.0,69.154945,64.693123
5,75.0,73.178017,76.763908
6,74.0,71.606285,68.277603
7,60.0,62.197605,64.171295
8,62.0,60.376976,64.59478
9,63.0,65.185852,60.586998


## ML (Machine Learning v.s. NN (Neural Network)
- ML類別資料有比DL有更好的表現
- ML訓練時間比較短
- ML更能表達運算過程的含義（DL常被形容為黑盒子）
- NN可以處理更多更酷的問題

# 無作業