## Jupyter Basics

    ### Click the title to rename the file
    ### Press 'a' while selecting a cell to add a cell above
    ### Press 'b' while selecting a cell to add a cell below
    ### Press 'dd' while selecting a cell to delete it
    ### Additionally, you can cut/copy/paste/move cells
    ### Dragging is possible while holding Shift
    ### Press 'm' while selecting a cell to convert it to Markdown
    ### To revert to code, press 'y' or use the cell-type drop-down in the toolbar (the second list box from the right, below the main menu)
    ### Press Ctrl + Enter to execute
    ### Press Shift + Enter to execute and move to the next cell (a new cell is created if there is none)
    ### Various other menu options are also available

## Machine Learning Basics

Predict each student's Math score from their Korean score.

### Raw Data

In [1]:
import numpy as np
import pandas as pd

In [2]:
# The file's first row is the header and fields are comma-separated, which is
# what read_csv assumes by default; only the non-default encoding is given.
data = pd.read_csv('./data/sample.csv', encoding='euc-kr')

In [3]:
data.head()

Unnamed: 0,Name,Korean,Math
0,Kang Daniel,10,55
1,Kang Dongho,60,80
2,Kang Mina,0,50
3,Kwon Hyunbin,20,60
4,Kim Doyeon,60,80


In [4]:
# Keep only the feature (Korean) and target (Math) columns and drop down to a
# plain 2-D NumPy array. NOTE: this rebinds `data` from a DataFrame to an
# ndarray, so the cell cannot be re-run on its own without reloading the CSV.
data = data.loc[:, ['Korean', 'Math']].values

In [5]:
data

array([[10, 55],
       [60, 80],
       [ 0, 50],
       [20, 60],
       [60, 80],
       [30, 65],
       [60, 80],
       [60, 80],
       [10, 55],
       [40, 70],
       [70, 85],
       [10, 55],
       [30, 65],
       [ 0, 50],
       [20, 60],
       [60, 80],
       [90, 95],
       [ 0, 50],
       [ 0, 50],
       [90, 95],
       [10, 55],
       [90, 95],
       [60, 80],
       [10, 55]], dtype=int64)

In [6]:
data.shape

(24, 2)

### Split Data: Train data, Test Data

In [7]:
test_idxs = [5, 18, 22, 23]

In [8]:
# Every row position that was not reserved for testing goes to training.
train_idxs = [idx for idx in range(data.shape[0]) if not (idx in test_idxs)]

In [9]:
# Select the training rows (all columns) by position.
train_data = data[train_idxs, :]

In [10]:
train_data

array([[10, 55],
       [60, 80],
       [ 0, 50],
       [20, 60],
       [60, 80],
       [60, 80],
       [60, 80],
       [10, 55],
       [40, 70],
       [70, 85],
       [10, 55],
       [30, 65],
       [ 0, 50],
       [20, 60],
       [60, 80],
       [90, 95],
       [ 0, 50],
       [90, 95],
       [10, 55],
       [90, 95]], dtype=int64)

In [11]:
# Select the held-out test rows (all columns) by position.
test_data = data[test_idxs, :]

In [12]:
test_data

array([[30, 65],
       [ 0, 50],
       [60, 80],
       [10, 55]], dtype=int64)

### Set Models

In [13]:
def f(w, x):
    """Linear model without a bias term.

    Args:
        w: Weight (slope) applied to the input.
        x: Input value or array of input values.

    Returns:
        The product ``w * x``.
    """
    prediction = w * x
    return prediction

In [14]:
w = 1.5

### Set Loss Function

In [15]:
def loss(pre, y):
    """Mean squared error between predictions and targets.

    Args:
        pre: Predicted values; must support ``-``, ``**`` and ``.mean()``
            (e.g. a NumPy array).
        y: Target values, compatible with ``pre`` for subtraction.

    Returns:
        The mean of the squared differences ``(pre - y) ** 2``.
    """
    residual = pre - y
    squared_error = residual ** 2
    return squared_error.mean()

### Set Search Method

In [16]:
def search_method(loss, w, threshold=100, big_step=0.1, small_step=0.01):
    """Propose the next weight by stepping downward from the current one.

    The step size depends only on how large the current loss is: a coarse
    step while the loss is above ``threshold``, a fine step otherwise. The
    weight is always decreased; this rule never moves ``w`` upward.

    Args:
        loss: Current loss value for weight ``w`` (a number, not the
            ``loss`` function; the parameter shadows that name locally).
        w: Current weight.
        threshold: Loss level above which the coarse step is used.
        big_step: Amount subtracted from ``w`` when ``loss > threshold``.
        small_step: Amount subtracted from ``w`` otherwise.

    Returns:
        The updated weight.
    """
    step = big_step if loss > threshold else small_step
    return w - step

### Split Data: Train data, Validation Data

In [17]:
train_len = 16
# Draw the training rows from a seeded generator so the train/validation
# split is the same on every Restart & Run All.
split_rng = np.random.RandomState(42)
train_idxs = split_rng.choice(len(train_data), train_len, replace=False)

In [18]:
train_idxs

array([15, 13, 14, 16,  4,  6, 12,  7,  8,  2,  9, 17, 19, 18, 10,  0])

In [19]:
train_data = train_data[train_idxs]

In [20]:
val_idxs = [idx for idx in range(len(train_data)) if idx not in train_idxs]

In [21]:
val_idxs

[1, 3, 5, 11]

In [22]:
val_data = train_data[val_idxs]

### Train and Validate

In [23]:
# The feature/target columns do not change between steps, so slice them once
# instead of rebinding x / y to two different datasets on every iteration.
train_x, train_y = train_data[:, 0], train_data[:, 1]
val_x, val_y = val_data[:, 0], val_data[:, 1]

for i in range(5):
    # Score the current weight on both splits.
    train_loss = loss(f(w, train_x), train_y)
    val_loss = loss(f(w, val_x), val_y)

    print("--- Step %d ---"%(i+1))
    print("Current Model: \t %2.2f"%(w))
    print("Train Loss: \t %2.4f"%(train_loss))
    print("Val Loss: \t %2.4f"%(val_loss))

    # Only the training loss drives the update; the validation loss is
    # reported so a weight can be chosen afterwards. `w` is the notebook-level
    # weight, so re-running this cell continues from wherever it left off.
    w = search_method(train_loss, w)

--- Step 1 ---
Current Model: 	 1.50
Train Loss: 	 1275.0000
Val Loss: 	 1275.0000
--- Step 2 ---
Current Model: 	 1.40
Train Loss: 	 1159.0000
Val Loss: 	 1125.2500
--- Step 3 ---
Current Model: 	 1.30
Train Loss: 	 1096.0000
Val Loss: 	 1036.0000
--- Step 4 ---
Current Model: 	 1.20
Train Loss: 	 1086.0000
Val Loss: 	 1007.2500
--- Step 5 ---
Current Model: 	 1.10
Train Loss: 	 1129.0000
Val Loss: 	 1039.0000


### Select Model

In [24]:
# Weight carried forward to the test set.
# NOTE(review): in the recorded training output the loop only scored
# w = 1.50 down to 1.10, and the lowest logged Val Loss was at w = 1.20;
# 1.60 was never evaluated. Confirm this hand-picked value is intentional.
w_best = 1.60

### Test Model

In [25]:
# Column 0 is the input (Korean), column 1 the target (Math).
x, y = test_data[:, 0], test_data[:, 1]

# Predict the held-out targets with the selected weight.
pre = f(w_best, x)

In [26]:
loss(pre, y)

1141.5

In [27]:
print("x", x)
print("y", y)
print("pre", pre)

x [30  0 60 10]
y [65 50 80 55]
pre [48.  0. 96. 16.]
