## Jupyter Basic

    ### 제목을 클릭하면 파일 이름 변경 가능
    ### 셀을 누르고 a를 누르면, 위에 셀 추가
    ### 셀을 누르고 b를 누르면, 아래에 셀 추가
    ### 셀을 누르고 dd를 누르면, 해당 셀 삭제
    ### 이 외에도 잘라내기/복사/붙여넣기/이동 가능
    ### Shift를 누른 상태에서 드래그 가능
    ### 셀을 클릭 후 M을 누르면 Markdown으로 변환
    ### 다시 코드로 변경하려면 메인 위젯 아래의 오른쪽 2번째 리스트 박스 활용
    ### Ctrl + Enter를 누르면 실행
    ### Shift + Enter를 누르면 실행하며 새로운 줄 생성
    ### 이 외에도 다양한 메뉴 활용 가능

## Machine Learning Basic

Predict the math score from the Korean score.

### Raw Data

In [1]:
import numpy as np
import pandas as pd

In [2]:
# Read the sample score sheet: comma-separated, first row is the header,
# decoded as EUC-KR.
data = pd.read_csv(
    './data/sample.csv',
    sep=',',
    header=0,
    encoding='euc-kr',
)

In [3]:
data.head()

Unnamed: 0,이름,국어성적,수학성적
0,강다니엘,10,55
1,강동호,60,80
2,강미나,0,50
3,권현빈,20,60
4,김도연,60,80


In [4]:
# Keep only the Korean-score and math-score columns, as a plain NumPy array
# (column 0 is later used as the input, column 1 as the target).
data = data.loc[:, ['국어성적', '수학성적']].values

In [5]:
data

array([[10, 55],
       [60, 80],
       [ 0, 50],
       [20, 60],
       [60, 80],
       [30, 65],
       [60, 80],
       [60, 80],
       [10, 55],
       [40, 70],
       [70, 85],
       [10, 55],
       [30, 65],
       [ 0, 50],
       [20, 60],
       [60, 80],
       [90, 95],
       [ 0, 50],
       [ 0, 50],
       [90, 95],
       [10, 55],
       [90, 95],
       [60, 80],
       [10, 55]], dtype=int64)

In [6]:
data.shape

(24, 2)

### Split Data: Train Data, Test Data

In [7]:
# Sample 20 distinct row indices for training; the rows not drawn here
# become the test set.
train_len = 20
train_idxs = np.random.choice(data.shape[0], size=train_len, replace=False)

In [8]:
train_idxs

array([14,  0,  5, 11, 19,  3,  7, 13,  4, 17, 10, 18, 22,  9, 20, 21, 15,
       12, 16, 23])

In [9]:
# Gather the sampled rows (all columns) into the training array.
train_data = data[train_idxs, :]

In [10]:
train_data

array([[20, 60],
       [10, 55],
       [30, 65],
       [10, 55],
       [90, 95],
       [20, 60],
       [60, 80],
       [ 0, 50],
       [60, 80],
       [ 0, 50],
       [70, 85],
       [ 0, 50],
       [60, 80],
       [40, 70],
       [10, 55],
       [90, 95],
       [60, 80],
       [30, 65],
       [90, 95],
       [10, 55]], dtype=int64)

In [11]:
# Every row index that was not sampled for training, in ascending order.
test_idxs = sorted(set(range(len(data))) - set(train_idxs))

In [12]:
test_idxs

[1, 2, 6, 8]

In [13]:
# Gather the held-out rows (all columns) into the test array.
test_data = data[test_idxs, :]

In [14]:
test_data

array([[60, 80],
       [ 0, 50],
       [60, 80],
       [10, 55]], dtype=int64)

### Set Models

In [15]:
def f(w, x):
    """Linear model without an intercept: scale the input by the weight.

    Args:
        w: Weight (slope) of the model.
        x: Input value(s); anything that supports ``w * x``.

    Returns:
        The prediction ``w * x``.
    """
    prediction = w * x
    return prediction

### Set Loss Function

In [16]:
def loss(pre, y):
    """Sum of squared errors between predictions and targets.

    Args:
        pre: Predicted values (an array-like supporting ``-``, ``**`` and ``.sum()``).
        y: Target values, broadcast-compatible with ``pre``.

    Returns:
        The sum over all elements of ``(pre - y) ** 2``.
    """
    residual = pre - y
    squared_error = residual ** 2
    return squared_error.sum()

### Split Data: Train Data, Validation Data

In [17]:
# Second split: sample 16 distinct row indices of the current training rows;
# the rows not drawn here are meant for validation.
train_len = 16
train_idxs = np.random.choice(train_data.shape[0], size=train_len, replace=False)

In [18]:
train_idxs

array([19, 12, 13,  2,  5,  9, 10, 18,  7,  1, 15, 14,  4,  8,  0, 16])

In [19]:
train_data = train_data[train_idxs]

In [20]:
val_idxs = [idx for idx in range(len(train_data)) if idx not in train_idxs]

In [21]:
val_idxs

[3, 6, 11]

In [22]:
val_data = train_data[val_idxs]

### Train and Validate

In [23]:
# Rule-based training:
#   - if the training loss is smaller than at the previous step, decrease w by 0.1
#   - otherwise, increase w by 0.1

w = 1.5  # initial weight, i.e. "the math score is 1.5 times the Korean score"
prev_loss_value = np.inf  # start from infinity, the largest possible loss

for i in range(5):
    # Loss on the training rows for the current weight
    x, y = train_data[:, 0], train_data[:, 1]
    pre = f(w, x)
    train_loss_value = loss(pre, y)

    # Loss on the validation rows (reported only; it does not drive the update)
    x, y = val_data[:, 0], val_data[:, 1]
    pre = f(w, x)
    val_loss_value = loss(pre, y)

    print("--- Step %d ---" % (i + 1))
    print("Current Model: \t %2.2f" % (w))
    print("Trai Loss: \t %2.4f" % (train_loss_value))
    print("Val Loss: \t %2.4f" % (val_loss_value))

    # Apply the rule, then remember this step's loss for the next comparison
    w += -0.1 if prev_loss_value > train_loss_value else 0.1
    prev_loss_value = train_loss_value

--- Step 1 ---
Current Model: 	 1.50
Trai Loss: 	 17600.0000
Val Loss: 	 2400.0000
--- Step 2 ---
Current Model: 	 1.40
Trai Loss: 	 15916.0000
Val Loss: 	 2379.0000
--- Step 3 ---
Current Model: 	 1.30
Trai Loss: 	 15104.0000
Val Loss: 	 2476.0000
--- Step 4 ---
Current Model: 	 1.20
Trai Loss: 	 15164.0000
Val Loss: 	 2691.0000
--- Step 5 ---
Current Model: 	 1.30
Trai Loss: 	 15104.0000
Val Loss: 	 2476.0000


### Select Model

In [24]:
# Hand-picked from the steps printed above: the weight with the lowest
# validation loss.
w_best = 1.4

### Test Model

In [25]:
# Predict on the held-out test rows with the selected weight
# (column 0 is the input, column 1 the target).
x, y = test_data[:, 0], test_data[:, 1]
pre = f(w_best, x)

In [26]:
loss(pre, y)

4213.0

In [28]:
# Show inputs, targets and predictions one per line for a quick comparison.
for label, values in (("x", x), ("y", y), ("pre", pre)):
    print(label, values)

x [60  0 60 10]
y [80 50 80 55]
pre [84.  0. 84. 14.]
