## Get and prepare Data

In [24]:
import numpy as np
import pandas as pd

In [25]:
df_data = pd.read_csv('https://raw.githubusercontent.com/jbrownlee/Datasets/master/airline-passengers.csv')
df_data.head(7)

Unnamed: 0,Month,Passengers
0,1949-01,112
1,1949-02,118
2,1949-03,132
3,1949-04,129
4,1949-05,121
5,1949-06,135
6,1949-07,148


In [26]:
df_data.shape

(144, 2)

In [27]:
df_passenger= df_data['Passengers']

In [28]:
# transform univariate time series to supervised learning problem
from numpy import array
# split a univariate sequence into samples
def split_sequence(sequence, n_steps):
    X, y = list(), list()
    for i in range(len(sequence)):
    # find the end of this pattern
        end_ix = i + n_steps
    # check if we are beyond the sequence
        if end_ix > len(sequence)-1:
            break
    # gather input and output parts of the pattern
        # print(i, end_ix)
        seq_x, seq_y = sequence[i:end_ix], sequence[end_ix]
        X.append(seq_x)
        y.append(seq_y)
    return array(X), array(y)

## tuning data

In [29]:
n_steps = 2
X, y = split_sequence(df_passenger, n_steps)

In [30]:
print(X.shape, y.shape)

(142, 2) (142,)


In [31]:
# column names to X and y data frames
df_X = pd.DataFrame(X, columns=['t-'+str(i) for i in range(n_steps-1, -1,-1)])
df_y = pd.DataFrame(y, columns=['t+1 (prediction)'])

# concat df_X and df_y
df = pd.concat([df_X, df_y], axis=1)

In [32]:
# df_X.head()
# df_y.head()
df.head(3)

Unnamed: 0,t-1,t-0,t+1 (prediction)
0,112,118,132
1,118,132,129
2,132,129,121


## prepo

In [33]:
from sklearn.preprocessing import MinMaxScaler
scaler= MinMaxScaler()
X_norm= scaler.fit_transform(df_X)
# y_norm= scaler.fit_transform(df_y)

In [34]:
X_norm

array([[0.01544402, 0.02702703],
       [0.02702703, 0.05405405],
       [0.05405405, 0.04826255],
       [0.04826255, 0.03281853],
       [0.03281853, 0.05984556],
       [0.05984556, 0.08494208],
       [0.08494208, 0.08494208],
       [0.08494208, 0.06177606],
       [0.06177606, 0.02895753],
       [0.02895753, 0.        ],
       [0.        , 0.02702703],
       [0.02702703, 0.02123552],
       [0.02123552, 0.04247104],
       [0.04247104, 0.07142857],
       [0.07142857, 0.05984556],
       [0.05984556, 0.04054054],
       [0.04054054, 0.08687259],
       [0.08687259, 0.12741313],
       [0.12741313, 0.12741313],
       [0.12741313, 0.1042471 ],
       [0.1042471 , 0.05598456],
       [0.05598456, 0.01930502],
       [0.01930502, 0.06949807],
       [0.06949807, 0.07915058],
       [0.07915058, 0.08880309],
       [0.08880309, 0.14285714],
       [0.14285714, 0.11389961],
       [0.11389961, 0.13127413],
       [0.13127413, 0.14285714],
       [0.14285714, 0.18339768],
       [0.

## Split data

In [35]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X_norm, y, test_size=0.2, random_state=0)

## model

In [36]:
# import knn
from sklearn.neighbors import KNeighborsRegressor
model_knn = KNeighborsRegressor(n_neighbors=3)

## fit and predict knn

In [37]:
model_knn.fit(X_train, y_train)
y_pred=model_knn.predict(X_test)

## mse

$$ \text{MSE} = \frac{1}{n} \sum_{i=0}^n (y_i - \hat{y}_i)^2$$ 

In [38]:
from sklearn.metrics import mean_squared_error
mean_squared_error(y_test, y_pred)

2207.5363984674336

## result

In [39]:
y_test.shape

(29,)

In [40]:
y_pred.shape

(29,)

In [41]:
df_y_test = pd.DataFrame(y_test,columns=['y_test'])
df_y_pred = pd.DataFrame(y_pred,columns=['y_pred'])

df_hasil = pd.concat([df_y_test, df_y_pred], axis=1)

In [42]:
df_hasil

Unnamed: 0,y_test,y_pred
0,194,163.666667
1,203,252.0
2,170,143.666667
3,180,221.0
4,145,137.333333
5,119,125.0
6,318,375.333333
7,390,448.666667
8,318,343.666667
9,465,434.666667


In [43]:
# df_hasil.to_excel('df_hasil n_step={}.xlsx'.format(n_steps), index=False)

In [44]:
y.min()

104

In [45]:
y.max()

622