In [325]:
import pandas as pd
import numpy as np
import plotly.graph_objects as go

In [326]:
# Load the climate observations from the CSV next to the notebook.
data = pd.read_csv("climate.csv")
# drop_duplicates() returns a new frame rather than modifying in place, so the
# result has to be kept; renumber the rows afterwards so that positional and
# label-based lookups on the frame keep agreeing.
data = data.drop_duplicates().reset_index(drop=True)
# Parse all dates in one vectorized call and store them as float seconds since
# the Unix epoch. Dividing a timedelta by one second is independent of the
# datetime resolution pandas picks (ns/us/s), unlike casting to int64.
data["date"] = (
    pd.to_datetime(data["date"]) - pd.Timestamp("1970-01-01")
) / pd.Timedelta(seconds=1)
data.head()

Unnamed: 0,date,meantemp,humidity,wind_speed,meanpressure
0,1356998000.0,10.0,84.5,0.0,1015.666667
1,1357085000.0,7.4,92.0,2.98,1017.8
2,1357171000.0,7.166667,87.0,4.633333,1018.666667
3,1357258000.0,8.666667,71.333333,1.233333,1017.166667
4,1357344000.0,6.0,86.833333,3.7,1016.5


In [327]:
# Row window of the dataset used for the experiment: [start_ind, end_ind).
start_ind, end_ind = 300, 450
# Number of leading points in the window that form the training set (80 %).
n = round((end_ind - start_ind) * 0.8)

In [328]:
# Take the analysis window strictly by position. The original mixed a
# positional slice with a label lookup (data["date"][start_ind]), which only
# agree while the frame has a default 0..N-1 index.
x = data["date"].iloc[start_ind:end_ind].to_numpy()
# Shift time so the window starts at 0 (seconds elapsed since its first row).
x = x - x[0]
y = data["meantemp"].iloc[start_ind:end_ind].to_numpy()

In [329]:
# Chronological split: the first n points are used for fitting, the remainder
# is held out for evaluation.
train_x, test_x = x[:n], x[n:]
train_y, test_y = y[:n], y[n:]

In [330]:
# Show the training and held-out segments on one chart.
# The x values are seconds elapsed since the first row of the window.
fig = go.Figure(
    data=[
        go.Scatter(x=train_x, y=train_y, name="train"),
        go.Scatter(x=test_x, y=test_y, name="test"),
    ]
)
fig.update_layout(
    title="Выборки",
    xaxis_title="Дата",
    yaxis_title="Температура",
)

МНК

In [331]:
from scipy.optimize import curve_fit

In [332]:
def func(x, c0, c1, c2):
  """Evaluate the quadratic model c0 + c1*x + c2*x**2.

  Args:
    x: Scalar or array of inputs; arithmetic is applied element-wise when x
      supports it.
    c0: Constant coefficient.
    c1: Coefficient of the linear term.
    c2: Coefficient of the quadratic term.

  Returns:
    The model value(s) at x.
  """
  constant_term = c0
  linear_term = c1 * x
  quadratic_term = c2 * x ** 2
  return constant_term + linear_term + quadratic_term

In [333]:
# Least-squares fit of the quadratic model to the training segment.
# popt holds the fitted coefficients, pcov their estimated covariance.
popt, pcov = curve_fit(f=func, xdata=train_x, ydata=train_y)
c0, c1, c2 = popt

# Model values on the training inputs, used for the in-sample plot.
res_y = func(train_x, *popt)

In [334]:
# Compare the training data with the fitted curve currently stored in res_y.
fig = go.Figure(
    data=[
        go.Scatter(x=train_x, y=train_y, name="Исходные"),
        go.Scatter(x=train_x, y=res_y, name="Результат"),
    ]
)
fig.update_layout(
    title="МНК (train)",
    xaxis_title="Дата",
    yaxis_title="Температура",
)

In [335]:
# Evaluate the fitted quadratic on the held-out inputs (overwrites res_y).
res_y = func(x=test_x, c0=c0, c1=c1, c2=c2)

In [336]:
# Compare the held-out data with the quadratic model's values in res_y.
fig = go.Figure(
    data=[
        go.Scatter(x=test_x, y=test_y, name="Исходные"),
        go.Scatter(x=test_x, y=res_y, name="Результат"),
    ]
)
fig.update_layout(
    title="МНК (test)",
    xaxis_title="Дата",
    yaxis_title="Температура",
)

KNN

In [337]:
from sklearn.neighbors import KNeighborsRegressor

In [338]:
# Fit a 3-nearest-neighbours regressor on time alone. scikit-learn expects a
# 2-D feature matrix, hence the reshape to a single column; fit() returns the
# estimator itself, so construction and fitting can be chained.
model = KNeighborsRegressor(n_neighbors=3).fit(train_x.reshape(-1, 1), train_y)

# In-sample predictions for the training plot.
res_y = model.predict(train_x.reshape(-1, 1))

In [339]:
# Compare the training data with the KNN predictions stored in res_y.
fig = go.Figure(
    data=[
        go.Scatter(x=train_x, y=train_y, name="Исходные"),
        go.Scatter(x=train_x, y=res_y, name="Результат"),
    ]
)
fig.update_layout(
    title="KNN (train)",
    xaxis_title="Дата",
    yaxis_title="Температура",
)

In [340]:
# Predict the held-out segment with the fitted KNN model (overwrites res_y);
# the new axis turns the 1-D time vector into a single-column matrix.
res_y = model.predict(test_x[:, np.newaxis])

In [341]:
# Compare the held-out data with the KNN predictions stored in res_y.
fig = go.Figure(
    data=[
        go.Scatter(x=test_x, y=test_y, name="Исходные"),
        go.Scatter(x=test_x, y=res_y, name="Результат"),
    ]
)
fig.update_layout(
    title="KNN (test)",
    xaxis_title="Дата",
    yaxis_title="Температура",
)

ARIMA

In [342]:
from statsmodels.tsa.arima.model import ARIMA

In [343]:
# Walk-forward ARIMA(1, 1, 0) evaluation: for every held-out point, refit on
# everything observed so far, forecast one step ahead, then reveal the true
# value to the model's history before moving on.
history = list(train_y)
predictions = []

for obs in test_y:
  # Use a dedicated name so the fitted KNN regressor bound to `model` in the
  # earlier cells is not overwritten by an unfitted ARIMA specification.
  arima_model = ARIMA(history, order=(1, 1, 0))
  model_fit = arima_model.fit()
  # forecast() with no arguments returns the one-step-ahead forecast.
  yhat = model_fit.forecast()[0]
  predictions.append(yhat)
  history.append(obs)

In [345]:
# Plot both data segments together with the one-step-ahead ARIMA forecasts
# collected for the held-out points.
fig = go.Figure(
    data=[
        go.Scatter(x=train_x, y=train_y, name="train"),
        go.Scatter(x=test_x, y=test_y, name="test"),
        go.Scatter(x=test_x, y=predictions, name="ARIMA результат"),
    ]
)
fig.update_layout(
    title="ARIMA",
    xaxis_title="Дата",
    yaxis_title="Температура",
)