In [1]:
import numpy as np
import matplotlib.pyplot as plt
from catboost import CatBoostRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score

In [2]:
data = np.genfromtxt('BurrXII.csv', delimiter=',', skip_header=1)

In [3]:
n, Size = data.shape
train = data[:int(n*0.8), :]
test = data[int(n*0.8):, :]
test = test[test[:, 0].argsort()]

In [4]:
train_X = train[:, 3:]
train_Y = train[:, 0:3]
test_X = test[:, 3:]
test_Y = test[:, 0:3]

### Регрессия на интервалах

In [5]:
BurrXII_inter = CatBoostRegressor(loss_function='MultiRMSE')
BurrXII_inter.fit(train_X, train_Y, eval_set = (test_X, test_Y))

0:	learn: 0.6745601	test: 0.6690755	best: 0.6690755 (0)	total: 3.65s	remaining: 1h 44s
1:	learn: 0.6713328	test: 0.6660247	best: 0.6660247 (1)	total: 8s	remaining: 1h 6m 32s
2:	learn: 0.6681937	test: 0.6631018	best: 0.6631018 (2)	total: 12s	remaining: 1h 6m 42s
3:	learn: 0.6651546	test: 0.6602837	best: 0.6602837 (3)	total: 16.2s	remaining: 1h 7m 23s
4:	learn: 0.6622705	test: 0.6575891	best: 0.6575891 (4)	total: 21s	remaining: 1h 9m 48s
5:	learn: 0.6595205	test: 0.6550216	best: 0.6550216 (5)	total: 25.1s	remaining: 1h 9m 14s
6:	learn: 0.6568947	test: 0.6526021	best: 0.6526021 (6)	total: 29s	remaining: 1h 8m 33s
7:	learn: 0.6543527	test: 0.6502002	best: 0.6502002 (7)	total: 33.1s	remaining: 1h 8m 19s
8:	learn: 0.6518485	test: 0.6478244	best: 0.6478244 (8)	total: 39.5s	remaining: 1h 12m 24s
9:	learn: 0.6494203	test: 0.6455061	best: 0.6455061 (9)	total: 43.9s	remaining: 1h 12m 27s
10:	learn: 0.6471362	test: 0.6433486	best: 0.6433486 (10)	total: 48.3s	remaining: 1h 12m 20s
11:	learn: 0.6449

<catboost.core.CatBoostRegressor at 0x1471ef140>

In [6]:
Cat_predictions = BurrXII_inter.predict(test_X)

In [7]:
BurrXII_inter.save_model('BurrXII_inter_model.cbm')

In [8]:
#Оценка качества регрессии первого параметра
mape = np.mean(np.abs((test_Y[:,0] - Cat_predictions[:,0]) / test_Y[:,0])) * 100
r2 = r2_score(test_Y[:,0], Cat_predictions[:,0])
mse = mean_squared_error(test_Y[:,0], Cat_predictions[:,0])
rmse = np.sqrt(mean_squared_error(test_Y[:,0], Cat_predictions[:,0]))
print("\nInter par 1")
print(f'MSE: {mse}')
print(f'RMSE: {rmse}')
print(f'R² Score: {r2}')
print(f'MAPE: {mape}%')

#Оценка качества регрессии второго параметра
mape = np.mean(np.abs((test_Y[:,1] - Cat_predictions[:,1]) / test_Y[:,1])) * 100
r2 = r2_score(test_Y[:,1], Cat_predictions[:,1])
mse = mean_squared_error(test_Y[:,1], Cat_predictions[:,1])
rmse = np.sqrt(mean_squared_error(test_Y[:,1], Cat_predictions[:,1]))
print("\nInter par 2")
print(f'MSE: {mse}')
print(f'RMSE: {rmse}')
print(f'R² Score: {r2}')
print(f'MAPE: {mape}%')

#Оценка качества регрессии третьего параметра
mape = np.mean(np.abs((test_Y[:,2] - Cat_predictions[:,2]) / test_Y[:,2])) * 100
r2 = r2_score(test_Y[:,2], Cat_predictions[:,2])
mse = mean_squared_error(test_Y[:,2], Cat_predictions[:,2])
rmse = np.sqrt(mean_squared_error(test_Y[:,2], Cat_predictions[:,2]))
print("\nInter par 3")
print(f'MSE: {mse}')
print(f'RMSE: {rmse}')
print(f'R² Score: {r2}')
print(f'MAPE: {mape}%')


Inter par 1
MSE: 0.05197285257555355
RMSE: 0.22797555258306437
R² Score: 0.7172376328166303
MAPE: 8.807451864128431%

Inter par 2
MSE: 0.16046008925024371
RMSE: 0.40057469871453905
R² Score: 0.1372307796920722
MAPE: 15.845065314317457%

Inter par 3
MSE: 0.004679449580923136
RMSE: 0.06840650247544552
R² Score: 0.9428080325920455
MAPE: 11.121217290730561%


### Регрессия на статистиках

In [9]:
stat_M = np.zeros((7999,10)) # Матрицы для записиси статистик наблюдений
test_stat_M = np.zeros((2000,10))

In [10]:
#Далее для каждого наблюдения вычисляются 4 статистики
stat_M[:,0] = np.mean(train_X, axis=1) # Вычисляется среднее 
stat_M[:,1] = np.var(train_X, axis=1) # Вычисляется дисперсию 
stat_M[:,2] = np.std(train_X, axis=1) # Вычисляется стандартное отклонение
stat_M[:,3] =  stat_M[:,2] / stat_M[:,0] # Вычисляется вариацию 

quantiles = [0.1, 0.25, 0.5, 0.75, 0.9, 0.95]
for i, q in enumerate(quantiles):
    stat_M[:, 4 + i] = np.quantile(train_X, q, axis=1)

In [11]:
test_stat_M[:,0] = np.mean(test_X, axis=1)
test_stat_M[:,1] = np.var(test_X, axis=1)
test_stat_M[:,2] = np.std(test_X, axis=1)
test_stat_M[:,3] =  test_stat_M[:,2] / test_stat_M[:,0]

quantiles = [0.1, 0.25, 0.5, 0.75, 0.9, 0.95]
for i, q in enumerate(quantiles):
    test_stat_M[:, 4 + i] = np.quantile(test_X, q, axis=1)

In [12]:
BurrXII_stat = CatBoostRegressor(iterations=10000,
                          learning_rate=0.05,
                          depth=5, loss_function='MultiRMSE')
BurrXII_stat.fit(stat_M, train_Y, eval_set = (test_stat_M, test_Y))

0:	learn: 0.6614918	test: 0.6559703	best: 0.6559703 (0)	total: 2.16ms	remaining: 21.6s
1:	learn: 0.6439584	test: 0.6385520	best: 0.6385520 (1)	total: 4.17ms	remaining: 20.8s
2:	learn: 0.6282209	test: 0.6229033	best: 0.6229033 (2)	total: 5.79ms	remaining: 19.3s
3:	learn: 0.6138213	test: 0.6088757	best: 0.6088757 (3)	total: 7.9ms	remaining: 19.7s
4:	learn: 0.6009793	test: 0.5961288	best: 0.5961288 (4)	total: 9.47ms	remaining: 18.9s
5:	learn: 0.5879850	test: 0.5833611	best: 0.5833611 (5)	total: 10.9ms	remaining: 18.2s
6:	learn: 0.5757570	test: 0.5712207	best: 0.5712207 (6)	total: 12.2ms	remaining: 17.5s
7:	learn: 0.5636904	test: 0.5595021	best: 0.5595021 (7)	total: 13.5ms	remaining: 16.9s
8:	learn: 0.5528211	test: 0.5489009	best: 0.5489009 (8)	total: 14.8ms	remaining: 16.4s
9:	learn: 0.5425287	test: 0.5389711	best: 0.5389711 (9)	total: 16.2ms	remaining: 16.2s
10:	learn: 0.5332721	test: 0.5299276	best: 0.5299276 (10)	total: 17.6ms	remaining: 16s
11:	learn: 0.5244594	test: 0.5214111	best: 0

<catboost.core.CatBoostRegressor at 0x11db12390>

In [13]:
BurrXII_stat.save_model('BurrXII_stat_model.cbm')

In [14]:
Cat_predictions = BurrXII_stat.predict(test_stat_M)

In [15]:
#Оценка качества регрессии первого параметра
mape = np.mean(np.abs((test_Y[:,0] - Cat_predictions[:,0]) / test_Y[:,0])) * 100
r2 = r2_score(test_Y[:,0], Cat_predictions[:,0])
mse = mean_squared_error(test_Y[:,0], Cat_predictions[:,0])
rmse = np.sqrt(mean_squared_error(test_Y[:,0], Cat_predictions[:,0]))
print("\nStat par 1")
print(f'MSE: {mse}')
print(f'RMSE: {rmse}')
print(f'R² Score: {r2}')
print(f'MAPE: {mape}%')

#Оценка качества регрессии второго параметра
mape = np.mean(np.abs((test_Y[:,1] - Cat_predictions[:,1]) / test_Y[:,1])) * 100
r2 = r2_score(test_Y[:,1], Cat_predictions[:,1])
mse = mean_squared_error(test_Y[:,1], Cat_predictions[:,1])
rmse = np.sqrt(mean_squared_error(test_Y[:,1], Cat_predictions[:,1]))
print("\nStat par 2")
print(f'MSE: {mse}')
print(f'RMSE: {rmse}')
print(f'R² Score: {r2}')
print(f'MAPE: {mape}%')

#Оценка качества регрессии третьего параметра
mape = np.mean(np.abs((test_Y[:,2] - Cat_predictions[:,2]) / test_Y[:,2])) * 100
r2 = r2_score(test_Y[:,2], Cat_predictions[:,2])
mse = mean_squared_error(test_Y[:,2], Cat_predictions[:,2])
rmse = np.sqrt(mean_squared_error(test_Y[:,2], Cat_predictions[:,2]))
print("\nStat par 3")
print(f'MSE: {mse}')
print(f'RMSE: {rmse}')
print(f'R² Score: {r2}')
print(f'MAPE: {mape}%')


Stat par 1
MSE: 0.0032675074043305827
RMSE: 0.05716211511421339
R² Score: 0.9822228705439137
MAPE: 1.8641381503448209%

Stat par 2
MSE: 0.037370212449194924
RMSE: 0.19331376683825424
R² Score: 0.7990661153924009
MAPE: 6.821630569866402%

Stat par 3
MSE: 0.0008362170829949337
RMSE: 0.028917418332121796
R² Score: 0.989779802233239
MAPE: 4.734258364245178%


### Регрессия на моментах

In [16]:
train_m_X = train_X.copy()
for i in range(train_X.shape[0]):
    for j in range(1, train_X.shape[1]):
        train_m_X[i,j] += train_m_X[i,j-1]

test_m_X = test_X.copy()
for i in range(test_X.shape[0]):
    for j in range(1, test_X.shape[1]):
        test_m_X[i,j] += test_m_X[i,j-1]

In [17]:
BurrXII_m = CatBoostRegressor(loss_function='MultiRMSE')
BurrXII_m.fit(train_m_X, train_Y, eval_set = (test_m_X, test_Y))

0:	learn: 0.6736736	test: 0.6680086	best: 0.6680086 (0)	total: 3.35s	remaining: 55m 43s
1:	learn: 0.6695918	test: 0.6639527	best: 0.6639527 (1)	total: 6.52s	remaining: 54m 14s
2:	learn: 0.6657613	test: 0.6601746	best: 0.6601746 (2)	total: 10.3s	remaining: 56m 57s
3:	learn: 0.6622111	test: 0.6566629	best: 0.6566629 (3)	total: 14.6s	remaining: 1h 44s
4:	learn: 0.6587826	test: 0.6532691	best: 0.6532691 (4)	total: 18.5s	remaining: 1h 1m 16s
5:	learn: 0.6555026	test: 0.6500020	best: 0.6500020 (5)	total: 22.2s	remaining: 1h 1m 11s
6:	learn: 0.6523605	test: 0.6469061	best: 0.6469061 (6)	total: 25.7s	remaining: 1h 40s
7:	learn: 0.6494612	test: 0.6440510	best: 0.6440510 (7)	total: 29.1s	remaining: 1h 11s
8:	learn: 0.6466711	test: 0.6413165	best: 0.6413165 (8)	total: 32.4s	remaining: 59m 25s
9:	learn: 0.6439844	test: 0.6386639	best: 0.6386639 (9)	total: 35.7s	remaining: 58m 57s
10:	learn: 0.6414706	test: 0.6361785	best: 0.6361785 (10)	total: 38.9s	remaining: 58m 14s
11:	learn: 0.6391106	test: 0.

<catboost.core.CatBoostRegressor at 0x1471eff80>

In [18]:
Cat_predictions = BurrXII_m.predict(test_X)

In [19]:
BurrXII_m.save_model('BurrXII_moment_model.cbm')

In [20]:
#Оценка качества регрессии первого параметра
mape = np.mean(np.abs((test_Y[:,0] - Cat_predictions[:,0]) / test_Y[:,0])) * 100
r2 = r2_score(test_Y[:,0], Cat_predictions[:,0])
mse = mean_squared_error(test_Y[:,0], Cat_predictions[:,0])
rmse = np.sqrt(mean_squared_error(test_Y[:,0], Cat_predictions[:,0]))
print("\nMoment par 1")
print(f'MSE: {mse}')
print(f'RMSE: {rmse}')
print(f'R² Score: {r2}')
print(f'MAPE: {mape}%')

#Оценка качества регрессии второго параметра
mape = np.mean(np.abs((test_Y[:,1] - Cat_predictions[:,1]) / test_Y[:,1])) * 100
r2 = r2_score(test_Y[:,1], Cat_predictions[:,1])
mse = mean_squared_error(test_Y[:,1], Cat_predictions[:,1])
rmse = np.sqrt(mean_squared_error(test_Y[:,1], Cat_predictions[:,1]))
print("\nMoment par 2")
print(f'MSE: {mse}')
print(f'RMSE: {rmse}')
print(f'R² Score: {r2}')
print(f'MAPE: {mape}%')

#Оценка качества регрессии третьего параметра
mape = np.mean(np.abs((test_Y[:,2] - Cat_predictions[:,2]) / test_Y[:,2])) * 100
r2 = r2_score(test_Y[:,2], Cat_predictions[:,2])
mse = mean_squared_error(test_Y[:,2], Cat_predictions[:,2])
rmse = np.sqrt(mean_squared_error(test_Y[:,2], Cat_predictions[:,2]))
print("\Moment par 3")
print(f'MSE: {mse}')
print(f'RMSE: {rmse}')
print(f'R² Score: {r2}')
print(f'MAPE: {mape}%')


Moment par 1
MSE: 0.4659982348295775
RMSE: 0.6826406337375307
R² Score: -1.5352998239250426
MAPE: 23.442652158422117%

Moment par 2
MSE: 0.18345462444570207
RMSE: 0.4283160333745423
R² Score: 0.013592700624391107
MAPE: 17.302819921759397%
\Moment par 3
MSE: 0.3039013223631256
RMSE: 0.5512724574682882
R² Score: -2.7142647277753946
MAPE: 89.13935609773803%


  print("\Moment par 3")
