In [1]:
import numpy as np
import matplotlib.pyplot as plt
from catboost import CatBoostRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score

In [2]:
data = np.genfromtxt('Frechet.csv', delimiter=',', skip_header=1)

In [3]:
n, Size = data.shape
train = data[:int(n*0.8), :]
test = data[int(n*0.8):, :]
test = test[test[:, 0].argsort()]

In [None]:
train_X = train[:, 3:]
train_Y = train[:, 0:3]
test_X = test[:, 3:]
test_Y = test[:, 0:3]

### Регрессия на интервалах

In [5]:
Frechet_inter = CatBoostRegressor(loss_function='MultiRMSE')
Frechet_inter.fit(train_X, train_Y, eval_set = (test_X, test_Y))

0:	learn: 0.5369404	test: 0.5368028	best: 0.5368028 (0)	total: 3.43s	remaining: 57m 9s
1:	learn: 0.5333681	test: 0.5333050	best: 0.5333050 (1)	total: 7.7s	remaining: 1h 4m 1s
2:	learn: 0.5300080	test: 0.5299896	best: 0.5299896 (2)	total: 12.1s	remaining: 1h 6m 49s
3:	learn: 0.5267606	test: 0.5268256	best: 0.5268256 (3)	total: 16.5s	remaining: 1h 8m 39s
4:	learn: 0.5235212	test: 0.5237599	best: 0.5237599 (4)	total: 20.3s	remaining: 1h 7m 14s
5:	learn: 0.5204794	test: 0.5208356	best: 0.5208356 (5)	total: 23.9s	remaining: 1h 6m 1s
6:	learn: 0.5175122	test: 0.5179246	best: 0.5179246 (6)	total: 27.7s	remaining: 1h 5m 27s
7:	learn: 0.5146819	test: 0.5152780	best: 0.5152780 (7)	total: 32.3s	remaining: 1h 6m 42s
8:	learn: 0.5119247	test: 0.5127158	best: 0.5127158 (8)	total: 37.2s	remaining: 1h 8m 14s
9:	learn: 0.5092730	test: 0.5101683	best: 0.5101683 (9)	total: 41.7s	remaining: 1h 8m 44s
10:	learn: 0.5067135	test: 0.5076680	best: 0.5076680 (10)	total: 46.1s	remaining: 1h 9m 6s
11:	learn: 0.50

<catboost.core.CatBoostRegressor at 0x17fd000b0>

In [6]:
Cat_predictions = Frechet_inter.predict(test_X)

In [7]:
Frechet_inter.save_model('Frechet_inter_model.cbm')

In [8]:
#Оценка качества регрессии первого параметра
mape = np.mean(np.abs((test_Y[:,0] - Cat_predictions[:,0]) / test_Y[:,0])) * 100
r2 = r2_score(test_Y[:,0], Cat_predictions[:,0])
mse = mean_squared_error(test_Y[:,0], Cat_predictions[:,0])
rmse = np.sqrt(mean_squared_error(test_Y[:,0], Cat_predictions[:,0]))
print("\nInter par 1")
print(f'MSE: {mse}')
print(f'RMSE: {rmse}')
print(f'R² Score: {r2}')
print(f'MAPE: {mape}%')

#Оценка качества регрессии второго параметра
mape = np.mean(np.abs((test_Y[:,1] - Cat_predictions[:,1]) / test_Y[:,1])) * 100
r2 = r2_score(test_Y[:,1], Cat_predictions[:,1])
mse = mean_squared_error(test_Y[:,1], Cat_predictions[:,1])
rmse = np.sqrt(mean_squared_error(test_Y[:,1], Cat_predictions[:,1]))
print("\nInter par 2")
print(f'MSE: {mse}')
print(f'RMSE: {rmse}')
print(f'R² Score: {r2}')
print(f'MAPE: {mape}%')

#Оценка качества регрессии третьего параметра
mape = np.mean(np.abs((test_Y[:,2] - Cat_predictions[:,2]) / test_Y[:,2])) * 100
r2 = r2_score(test_Y[:,2], Cat_predictions[:,2])
mse = mean_squared_error(test_Y[:,2], Cat_predictions[:,2])
rmse = np.sqrt(mean_squared_error(test_Y[:,2], Cat_predictions[:,2]))
print("\nInter par 3")
print(f'MSE: {mse}')
print(f'RMSE: {rmse}')
print(f'R² Score: {r2}')
print(f'MAPE: {mape}%')


Inter par 1
MSE: 0.09298580728870183
RMSE: 0.304935742884795
R² Score: 0.5050389496926899
MAPE: 11.737526623546662%

Inter par 2
MSE: 0.008227074722858432
RMSE: 0.09070322333224125
R² Score: 0.9000465354186465
MAPE: 23.256604843191244%

Inter par 3
MSE: 0.007557141368588346
RMSE: 0.08693182023050217
R² Score: 0.6465994429934918
MAPE: 235.35120911452313%


### Регрессия на статистиках

In [9]:
stat_M = np.zeros((7999,10)) # Матрицы для записиси статистик наблюдений
test_stat_M = np.zeros((2000,10))

In [10]:
#Далее для каждого наблюдения вычисляются 4 статистики
stat_M[:,0] = np.mean(train_X, axis=1) # Вычисляется среднее 
stat_M[:,1] = np.var(train_X, axis=1) # Вычисляется дисперсию 
stat_M[:,2] = np.std(train_X, axis=1) # Вычисляется стандартное отклонение
stat_M[:,3] =  stat_M[:,2] / stat_M[:,0] # Вычисляется вариацию 

quantiles = [0.1, 0.25, 0.5, 0.75, 0.9, 0.95]
for i, q in enumerate(quantiles):
    stat_M[:, 4 + i] = np.quantile(train_X, q, axis=1)

In [11]:
test_stat_M[:,0] = np.mean(test_X, axis=1)
test_stat_M[:,1] = np.var(test_X, axis=1)
test_stat_M[:,2] = np.std(test_X, axis=1)
test_stat_M[:,3] =  test_stat_M[:,2] / test_stat_M[:,0]

quantiles = [0.1, 0.25, 0.5, 0.75, 0.9, 0.95]
for i, q in enumerate(quantiles):
    test_stat_M[:, 4 + i] = np.quantile(test_X, q, axis=1)

In [12]:
Frechet_stat = CatBoostRegressor(iterations=10000,
                          learning_rate=0.05,
                          depth=5, loss_function='MultiRMSE')
Frechet_stat.fit(stat_M, train_Y, eval_set = (test_stat_M, test_Y))

0:	learn: 0.5238099	test: 0.5236263	best: 0.5236263 (0)	total: 1.53ms	remaining: 15.3s
1:	learn: 0.5073536	test: 0.5073088	best: 0.5073088 (1)	total: 3.06ms	remaining: 15.3s
2:	learn: 0.4923725	test: 0.4924931	best: 0.4924931 (2)	total: 4.42ms	remaining: 14.7s
3:	learn: 0.4781981	test: 0.4783075	best: 0.4783075 (3)	total: 6.51ms	remaining: 16.3s
4:	learn: 0.4650801	test: 0.4652604	best: 0.4652604 (4)	total: 8.15ms	remaining: 16.3s
5:	learn: 0.4522694	test: 0.4526132	best: 0.4526132 (5)	total: 9.66ms	remaining: 16.1s
6:	learn: 0.4395556	test: 0.4402612	best: 0.4402612 (6)	total: 11ms	remaining: 15.7s
7:	learn: 0.4278775	test: 0.4286643	best: 0.4286643 (7)	total: 12.3ms	remaining: 15.3s
8:	learn: 0.4170941	test: 0.4178824	best: 0.4178824 (8)	total: 13.6ms	remaining: 15.1s
9:	learn: 0.4069492	test: 0.4077712	best: 0.4077712 (9)	total: 14.8ms	remaining: 14.8s
10:	learn: 0.3978006	test: 0.3986370	best: 0.3986370 (10)	total: 16.2ms	remaining: 14.7s
11:	learn: 0.3891172	test: 0.3901734	best: 

<catboost.core.CatBoostRegressor at 0x30296fdd0>

In [13]:
Frechet_stat.save_model('Frechet_stat_model.cbm')

In [14]:
Cat_predictions = Frechet_stat.predict(test_stat_M)

In [15]:
#Оценка качества регрессии первого параметра
mape = np.mean(np.abs((test_Y[:,0] - Cat_predictions[:,0]) / test_Y[:,0])) * 100
r2 = r2_score(test_Y[:,0], Cat_predictions[:,0])
mse = mean_squared_error(test_Y[:,0], Cat_predictions[:,0])
rmse = np.sqrt(mean_squared_error(test_Y[:,0], Cat_predictions[:,0]))
print("\nStat par 1")
print(f'MSE: {mse}')
print(f'RMSE: {rmse}')
print(f'R² Score: {r2}')
print(f'MAPE: {mape}%')

#Оценка качества регрессии второго параметра
mape = np.mean(np.abs((test_Y[:,1] - Cat_predictions[:,1]) / test_Y[:,1])) * 100
r2 = r2_score(test_Y[:,1], Cat_predictions[:,1])
mse = mean_squared_error(test_Y[:,1], Cat_predictions[:,1])
rmse = np.sqrt(mean_squared_error(test_Y[:,1], Cat_predictions[:,1]))
print("\nStat par 2")
print(f'MSE: {mse}')
print(f'RMSE: {rmse}')
print(f'R² Score: {r2}')
print(f'MAPE: {mape}%')

#Оценка качества регрессии третьего параметра
mape = np.mean(np.abs((test_Y[:,2] - Cat_predictions[:,2]) / test_Y[:,2])) * 100
r2 = r2_score(test_Y[:,2], Cat_predictions[:,2])
mse = mean_squared_error(test_Y[:,2], Cat_predictions[:,2])
rmse = np.sqrt(mean_squared_error(test_Y[:,2], Cat_predictions[:,2]))
print("\nStat par 3")
print(f'MSE: {mse}')
print(f'RMSE: {rmse}')
print(f'R² Score: {r2}')
print(f'MAPE: {mape}%')


Stat par 1
MSE: 0.017441158571460575
RMSE: 0.13206497859561625
R² Score: 0.9071611634418172
MAPE: 4.107060552744106%

Stat par 2
MSE: 0.0009046660123663554
RMSE: 0.030077666338437153
R² Score: 0.9890089120044363
MAPE: 6.84263357159243%

Stat par 3
MSE: 0.0007560531727120971
RMSE: 0.02749642108915444
R² Score: 0.9646440896985756
MAPE: 52.18894561054876%


### Регрессия на моментах

In [16]:
train_m_X = train_X.copy()
for i in range(train_X.shape[0]):
    for j in range(1, train_X.shape[1]):
        train_m_X[i,j] += train_m_X[i,j-1]

test_m_X = test_X.copy()
for i in range(test_X.shape[0]):
    for j in range(1, test_X.shape[1]):
        test_m_X[i,j] += test_m_X[i,j-1]

In [17]:
Frechet_m = CatBoostRegressor(loss_function='MultiRMSE')
Frechet_m.fit(train_m_X, train_Y, eval_set = (test_m_X, test_Y))

0:	learn: 0.5351555	test: 0.5350844	best: 0.5350844 (0)	total: 3.99s	remaining: 1h 6m 24s
1:	learn: 0.5299958	test: 0.5301616	best: 0.5301616 (1)	total: 9.09s	remaining: 1h 15m 34s
2:	learn: 0.5250786	test: 0.5254429	best: 0.5254429 (2)	total: 14s	remaining: 1h 17m 21s
3:	learn: 0.5201078	test: 0.5206391	best: 0.5206391 (3)	total: 18.3s	remaining: 1h 15m 54s
4:	learn: 0.5156713	test: 0.5164092	best: 0.5164092 (4)	total: 23s	remaining: 1h 16m 21s
5:	learn: 0.5112356	test: 0.5120775	best: 0.5120775 (5)	total: 27.3s	remaining: 1h 15m 27s
6:	learn: 0.5071155	test: 0.5081577	best: 0.5081577 (6)	total: 32.1s	remaining: 1h 15m 49s
7:	learn: 0.5033068	test: 0.5045202	best: 0.5045202 (7)	total: 36.1s	remaining: 1h 14m 42s
8:	learn: 0.4996747	test: 0.5010320	best: 0.5010320 (8)	total: 40.3s	remaining: 1h 13m 58s
9:	learn: 0.4961748	test: 0.4977063	best: 0.4977063 (9)	total: 44.4s	remaining: 1h 13m 12s
10:	learn: 0.4928819	test: 0.4945615	best: 0.4945615 (10)	total: 48.2s	remaining: 1h 12m 11s
11

<catboost.core.CatBoostRegressor at 0x30296f890>

In [18]:
Cat_predictions = Frechet_m.predict(test_X)

In [19]:
Frechet_m.save_model('Frechet_moment_model.cbm')

In [21]:
#Оценка качества регрессии первого параметра
mape = np.mean(np.abs((test_Y[:,0] - Cat_predictions[:,0]) / test_Y[:,0])) * 100
r2 = r2_score(test_Y[:,0], Cat_predictions[:,0])
mse = mean_squared_error(test_Y[:,0], Cat_predictions[:,0])
rmse = np.sqrt(mean_squared_error(test_Y[:,0], Cat_predictions[:,0]))
print("\nMoment par 1")
print(f'MSE: {mse}')
print(f'RMSE: {rmse}')
print(f'R² Score: {r2}')
print(f'MAPE: {mape}%')

#Оценка качества регрессии второго параметра
mape = np.mean(np.abs((test_Y[:,1] - Cat_predictions[:,1]) / test_Y[:,1])) * 100
r2 = r2_score(test_Y[:,1], Cat_predictions[:,1])
mse = mean_squared_error(test_Y[:,1], Cat_predictions[:,1])
rmse = np.sqrt(mean_squared_error(test_Y[:,1], Cat_predictions[:,1]))
print("\nMoment par 2")
print(f'MSE: {mse}')
print(f'RMSE: {rmse}')
print(f'R² Score: {r2}')
print(f'MAPE: {mape}%')

#Оценка качества регрессии третьего параметра
mape = np.mean(np.abs((test_Y[:,2] - Cat_predictions[:,2]) / test_Y[:,2])) * 100
r2 = r2_score(test_Y[:,2], Cat_predictions[:,2])
mse = mean_squared_error(test_Y[:,2], Cat_predictions[:,2])
rmse = np.sqrt(mean_squared_error(test_Y[:,2], Cat_predictions[:,2]))
print("\nMoment par 3")
print(f'MSE: {mse}')
print(f'RMSE: {rmse}')
print(f'R² Score: {r2}')
print(f'MAPE: {mape}%')


Moment par 1
MSE: 0.18884499826549883
RMSE: 0.4345629968894025
R² Score: -0.005217047764778382
MAPE: 17.14245327359611%

Moment par 2
MSE: 0.22835437582859378
RMSE: 0.47786439062624636
R² Score: -1.7743531917806767
MAPE: 75.07736001829889%

Moment par 3
MSE: 0.10545349837234508
RMSE: 0.324736044153317
R² Score: -3.9314050439727426
MAPE: 190.05050903984352%
