### Импорт библиотек


In [None]:
%pylab inline
import numpy as np
import pandas as pd
import pylab as plt
from datetime import datetime

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation
from tensorflow.keras import optimizers
from tensorflow.keras.callbacks import TensorBoard


from sklearn.model_selection import train_test_split, GridSearchCV, KFold, cross_val_score
from sklearn.ensemble import BaggingRegressor, GradientBoostingRegressor
from sklearn.metrics import r2_score
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.tree import DecisionTreeRegressor

import seaborn as sns
sns.set(style="darkgrid")
from IPython.display import Markdown, display
from sklearn.linear_model import Ridge
from sklearn.model_selection import train_test_split
import ppscore as pps

def printmd(string):
    """Show ``string`` in the notebook output, rendered as Markdown."""
    rendered = Markdown(string)
    display(rendered)

# Raise pandas' display limits so frames with up to 999 rows / columns are
# printed without truncation.
pd.set_option('display.max_rows', 999)
pd.set_option('display.max_columns', 999)


# NOTE(review): presumably a shared random seed; it is not referenced in the
# visible code (the splits below pass their own random_state).
RND_SEED = 1987

### Функции для оценки

In [None]:
# Model selection for the one-feature networks
def ann_model(Xtrain, ytrain, Xtest, ytest):
    """Grid-search a small linear network and return the best candidate.

    Every combination of 1-3 hidden neurons and Adam learning rates
    1e-2 / 1e-3 / 1e-4 is trained for 50 epochs on a single input feature.
    For each candidate the train/validation loss curves are plotted and the
    final validation loss is printed.

    Args:
        Xtrain, ytrain: training features / targets, passed to ``model.fit``.
        Xtest, ytest: hold-out data, passed as ``validation_data``.

    Returns:
        ``(best_model, min_mse)``: the fitted model with the lowest
        last-epoch validation MSE, and that MSE.

    Raises:
        ValueError: if no candidate ends with a finite validation loss.
    """
    # Use the Series name (when there is one) to label the plots.
    feature_name = getattr(Xtrain, 'name', None)

    # Start from "no model yet" / infinity so the best candidate is always
    # kept, even when every validation loss is >= 1.
    best_model = None
    min_mse = float('inf')

    for n_neurons in range(1, 4):
        for i in [1e-2, 1e-3, 1e-4]:
            model = Sequential()
            model.add(tf.keras.Input(shape=(1,)))
            model.add(Dense(n_neurons, activation='linear'))
            model.add(Dense(1, activation='linear'))
            # `learning_rate` is the supported keyword; `lr` is a deprecated alias.
            opt = optimizers.Adam(learning_rate=i)
            model.compile(loss='mean_squared_error', optimizer=opt)
            history = model.fit(Xtrain, ytrain, validation_data=(Xtest, ytest), epochs=50, verbose=0)

            # Plot training & validation loss values
            plt.plot(history.history['loss'])
            plt.plot(history.history['val_loss'])
            title_text = 'Feature_name: ' + str(feature_name) + '\n' + 'Model parameters:' + str(n_neurons) + ' neurons, ' + str(i) + ' learning rate'
            plt.title(title_text)
            plt.ylabel('Loss')
            plt.xlabel('Epoch')
            plt.legend(['Train', 'Test'], loc='upper left')
            plt.show()

            current_mse = history.history['val_loss'][-1]
            print('Ошибка на тестовом сете: ', current_mse)

            if current_mse < min_mse:
                min_mse = current_mse
                best_model = model

    if best_model is None:
        raise ValueError('no candidate network produced a finite validation loss')

    return best_model, min_mse

# Build the column of forecasts for one currency pair
def fill_predictions(predictions, currency_pair, model, start_value=None):
    """Fill one column of the forecast table by iterating a one-lag model.

    The first row of the column is seeded with ``start_value``; every later
    row is ``model.predict`` applied to the value of the row above it.

    Args:
        predictions: forecast DataFrame; modified in place.
        currency_pair: name of the column to fill.
        model: object whose ``predict`` accepts a ``(1, 1)`` array.
        start_value: seed for the first row. When omitted, the first value of
            the global ``means_without_na[currency_pair]`` is used.

    Returns:
        The same ``predictions`` object, with the column filled.
    """
    if start_value is None:
        start_value = means_without_na[currency_pair].iloc[0]

    # Address cells positionally through .iloc: chained `frame[col][i] = ...`
    # may write to a temporary copy, and integer keys on a string-labelled
    # index are deprecated.
    column = predictions.columns.get_loc(currency_pair)
    predictions.iloc[0, column] = start_value

    for step in range(1, len(predictions)):
        previous = np.array([[float(predictions.iloc[step - 1, column])]])
        forecast = np.reshape(model.predict(previous), -1)
        predictions.iloc[step, column] = forecast[0]

    return predictions


# Model selection for the two-feature interest-rate networks
def rates_model(Xtrain, ytrain, Xtest, ytest):
    """Grid-search a small linear network on two features; return the best.

    Every combination of 2-5 hidden neurons and Adam learning rates
    1e-2 / 1e-3 / 1e-4 is trained for 100 epochs. For each candidate the
    train/validation loss curves are plotted and the final validation loss
    is printed.

    Args:
        Xtrain, ytrain: training features (two columns) / targets.
        Xtest, ytest: hold-out data, passed as ``validation_data``.

    Returns:
        ``(best_model, min_mse)``: the fitted model with the lowest
        last-epoch validation MSE, and that MSE.

    Raises:
        ValueError: if no candidate ends with a finite validation loss.
    """
    # Start from "no model yet" / infinity so the best candidate is always
    # kept, even when every validation loss is >= 1.
    best_model = None
    min_mse = float('inf')

    for n_neurons in range(2, 6):
        for i in [1e-2, 1e-3, 1e-4]:
            model = Sequential()
            model.add(tf.keras.Input(shape=(2,)))
            model.add(Dense(n_neurons, activation='linear'))
            model.add(Dense(1, activation='linear'))
            # `learning_rate` is the supported keyword; `lr` is a deprecated alias.
            opt = optimizers.Adam(learning_rate=i)
            model.compile(loss='mean_squared_error', optimizer=opt)
            history = model.fit(Xtrain, ytrain,
                                validation_data=(Xtest, ytest),
                                epochs=100, verbose=0)

            # Plot training & validation loss values
            plt.plot(history.history['loss'])
            plt.plot(history.history['val_loss'])
            title_text = 'Model parameters:' + str(n_neurons) + ' neurons, ' + str(i) + ' learning rate'
            plt.title(title_text)
            plt.ylabel('Loss')
            plt.xlabel('Epoch')
            plt.legend(['Train', 'Test'], loc='upper left')
            plt.show()

            current_mse = history.history['val_loss'][-1]
            print('Ошибка на тестовом сете: ', current_mse)

            if current_mse < min_mse:
                min_mse = current_mse
                best_model = model

    if best_model is None:
        raise ValueError('no candidate network produced a finite validation loss')

    return best_model, min_mse

### Предобработка данных ДВК

In [None]:
# Load the raw DVK workbook; header=None keeps every sheet row as data.
df = pd.read_excel('DVK.xlsx', sheet_name='sheet0', header=None)
print(df.shape)

# Drop the rows labelled 0-5 (presumably spreadsheet preamble -- confirm
# against the file) and renumber the remaining rows from zero.
df = df.drop(index=list(range(6))).reset_index(drop=True)

# Give the four columns their working names.
df.columns = ['date', 'eurusd', 'eurchf', 'gbpusd']

# Monthly averages of the three exchange-rate columns.
#
# Rows are walked in file order. Each row is added to the running sums, and
# when the following row belongs to a different calendar month (only `.month`
# is compared) the averages are stored and the sums restart.
# NOTE: the last row is only used for that comparison, so the final run of
# rows never produces an average.
eurusd_mean = []
chf_mean = []
gbp_mean = []

dvk_buckets = {'eurusd': eurusd_mean, 'eurchf': chf_mean, 'gbpusd': gbp_mean}
dvk_running = dict.fromkeys(dvk_buckets, 0)
count = 0

for index in range(df.shape[0] - 1):
    month_ends = df['date'].iloc[index].month != df['date'].iloc[index + 1].month

    for pair in dvk_running:
        dvk_running[pair] = dvk_running[pair] + df[pair].iloc[index]
    count = count + 1

    if month_ends:
        for pair, bucket in dvk_buckets.items():
            bucket.append(dvk_running[pair] / count)
            dvk_running[pair] = 0
        count = 0

# Collect the monthly averages into one frame, one column per currency pair.
means = pd.DataFrame(dict(eurusd=eurusd_mean, eurchf=chf_mean, gbpusd=gbp_mean))

# Keep only the first 255 monthly rows (presumably the later rows contain
# missing values -- confirm against the data).
means_without_na = means.iloc[:255]

# Lag feature for each currency pair: the series without its first row, so
# position k of X holds the value stored one row further down than position k
# of y. (Presumably rows are ordered newest-first, which makes X the previous
# month's value -- confirm against the data.)
X_eurusd = means_without_na['eurusd'].iloc[1:]
X_eurchf = means_without_na['eurchf'].iloc[1:]
X_gbpusd = means_without_na['gbpusd'].iloc[1:]

# Targets: the series without its last row. Slicing with [:-1] instead of a
# hard-coded length keeps X and y the same length for any number of rows.
y_eurusd = means_without_na['eurusd'].iloc[:-1]
y_eurchf = means_without_na['eurchf'].iloc[:-1]
y_gbpusd = means_without_na['gbpusd'].iloc[:-1]

# Train/test split by position: the first `test_rows` rows form the test set,
# the rest the training set. (Named `test_rows` so the builtin `breakpoint`
# is not shadowed.)
test_rows = 30
X_eurusd_train = X_eurusd.iloc[test_rows:]
X_eurusd_test = X_eurusd.iloc[:test_rows]
X_eurchf_train = X_eurchf.iloc[test_rows:]
X_eurchf_test = X_eurchf.iloc[:test_rows]
X_gbpusd_train = X_gbpusd.iloc[test_rows:]
X_gbpusd_test = X_gbpusd.iloc[:test_rows]

y_eurusd_train = y_eurusd.iloc[test_rows:]
y_eurusd_test = y_eurusd.iloc[:test_rows]
y_eurchf_train = y_eurchf.iloc[test_rows:]
y_eurchf_test = y_eurchf.iloc[:test_rows]
y_gbpusd_train = y_gbpusd.iloc[test_rows:]
y_gbpusd_test = y_gbpusd.iloc[:test_rows]

# Forecast table: one column per forecast series, one row per horizon
# label 'm+0' .. 'm+12'. All cells start out empty (NaN).
forecast_columns = [
    'fed_target', 'ecb_key',
    'eurusd', 'eurchf', 'gbpusd',
    'bunds2', 'bunds5', 'bunds10',
    'ust2', 'ust5', 'ust10',
    'eu_libor3', 'us_libor3',
]
forecast_horizons = ['m+' + str(step) for step in range(13)]

predictions = pd.DataFrame(index=forecast_horizons, columns=forecast_columns)

    


### Предобработка данных краткосрочного прогноза

In [None]:
# Load the raw short-run workbook; header=None keeps every sheet row as data.
scf = pd.read_excel('shortrun.xlsx', sheet_name='sheet0', header=None)
print(scf.shape)

# Drop the first six rows and the sheet columns that are not used, then name
# the thirteen columns that remain. The row index is NOT reset here.
unused_columns = [2, 4, 6, 7, 8, 12, 13, 16, 17, 18, 19, 20, 25, 26, 27]
scf = scf.drop(index=scf.index[0:6]).drop(columns=unused_columns)
scf.columns = [
    'date',
    'bunds2', 'bunds5', 'bunds10',
    'ust2', 'ust5', 'ust10',
    'eu_libor3', 'us_libor3',
    'fed_target', 'fed_effect', 'ecb_key', 'ecb_depo',
]

# Monthly averages of the twelve rate columns.
#
# Rows are walked in file order. Each row is added to the running sums, and
# when the following row belongs to a different calendar month (only `.month`
# is compared) the averages are stored and the sums restart.
# NOTE: the last row is only used for that comparison, so the final run of
# rows never produces an average.
bund2_mean = []
bund5_mean = []
bund10_mean = []
ust2_mean = []
ust5_mean = []
ust10_mean = []

eulibor_mean = []
ustlibor_mean = []

fedtarget_mean = []
fedeffect_mean = []
ecbkey_mean = []
ecbdepo_mean = []

# Source column -> list that receives that column's monthly averages.
scf_buckets = {
    'bunds2': bund2_mean,
    'bunds5': bund5_mean,
    'bunds10': bund10_mean,
    'ust2': ust2_mean,
    'ust5': ust5_mean,
    'ust10': ust10_mean,
    'eu_libor3': eulibor_mean,
    'us_libor3': ustlibor_mean,
    'fed_target': fedtarget_mean,
    'fed_effect': fedeffect_mean,
    'ecb_key': ecbkey_mean,
    'ecb_depo': ecbdepo_mean,
}
scf_running = dict.fromkeys(scf_buckets, 0)
count = 0

for index in range(scf.shape[0] - 1):
    month_ends = scf['date'].iloc[index].month != scf['date'].iloc[index + 1].month

    for series_name in scf_running:
        scf_running[series_name] = scf_running[series_name] + scf[series_name].iloc[index]
    count = count + 1

    if month_ends:
        for series_name, bucket in scf_buckets.items():
            bucket.append(scf_running[series_name] / count)
            scf_running[series_name] = 0
        count = 0

        
# Collect the monthly averages into one frame, one column per rate series.
scf_means = pd.DataFrame(dict(
    bunds2=bund2_mean,
    bunds5=bund5_mean,
    bunds10=bund10_mean,
    ust2=ust2_mean,
    ust5=ust5_mean,
    ust10=ust10_mean,
    eu_libor3=eulibor_mean,
    us_libor3=ustlibor_mean,
    fed_target=fedtarget_mean,
    fed_effect=fedeffect_mean,
    ecb_key=ecbkey_mean,
    ecb_depo=ecbdepo_mean,
))

# Keep only the first 250 monthly rows.
scf_means_filled = scf_means.iloc[:250]

# Fill the policy-rate columns of the forecast table: fed_target is 0.25 on
# every horizon except the last one ('m+12'), where it is 0.5; ecb_key is 0
# throughout.
predictions = predictions.assign(fed_target=0.25, ecb_key=0)
predictions.loc['m+12', 'fed_target'] = 0.5
predictions
   

### Обучение и прогноз

In [None]:
# Run model selection for each of the three currency pairs and write the
# chosen model's forecasts into the forecast table.
currency_datasets = [
    ('eurusd', X_eurusd_train, y_eurusd_train, X_eurusd_test, y_eurusd_test),
    ('gbpusd', X_gbpusd_train, y_gbpusd_train, X_gbpusd_test, y_gbpusd_test),
    ('eurchf', X_eurchf_train, y_eurchf_train, X_eurchf_test, y_eurchf_test),
]

for pair_name, pair_Xtrain, pair_ytrain, pair_Xtest, pair_ytest in currency_datasets:
    best_model, min_mse = ann_model(pair_Xtrain, pair_ytrain, pair_Xtest, pair_ytest)
    predictions = fill_predictions(predictions, pair_name, best_model)



In [None]:
def _forecast_rate_series(predictions, history, policy_col, items, n_obs=240):
    """Fit a two-feature model per series and roll its forecast forward.

    For every name in ``items`` a model is selected with ``rates_model`` on
    the first ``n_obs`` rows of ``history``. The features are the policy rate
    (``policy_col``) of a row and the series' value one row further down (its
    lag); the target is the series' value in the row itself. The first row of
    the forecast column is seeded with the first historical value; every
    later row is predicted from that row's policy rate and the forecast of
    the row above it.

    Args:
        predictions: forecast DataFrame containing ``policy_col`` and every
            column in ``items``; modified in place.
        history: DataFrame of monthly means with the same column names.
        policy_col: name of the policy-rate column used as first feature.
        items: names of the series to forecast.
        n_obs: number of leading rows of ``history`` used for fitting.

    Returns:
        The same ``predictions`` object, with the ``items`` columns filled.
    """
    policy_pos = predictions.columns.get_loc(policy_col)

    for item in items:
        # shift(-1) puts the value of row k+1 into row k, i.e. the lag column.
        # Building the frame directly avoids chained assignment
        # (`X[item][0:240] = ...`), which may write to a temporary copy.
        X = pd.DataFrame({
            policy_col: history[policy_col],
            item: history[item].shift(-1),
        }).iloc[:n_obs]
        y = history[[item]].iloc[:n_obs]

        # Force the features to float; values that cannot be parsed become NaN.
        X = X.apply(pd.to_numeric, errors='coerce')

        # Random split: only one lag is used, and it is assumed to carry all
        # the information about the series' movement.
        Xtrain, Xtest, ytrain, ytest = train_test_split(X, y, test_size=0.33, random_state=42)

        # Seed the first forecast row with the first historical mean; the
        # later rows come from the fitted model.
        item_pos = predictions.columns.get_loc(item)
        predictions.iloc[0, item_pos] = history[item].iloc[0]
        model, mse = rates_model(Xtrain, ytrain, Xtest, ytest)

        for step in range(1, len(predictions)):
            features = np.array(
                [[predictions.iloc[step, policy_pos], predictions.iloc[step - 1, item_pos]]],
                dtype='float64',
            )
            pred = model.predict(features)
            predictions.iloc[step, item_pos] = pred.reshape(-1)[0]

    return predictions


# Forecast of Bund yields and EUR LIBOR, driven by the ECB key rate
predictions = _forecast_rate_series(
    predictions, scf_means_filled, 'ecb_key',
    ['bunds2', 'bunds5', 'bunds10', 'eu_libor3'],
)

# Forecast of US Treasury yields and USD LIBOR, driven by the Fed target rate
predictions = _forecast_rate_series(
    predictions, scf_means_filled, 'fed_target',
    ['ust2', 'ust5', 'ust10', 'us_libor3'],
)


In [None]:
# Transpose the forecast table (series become rows, horizons become columns)
# to match the layout of the short-run forecast.
predictions_vertical = predictions.transpose()
predictions_vertical