In [1]:
import pandas as pd
import numpy as np
import numpy.linalg as la
from sklearn.linear_model import Lasso
from scipy import integrate
from scipy import interpolate

## Напишем необходимые функции:

In [2]:
def incr_data(data):
    f = interpolate.interp1d(np.arange(len(data)), data)
    incr_data = f(np.arange(0, len(data) - 1, 0.1))
    return incr_data

In [3]:
def sigmoid(t):
    return [1/(1 + np.exp(-i)) for i in t]

In [4]:
def convert_to_np(stocks_csv):
    stocks_pd = pd.read_csv(stocks_csv, sep = ',', header = 0)
    rev_stocks = np.array(stocks_pd['Цена'])[::-1]
    rev_stocks = [x.replace('.','') for x in rev_stocks]
    dig_stocks = np.array([float(x.replace(',','.')) for x in rev_stocks])
    return dig_stocks

In [5]:
def get_y(pred_window, relevant_cost, profit_level, loss_level):
    for i in range(len(pred_window)):
        if pred_window[i] > relevant_cost*(1 + profit_level/100):
            return 1
        elif pred_window[i] < relevant_cost*(1 - loss_level/100):
            return -1
    return 0

profit_level > 0

0 < loss_level < 100

In [6]:
def get_x(data_window):
    int_array = get_int_array(data_window)
    diff_array = get_diff_array(data_window)
    return np.concatenate((incr_data(data_window), sigmoid(int_array), sigmoid(diff_array)))

In [7]:
def get_int_array(data):
    int_array = []
    for j in range(len(data)):
        int_array.append(integrate.trapz(data[:-j], [i for i in range(len(data[:-j]))]))
    return int_array

In [8]:
def get_diff_array(data):
    diff_array = []
    for j in range(len(data) - 4):
        diff_array.append((data[j] - 8*data[j+1] + 8*data[j+3] - data[j+4])/12)
    return diff_array

In [9]:
def get_dataset(stocks_array, data_len, pred_len, profit_level, loss_level):
    x_array = []
    y_array = []
    
    for i in range(len(stocks_array) - data_len - pred_len):
        data_window = stocks_array[i : i + data_len]/stocks_array[i : i + data_len].max()
        pred_window = stocks_array[i + data_len : i + data_len + pred_len]/stocks_array[i + data_len: i + data_len + pred_len].max()
        relevant_cost = data_window[-1]
        
        x_array.append(get_x(data_window))
        
        y_array.append(get_y(pred_window, relevant_cost, profit_level, loss_level))                   
    
    return x_array, y_array
        
        

In [10]:
def get_acc(Y_test, Y_pred):
    s = 0
    l = 0
    
    if len(Y_test) != len(Y_pred):
        return 'Error: different lengthes'
    
    for i in range(len(Y_pred)):
        if Y_pred[i] != 0:
            s += Y_pred[i]*Y_test[i]
            l += 1
    acc = s/l
    
    return acc, l/len(Y_pred)
    

## Создадим модель и датасет

In [11]:
stocks_VISA = convert_to_np('VISA.csv')
stocks_FXIT = convert_to_np('FXIT.csv')

In [12]:
#stocks_VISA[-10:]
stocks_FXIT[-10:]

array([5380., 5339., 5366., 5378., 5412., 5417., 5360., 5348., 5436.,
       5459.])

In [13]:
prediction_model = Lasso(alpha = 0.001)

prediction_model.fit(get_dataset(stocks_VISA[:-500], 60, 20, 10, 10)[0], get_dataset(stocks_VISA[:-500], 60, 20, 3, 3)[1])

test_X = get_dataset(stocks_VISA[-500:], 60, 20, 3, 3)[0]

test_Y = get_dataset(stocks_VISA[-500:], 60, 20, 3, 3)[1]

In [14]:
Y_pred = []

for i in prediction_model.predict(test_X):
    if i > 0.5:
        Y_pred.append(1)
    elif i < -0.5:
        Y_pred.append(-1)
    else:
        Y_pred.append(0)
        

In [15]:
get_acc(test_Y, Y_pred)

(0.9757281553398058, 0.49047619047619045)

### Проверим модель на FXIT

In [16]:
test_FXIT_X = get_dataset(stocks_FXIT, 60, 20, 3, 3)[0]

test_FXIT_Y = get_dataset(stocks_FXIT, 60, 20, 3, 3)[1]

In [17]:
Y_FXIT_pred = []

for i in prediction_model.predict(test_FXIT_X):
    if i > 0.5:
        Y_FXIT_pred.append(1)
    elif i < -0.5:
        Y_FXIT_pred.append(-1)
    else:
        Y_FXIT_pred.append(0)

In [18]:
get_acc(test_FXIT_Y, Y_FXIT_pred)

(0.9038461538461539, 0.49098712446351933)