In [43]:
import pandas as pd
import numpy as np
import numpy.linalg as la
from sklearn.linear_model import Lasso
from scipy import integrate
from scipy import interpolate

## Напишем необходимые функции:

In [44]:
def convert_to_np(stocks_csv):
    stocks_pd = pd.read_csv(stocks_csv, sep = ',', header = 0)
    rev_stocks = np.array(stocks_pd['Цена'])[::-1]
    rev_stocks = [str(x).replace('.','') for x in rev_stocks]
    dig_stocks = np.array([float(x.replace(',','.')) for x in rev_stocks])
    return dig_stocks

In [45]:
def convert_to_np_eng(stocks_csv):
    stocks_pd = pd.read_csv(stocks_csv, sep = ',', header = 0)
    rev_stocks = np.array(stocks_pd['Price'])[::-1]
    dig_stocks = np.array([float(str(x).replace(',','')) for x in rev_stocks])
    return dig_stocks

In [46]:
def get_y(pred_window, relevant_cost, profit_level, loss_level):
    for i in range(len(pred_window)):
        if pred_window[i] > relevant_cost + profit_level/100:
            return 1
        elif pred_window[i] < relevant_cost - loss_level/100:
            return -1
    return 0

profit_level > 0

0 < loss_level < 100

In [47]:
def get_x(data_window):
    return data_window

In [48]:
def get_dataset(stocks_array, data_len, pred_len, profit_level, loss_level):
    x_array = []
    y_array = []
    
    for i in range(len(stocks_array) - data_len - pred_len):
        data_window = np.array(stocks_array[i : i + data_len])
        pred_window = np.array(stocks_array[i + data_len : i + data_len + pred_len])

        data_norm = data_window[0]

        data_window = (data_window - data_norm)/data_norm
        pred_window = (pred_window - data_norm)/data_norm
        relevant_cost = data_window[-1]

        x_array.append(get_x(data_window))

        y_array.append(get_y(pred_window, relevant_cost, profit_level, loss_level))                   

    return x_array, y_array
        
        

In [49]:
def get_acc(Y_test, Y_pred):
    s = 0
    l = 0
    
    if len(Y_test) != len(Y_pred):
        return 'Error: different lengthes'
    
    for i in range(len(Y_pred)):
        if Y_pred[i] == 0:
            continue
        elif Y_pred[i] == Y_test[i]:
            s += 1
        l += 1
    
    if l == 0:
        return 'no prediction'
    
    acc = s/l
    
    if l == 0:
        return 'no prediction'
    
    return acc, l/len(Y_pred)
    

## Создадим модель и датасет

In [50]:
stocks_VISA = convert_to_np('VISA.csv')
stocks_FXIT = convert_to_np('FXIT.csv')

In [51]:
#stocks_VISA[-10:]
stocks_FXIT[-10:]

array([5380., 5339., 5366., 5378., 5412., 5417., 5360., 5348., 5436.,
       5459.])

In [229]:
profit_level = 0.6
loss_level = 0.6
data_len = 20
pred_len = 6
step = 0.35

In [230]:
prediction_model = Lasso(alpha = 0.001)

prediction_model.fit(get_dataset(stocks_VISA[:-500], data_len, pred_len, profit_level, loss_level)[0], get_dataset(stocks_VISA[:-500], data_len, pred_len, profit_level, loss_level)[1])

test_X = get_dataset(stocks_VISA[-500:], data_len, pred_len, profit_level, loss_level)[0]

test_Y = get_dataset(stocks_VISA[-500:], data_len, pred_len, profit_level, loss_level)[1]

In [231]:
#test_X

In [232]:
#test_Y

In [233]:
#prediction_model.predict(test_X)

In [234]:
Y_pred = []

for i in prediction_model.predict(test_X):
    if i > step:
        Y_pred.append(1)
    elif i < -step:
        Y_pred.append(-1)
    else:
        Y_pred.append(0)
        

In [235]:
#Y_pred

In [236]:
get_acc(test_Y, Y_pred)

(0.6666666666666666, 0.006329113924050633)

### Проверим модель на FXIT

In [237]:
test_FXIT_X = get_dataset(stocks_FXIT, data_len, pred_len, profit_level, loss_level)[0]

test_FXIT_Y = get_dataset(stocks_FXIT, data_len, pred_len, profit_level, loss_level)[1]

In [238]:
Y_FXIT_pred = []

for i in prediction_model.predict(test_FXIT_X):
    if i > step:
        Y_FXIT_pred.append(1)
    elif i < -step:
        Y_FXIT_pred.append(-1)
    else:
        Y_FXIT_pred.append(0)

In [239]:
get_acc(test_FXIT_Y, Y_FXIT_pred)

(0.5714285714285714, 0.005742411812961444)

### Проверим модель на других компаниях:

In [240]:
test_AAPL_X = get_dataset(convert_to_np_eng('AAPL.csv'), data_len, pred_len, profit_level, loss_level)[0]

test_AAPL_Y = get_dataset(convert_to_np_eng('AAPL.csv'), data_len, pred_len, profit_level, loss_level)[1]


Y_AAPL_pred = []

for i in prediction_model.predict(test_AAPL_X):
    if i > step:
        Y_AAPL_pred.append(1)
    elif i < -step:
        Y_AAPL_pred.append(-1)
    else:
        Y_AAPL_pred.append(0)
        
get_acc(test_AAPL_Y, Y_AAPL_pred)

(0.6888888888888889, 0.017906884202148827)

In [241]:
test_AMZN_X = get_dataset(convert_to_np_eng('AMZN.csv'), data_len, pred_len, profit_level, loss_level)[0]

test_AMZN_Y = get_dataset(convert_to_np_eng('AMZN.csv'), data_len, pred_len, profit_level, loss_level)[1]


Y_AMZN_pred = []

for i in prediction_model.predict(test_AMZN_X):
    if i > step:
        Y_AMZN_pred.append(1)
    elif i < -step:
        Y_AMZN_pred.append(-1)
    else:
        Y_AMZN_pred.append(0)
        
get_acc(test_AMZN_Y, Y_AMZN_pred)

(0.6486486486486487, 0.029446876243533624)

In [242]:
test_IBM_X = get_dataset(convert_to_np_eng('IBM.csv'), data_len, pred_len, profit_level, loss_level)[0]

test_IBM_Y = get_dataset(convert_to_np_eng('IBM.csv'), data_len, pred_len, profit_level, loss_level)[1]


Y_IBM_pred = []

for i in prediction_model.predict(test_IBM_X):
    if i > step:
        Y_IBM_pred.append(1)
    elif i < -step:
        Y_IBM_pred.append(-1)
    else:
        Y_IBM_pred.append(0)
        
get_acc(test_IBM_Y, Y_IBM_pred)

(0.7037037037037037, 0.010748407643312101)

In [243]:
test_MSFT_X = get_dataset(convert_to_np_eng('MSFT.csv'), data_len, pred_len, profit_level, loss_level)[0]

test_MSFT_Y = get_dataset(convert_to_np_eng('MSFT.csv'), data_len, pred_len, profit_level, loss_level)[1]


Y_MSFT_pred = []

for i in prediction_model.predict(test_MSFT_X):
    if i > step:
        Y_MSFT_pred.append(1)
    elif i < -step:
        Y_MSFT_pred.append(-1)
    else:
        Y_MSFT_pred.append(0)
        
get_acc(test_MSFT_Y, Y_MSFT_pred)

(0.5625, 0.012733784321528054)

In [244]:
test_TSLA_X = get_dataset(convert_to_np_eng('TSLA.csv'), data_len, pred_len, profit_level, loss_level)[0]

test_TSLA_Y = get_dataset(convert_to_np_eng('TSLA.csv'), data_len, pred_len, profit_level, loss_level)[1]


Y_TSLA_pred = []

for i in prediction_model.predict(test_TSLA_X):
    if i > step:
        Y_TSLA_pred.append(1)
    elif i < -step:
        Y_TSLA_pred.append(-1)
    else:
        Y_TSLA_pred.append(0)
        
get_acc(test_TSLA_Y, Y_TSLA_pred)

(0.5577689243027888, 0.10480167014613778)