In [4]:
import yfinance as yf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import stats
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.neural_network import MLPRegressor
from sklearn.preprocessing import StandardScaler, MinMaxScaler, LabelEncoder
from sklearn.pipeline import make_pipeline, Pipeline
from sklearn.decomposition import PCA
from sklearn.linear_model import LinearRegression, Lasso, SGDRegressor
from sklearn.ensemble import RandomForestRegressor, ExtraTreesRegressor
from sklearn.metrics import mean_squared_error, accuracy_score, classification_report, confusion_matrix
from sklearn.feature_selection import SelectKBest
pd.set_option('display.width', 100)

'''
currency_pair = ['AUDCAD', 'AUDNZD', 'AUDUSD', 'CADCHF', 'EURAUD', 'EURBRL', 'EURCAD', 'EURCHF', 'EURHUF', 'EURGBP', 'EURJPY', 'EURNZD', 'EURUSD', 'GBPCAD', 'GBPCHF', 'GBPJPY', 'GBPUSD', 'GBPNZD', 'USDCAD', 'USDCHF', 'USDJPY', 'USDMXN', 'USDSGD', 'USDTRY', 'NZDCAD', 'NZDUSD', 'NZDJPY', 'XAGUSD', 'XAUUSD']
for currency in currency_pair:
    time_frames = ['1d', '1wk', '1mo', '3mo']
    for time_frame in time_frames:
        ticket = yf.Ticker(f'{currency}=X')
        dataset = ticket.history(period='max', interval='1d', auto_adjust=True).drop(columns=['Volume', 'Dividends', 'Stock Splits']).rename(columns={'Open': 'open', 'High': 'high', 'Low': 'low', 'Close': 'close'})
        print(pd.DataFrame(dataset))
'''
ticket = yf.Ticker('EURUSD=X')
dataset = ticket.history(period='max', interval='1d', auto_adjust=True).rename(columns={'Open': 'open', 'High': 'high', 'Low': 'low', 'Close': 'close'}).round(5)
dataset['date'] = dataset.index
dataset.reset_index(drop=True, inplace=True)
dataset['up_down'] = np.select([(dataset['open'] > dataset['close'].shift(1)) & (dataset['open'] > dataset['close'].shift(2)) & (dataset['open'] > dataset['close'].shift(3)), 
                                (dataset['open'] < dataset['close'].shift(1)) & (dataset['open'] < dataset['close'].shift(2)) & (dataset['open'] < dataset['close'].shift(3)), 
                                dataset['open'] == dataset['close'].shift(-1)], 
                                ['up', 'down', 'nothing'], default='nothing')

# Handle missing values
dataset.dropna()

# Preprocessing
label_encoder = LabelEncoder()
dataset['up_down'] = label_encoder.fit_transform(dataset['up_down'])

# Create X and Y
x = np.array(dataset.loc[:,['open', 'high']])
y = np.array(dataset.loc[:,'close'])

# SelectKBest
select_kbest = SelectKBest(k='all')
fit = select_kbest.fit(x, y)
df_scores = pd.DataFrame(fit.scores_)
df_columns = pd.DataFrame(['open', 'high'])
featureScores = pd.concat([df_columns, df_scores], axis=1)
featureScores.columns = ['specs', 'score']
featureScores.nlargest(10, 'score').set_index('specs')
print(featureScores)

# Data Split
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.30, random_state=1)

# Random Search
model = LinearRegression()
param = {'copy_X': [True, False], 'fit_intercept': [True, False], 'n_jobs': range(0, 100), 'normalize': [True, False]}
randomized_search = RandomizedSearchCV(model, param_distributions=param, n_iter=5, cv=5, random_state=1).fit(x_train, y_train)
print("Best: %f using %s" % (randomized_search.best_score_, randomized_search.best_params_))
means = randomized_search.cv_results_['mean_test_score']
stds = randomized_search.cv_results_['std_test_score']
parameters = randomized_search.cv_results_['params']
for mean, stdev, param in zip(means, stds, parameters):
    print(f' {mean} {stdev} with: {param}')
print("Best: %f using %s" % (randomized_search.best_score_, randomized_search.best_params_))

# Pipeline
pipe_lr = make_pipeline(MinMaxScaler(), PCA(), LinearRegression(copy_X=False, fit_intercept=True, n_jobs=18, normalize=False)).fit(x_train, y_train)
pipe_la = make_pipeline(MinMaxScaler(), PCA(n_components=1), Lasso()).fit(x_train, y_train)
pipe_sdgr = make_pipeline(MinMaxScaler(), PCA(n_components=1), SGDRegressor()).fit(x_train, y_train)
pipe_rfr = make_pipeline(MinMaxScaler(), PCA(n_components=1), RandomForestRegressor()).fit(x_train, y_train)
pipe_ext = make_pipeline(MinMaxScaler(), PCA(), ExtraTreesRegressor()).fit(x_train, y_train)
pipe_ann = make_pipeline(MinMaxScaler(), PCA(), MLPRegressor()).fit(x_train, y_train)

# Metrics
print(f'Score Linear Regression: {pipe_lr.score(x_test, y_test)}')

print(f'Score Lasso: {pipe_la.score(x_test, y_test)}')
print(f'Score SGDRegressor: {pipe_sdgr.score(x_test, y_test)}')
print(f'Score Random Forest Regressor: {pipe_rfr.score(x_test, y_test)}')
print(f'Score Extra Trees Regressor: {pipe_ext.score(x_test, y_test)}')
print(f'Score ANN: {pipe_ann.score(x_test, y_test)}')

  Specs       Score
0  open  522.360745
1  high  755.760761
Best: 0.998604 using {'normalize': False, 'n_jobs': 18, 'fit_intercept': True, 'copy_X': True}
 0.9986035569778012 0.00010329006245115759 with: {'normalize': False, 'n_jobs': 18, 'fit_intercept': True, 'copy_X': True}
 0.9985967364178956 0.00010143238399550587 with: {'normalize': False, 'n_jobs': 17, 'fit_intercept': False, 'copy_X': True}
 0.9986035569778012 0.00010329006245115759 with: {'normalize': True, 'n_jobs': 36, 'fit_intercept': True, 'copy_X': True}
 0.9985967364178956 0.00010143238399550587 with: {'normalize': False, 'n_jobs': 83, 'fit_intercept': False, 'copy_X': False}
 0.9985967364178956 0.00010143238399550587 with: {'normalize': False, 'n_jobs': 57, 'fit_intercept': False, 'copy_X': False}
Best: 0.998604 using {'normalize': False, 'n_jobs': 18, 'fit_intercept': True, 'copy_X': True}


If you wish to scale the data, use Pipeline with a StandardScaler in a preprocessing stage. To reproduce the previous behavior:

from sklearn.pipeline import make_pipeline

model = make_pipeline(StandardScaler(with_mean=False), LinearRegression())

If you wish to pass a sample_weight parameter, you need to pass it as a fit parameter to each step of the pipeline as follows:

kwargs = {s[0] + '__sample_weight': sample_weight for s in model.steps}
model.fit(X, y, **kwargs)


If you wish to scale the data, use Pipeline with a StandardScaler in a preprocessing stage. To reproduce the previous behavior:

from sklearn.pipeline import make_pipeline

model = make_pipeline(StandardScaler(with_mean=False), LinearRegression())

If you wish to pass a sample_weight parameter, you need to pass it as a fit parameter to each step of the pipeline as follows:

kwargs = {s[0] + '__sample_weight': sample_weight for s in model.steps}
model.fit(X, y, **kwargs)


If you wish to scale the data, use Pipeline wi

Score Linear Regression: 0.998852837680619
Score Lasso: -3.919211682301693e-06
Score SGDRegressor: 0.9900687192605059
Score Random Forest Regressor: 0.9981895476784348
Score Extra Trees Regressor: 0.9985412965534964
Score ANN: 0.9676524987197531
