In [32]:
# Standard library
from pathlib import Path

# Numerical stack
import numpy as np
import pandas as pd

# Gradient-boosting libraries
import xgboost as xg
from lightgbm import LGBMRegressor

# scikit-learn
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.ensemble import HistGradientBoostingRegressor
from sklearn.linear_model import ElasticNet
from sklearn.linear_model import Lasso
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Ridge
from sklearn.metrics import r2_score, mean_squared_error,mean_absolute_percentage_error,mean_absolute_error
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsRegressor
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import PolynomialFeatures
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor
class SeasonalForecaster:
  """Fit one regressor at a time and report its in-sample metrics.

  Every public method builds a model, fits it on ``(x, y)``, prints the
  metrics computed on those same rows, stores the fitted object on
  ``self.m`` and returns it.

  Several methods share their name with the estimator class they wrap
  (``SVR``, ``HistGradientBoostingRegressor``, ``KNeighborsRegressor``,
  ``SARIMAX``). Inside a method body such a name resolves to the
  module-level import, not to the method, so the calls below are not
  recursive. The flip side is that rebinding one of those names at module
  level (e.g. assigning a fitted model to it in a notebook cell) breaks
  the corresponding method.
  """

  def __init__(self):
    # Most recently fitted model; overwritten by every fitting method.
    self.m = None

  def _calculate_metrics(self, y_true, y_pred):
    """Print MSE, MAE, R^2 and MAPE of ``y_pred`` against ``y_true``.

    Returns:
      dict with keys 'mse', 'mae', 'r2_score' and 'MAPE', so callers can
      reuse the numbers instead of parsing the printed text.
    """
    metrics = {
      'mse': mean_squared_error(y_true, y_pred),
      'mae': mean_absolute_error(y_true, y_pred),
      'r2_score': r2_score(y_true, y_pred),
      'MAPE': mean_absolute_percentage_error(y_true, y_pred),
    }
    print('mse: ', metrics['mse'])
    print('mae: ', metrics['mae'])
    print('r2_score:', metrics['r2_score'])
    print('MAPE: ', metrics['MAPE'])
    print('predictions: ', y_pred)
    return metrics

  def _fit_and_report(self, estimator, x, y):
    """Fit ``estimator`` on ``(x, y)``, print in-sample metrics, return it."""
    self.m = estimator
    self.m.fit(x, y)
    # The metrics are printed by the helper itself; wrapping this call in
    # print() would only add a stray "None" line to the output.
    self._calculate_metrics(y, self.m.predict(x))
    return self.m

  def Linear_regression(self,x,y):
    """Fit ordinary least squares on ``(x, y)``."""
    return self._fit_and_report(LinearRegression(), x, y)

  def Poly_regression(self, degree,x,y):
    """Fit a linear model on polynomial features of the given ``degree``.

    The expansion and the regression are chained in one pipeline, so the
    returned model takes the raw feature columns in ``predict``. Predicting
    with a bare LinearRegression on un-expanded ``x`` fails because the
    column count differs from the expanded training matrix.
    """
    pipeline = make_pipeline(PolynomialFeatures(degree), LinearRegression())
    return self._fit_and_report(pipeline, x, y)

  def Ridge_regression(self, alpha,x,y):
    """Fit Ridge regression with regularisation strength ``alpha``."""
    return self._fit_and_report(Ridge(alpha=alpha), x, y)

  def Lasso_regression(self, alpha,x,y):
    """Fit Lasso regression with regularisation strength ``alpha``."""
    return self._fit_and_report(Lasso(alpha=alpha), x, y)

  def Elastic_net(self, alpha, l1_ratio,x,y):
    """Fit ElasticNet with the given ``alpha`` and ``l1_ratio``."""
    return self._fit_and_report(ElasticNet(alpha=alpha, l1_ratio=l1_ratio), x, y)

  def SARIMAX(self,x,y, order = (1,1,1,7), seasonal_order=(1,1,1,7)):
    """Fit a SARIMAX model of ``y`` with ``x`` as exogenous regressors.

    Args:
      x: exogenous regressors, row-aligned with ``y``.
      y: the series being modelled (the endogenous variable).
      order: non-seasonal ``(p, d, q)``. The historical 4-tuple default is
        still accepted; only its first three entries are used.
      seasonal_order: seasonal ``(P, D, Q, s)``; defaults to the value that
        used to be hard-coded here.

    Returns:
      The fitted results object, also stored on ``self.m``. Its ``predict``
      takes ``start``/``end`` rather than a feature matrix, so it cannot be
      passed to helpers that call ``model.predict(x_test)``.

    Raises:
      NameError: if ``SARIMAX`` has not been imported at module level.
      ValueError: if ``order`` has neither three nor four entries.
    """
    try:
      sarimax_cls = SARIMAX  # module-level statsmodels class, not this method
    except NameError:
      raise NameError(
        "SARIMAX is not defined; import it first with "
        "`from statsmodels.tsa.statespace.sarimax import SARIMAX`"
      ) from None
    order = tuple(order)
    if len(order) == 4:
      # Legacy (p, d, q, s) form: the seasonal part is given separately.
      order = order[:3]
    elif len(order) != 3:
      raise ValueError("order must be (p, d, q) or the legacy (p, d, q, s)")
    # The target is the endogenous series; the features are exogenous.
    spec = sarimax_cls(y, exog=x,
                       order=order,
                       seasonal_order=tuple(seasonal_order),
                       enforce_stationarity=False,
                       enforce_invertibility=False)
    self.m = spec.fit(disp=False)
    # In-sample predictions over the training rows, to match what the other
    # methods report. An out-of-sample forecast() would describe the steps
    # after the sample and is not comparable with ``y``.
    self._calculate_metrics(y, self.m.predict())
    return self.m

  def SVR(self,x,y,kernel="poly", degree=2, C=100):
    """Fit support-vector regression (epsilon fixed at 0.1)."""
    return self._fit_and_report(SVR(kernel=kernel, degree=degree, C=C, epsilon=0.1), x, y)

  def Tree_regression(self,x,y,max_depth=2):
    """Fit a decision-tree regressor limited to ``max_depth`` levels."""
    return self._fit_and_report(DecisionTreeRegressor(max_depth=max_depth), x, y)

  def Light_GBM_regression(self,x,y,metric):
    """Fit a LightGBM regressor with the given evaluation ``metric``."""
    return self._fit_and_report(LGBMRegressor(metric=metric), x, y)

  def XGB_regression(self,x,y,objective ='reg:linear', n_estimators = 10):
    """Fit an XGBoost regressor.

    ``'reg:linear'`` is the deprecated alias of ``'reg:squarederror'``; it
    is translated here so the historical default keeps working.
    """
    if objective == 'reg:linear':
      objective = 'reg:squarederror'
    booster = xg.XGBRegressor(objective=objective, n_estimators=n_estimators)
    return self._fit_and_report(booster, x, y)

  def HistGradientBoostingRegressor(self,x,y):
    """Fit a histogram-based gradient-boosting regressor with default settings."""
    return self._fit_and_report(HistGradientBoostingRegressor(), x, y)

  def KNeighborsRegressor(self,x,y, cluster):
    """Fit k-nearest-neighbours regression; ``cluster`` is the neighbour count."""
    return self._fit_and_report(KNeighborsRegressor(n_neighbors=cluster), x, y)

def validation_testing(x_test,y_test, model):
  """Print held-out metrics for an already fitted ``model``.

  Predicts on ``x_test``, then prints MSE, MAE, R^2 and MAPE against
  ``y_test`` followed by the predictions themselves. Returns nothing.
  """
  predictions = model.predict(x_test)
  # (label, scorer) pairs in the order they are printed.
  report = (
    ('mse: ', mean_squared_error),
    ('mae: ', mean_absolute_error),
    ('r2_score:', r2_score),
    ('MAPE: ', mean_absolute_percentage_error),
  )
  for label, scorer in report:
    print(label, scorer(y_test, predictions))
  print('predictions: ', predictions)

def Train_test_split(x,y,test_size,random_state):
  """Split features and target into train/test parts and print each shape.

  Thin wrapper over ``train_test_split`` that forwards ``test_size`` and
  ``random_state`` and returns
  ``(X_train, X_test, y_train, y_test)``.
  """
  parts = train_test_split(x, y, test_size=test_size, random_state=random_state)
  X_train, X_test, y_train, y_test = parts
  captions = (
    "the training size is: ",
    "The testing size is: ",
    "The training target size is: ",
    "The testing targe size is: ",
  )
  for caption, part in zip(captions, (X_train, X_test, y_train, y_test)):
    print(caption, part.shape)
  return X_train, X_test, y_train, y_test

def read_data(path, target : str, features: list):
  """Load a table from disk and split it into feature columns and a target.

  Args:
    path: location of a ``.csv``, ``.xls`` or ``.xlsx`` file (str or Path).
      The format is taken from the final extension, case-insensitively, so
      names with extra dots (e.g. dates) are handled.
    target: name of the column to predict.
    features: names of the columns used as predictors.

  Returns:
    ``(data, target_values)``: a DataFrame holding ``features`` and the
    Series of the ``target`` column.

  Raises:
    ValueError: if the file extension is not one of the supported formats.
    KeyError: if any requested column is absent from the file.
  """
  suffix = Path(path).suffix.lower()
  if suffix == ".csv":
    df = pd.read_csv(path)
  elif suffix in (".xls", ".xlsx"):
    df = pd.read_excel(path)
  else:
    # Fail here with a clear message instead of hitting an unbound ``df`` below.
    raise ValueError(
      f"Unsupported file format {suffix!r} for {str(path)!r}; expected .csv, .xls or .xlsx"
    )
  missing = [column for column in [*features, target] if column not in df.columns]
  if missing:
    raise KeyError(f"Columns not found in {str(path)!r}: {missing}")
  # Separate local names: the ``target`` argument stays the column label.
  data = df[features]
  target_values = df[target]
  return data, target_values



In [33]:
# Load the price file: "Close" is the regression target and the same-row
# "High", "Open" and "Low" columns are the predictors.
# NOTE(review): absolute Colab path — this cell only runs where the file sits under /content.
data, target = read_data('/content/XAU_1d_data_2004_to_2024-09-20.csv','Close', ["High", "Open", "Low"])

In [34]:
# Hold out 30% of the rows for testing; random_state=21 keeps the split repeatable.
# NOTE(review): the wrapper forwards to sklearn's train_test_split, which shuffles rows by
# default. The file name suggests a date-ordered daily series — confirm a shuffled split is intended.
X_train, X_test, y_train, y_test = Train_test_split(data, target, 0.3, 21)

the training size is:  (3643, 3)
The testing size is:  (1562, 3)
The training target size is:  (3643,)
The testing targe size is:  (1562,)


In [35]:
# Despite its name, `model` is the SeasonalForecaster wrapper whose methods fit and
# return estimators; it is not itself a fitted model.
model = SeasonalForecaster()

In [36]:
# Fit Ridge with alpha=1.2 on the training split. The metrics this call prints are
# computed on those same training rows (in-sample).
ridge = model.Ridge_regression(1.2,X_train,y_train )

mse:  31.527038586664908
mae:  3.7719214399062877
r2_score: 0.9998598973745674
MAPE:  0.0029811396437718365
predictions:  [ 895.9442848  1719.60599833 1310.88149238 ...  827.88133128  406.37562063
  653.36725344]
None


In [38]:
# Score the fitted Ridge model on the held-out split.
validation_testing(X_test,y_test, ridge)

mse:  30.03444125597344
mae:  3.8080961215061615
r2_score: 0.9998625732086229
MAPE:  0.003014003047297934
predictions:  [1339.44636621  655.65253848 1338.44723215 ... 1199.55820848  884.00901544
 1282.18703559]


In [41]:
# Fit k-nearest-neighbours with n_neighbors=3 (the wrapper's third argument, which it
# names `cluster`). The printed metrics are in-sample.
KNN = model.KNeighborsRegressor(X_train,y_train,3)

mse:  25.61244827218106
mae:  3.3556711501509744
r2_score: 0.9998861811509245
MAPE:  0.0026767318220139386
predictions:  [ 895.83333333 1717.26666667 1313.7        ...  822.36666667  405.73333333
  655.2       ]
None


In [42]:
# Score the fitted KNN model on the held-out split.
validation_testing(X_test,y_test, KNN)

mse:  52.015568324085876
mae:  4.77584933845497
r2_score: 0.9997619954839342
MAPE:  0.003813493274627549
predictions:  [1339.04666667  654.66666667 1339.7        ... 1199.71        879.43333333
 1284.57666667]


In [43]:
HistGradientBoostingRegressor = model.HistGradientBoostingRegressor(X_train,y_train)

mse:  62.25500470871762
mae:  4.887250452913198
r2_score: 0.9997233457375948
MAPE:  0.0039774613854930176
predictions:  [ 900.70684143 1722.98767432 1312.8494338  ...  819.01112266  404.88925321
  654.64841391]
None


In [44]:
validation_testing(X_test,y_test, HistGradientBoostingRegressor)

mse:  107.15800527621172
mae:  6.20881027210341
r2_score: 0.999509683542638
MAPE:  0.004897718871478396
predictions:  [1335.92444144  658.82835487 1338.43039286 ... 1201.1870111   882.30704227
 1280.79631144]
