In [14]:
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor

from sklearn.metrics import mean_squared_error, accuracy_score


import pandas as pd

In [5]:
df = pd.read_csv('features/2003-12-18:2024-03-08.csv', index_col=['Date'], parse_dates=['Date'])

In [17]:
def evaluate_model(model, X_test, y_test):
  """
  Evaluates the performance of a model on a given test set.

  Args:
      model: Trained model pipeline (can be GridSearchCV or a regular Pipeline).
      X_test: Feature matrix of the test set.
      y_test: Target values of the test set.

  Returns:
      A dictionary containing evaluation metrics for the model.
  """
  predictions = model.predict(X_test)

  # Check if using GridSearchCV (assuming it's the last step)
  if isinstance(model.steps[-1][1], GridSearchCV):
    # Access best model from GridSearchCV
    model = model.best_estimator_

  if hasattr(model.steps[-1][1], 'decision_function'):  # Classification model
    # Use accuracy score for classification
    accuracy = accuracy_score(y_test, predictions)
    return {'Accuracy': accuracy}
  else:  # Regression model
    # Use mean squared error for regression
    mse = mean_squared_error(y_test, predictions)
    return {'Mean Squared Error': mse}

In [7]:
# Pipeline for target Prediction (assuming binary a floating number)
target_pipeline = Pipeline([
    ('scaler', StandardScaler()),
    ('regressor', GridSearchCV(RandomForestRegressor(),
                                param_grid={'n_estimators': [100, 200, 300],
                                            'max_depth': [5, 10, 15]}))
])


# Split data into features (X) and targets (y)
X_target = df.drop(columns=['target', 'action'])
y_target = df['target']

# Train pipelines
target_pipeline.fit(X_target, y_target)

In [11]:



target = y_target.to_frame()
target['prediction'] = target_pipeline.predict(X_target)

target

Unnamed: 0_level_0,target,prediction
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2003-12-18,1.238497,1.239145
2003-12-19,1.239895,1.238273
2003-12-22,1.240003,1.240183
2003-12-23,1.245299,1.246955
2003-12-24,1.244803,1.244806
...,...,...
2024-03-04,1.085517,1.084783
2024-03-05,1.085600,1.085042
2024-03-06,1.089954,1.088872
2024-03-07,1.095014,1.092606


In [12]:

# Pipeline for Action Prediction (assuming binary buy/sell)
action_pipeline = Pipeline([
    ('scaler', StandardScaler()),
    ('classifier', GridSearchCV(RandomForestClassifier(),
                                param_grid={'n_estimators': [100, 200, 300],
                                            'max_depth': [5, 10, 15]}))
])


# Split data into features (X) and targets (y)
X_action = df.drop(columns=['target', 'action'])
y_action = df['action']

# Train pipelines
action_pipeline.fit(X_action, y_action)


In [13]:
action = y_action.to_frame()
action['prediction'] = action_pipeline.predict(X_action)

action

Unnamed: 0_level_0,action,prediction
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2003-12-18,0,0
2003-12-19,1,1
2003-12-22,1,1
2003-12-23,1,1
2003-12-24,0,0
...,...,...
2024-03-04,1,1
2024-03-05,1,1
2024-03-06,1,1
2024-03-07,1,1


In [18]:

target_metrics = evaluate_model(target_pipeline,X_target, y_target)
action_metrics = evaluate_model(action_pipeline, X_action, y_action)

print("Target Price Evaluation:", target_metrics)
print("Action Prediction Evaluation:", action_metrics)

AttributeError: 'Pipeline' object has no attribute 'best_estimator_'