In [None]:
# Imports
import numpy as np
import pandas as pd
from sklearn.base import clone
from sklearn.metrics import accuracy_score, roc_auc_score, f1_score, precision_score, recall_score, confusion_matrix
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import TimeSeriesSplit
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import LinearSVC  # Import LinearSVC instead of SVC

# Functions to calculate RSI and MACD
def rsi(data, periods=14):
    """Compute a Relative Strength Index from the ``'Close'`` column of *data*.

    Gains and losses are averaged with a plain rolling mean over ``periods``
    rows (no exponential smoothing).

    Args:
        data: Object indexable by ``'Close'`` that yields a pandas Series.
        periods: Rolling window length used for both averages.

    Returns:
        A Series aligned with ``data['Close']``. The first ``periods`` entries
        are NaN because the rolling window is not yet full.
    """
    change = data['Close'].diff()

    # Separate the moves by sign: up-moves keep their size and down-moves
    # become 0 in ``ups``; the reverse holds for ``downs`` (which stays <= 0).
    ups = change.clip(lower=0)
    downs = change.clip(upper=0)

    mean_up = ups.rolling(window=periods).mean()
    # Negate so the average loss is expressed as a magnitude.
    mean_down = -downs.rolling(window=periods).mean()

    relative_strength = mean_up / mean_down
    return 100 - (100 / (1 + relative_strength))

def macd(data, short=12, long=26, signal=9):
    """Compute the MACD line and its signal line from ``data['Close']``.

    Args:
        data: Object indexable by ``'Close'`` that yields a pandas Series.
        short: Span of the fast exponential moving average.
        long: Span of the slow exponential moving average.
        signal: Span of the exponential moving average applied to the MACD line.

    Returns:
        A ``(macd_line, signal_line)`` tuple of Series, where ``macd_line`` is
        the fast EMA minus the slow EMA.
    """
    close = data['Close']

    def _ema(series, span):
        # Recursive (adjust=False) exponential moving average.
        return series.ewm(span=span, adjust=False).mean()

    macd_line = _ema(close, short) - _ema(close, long)
    return macd_line, _ema(macd_line, signal)

# Load data
# The columns referenced below (Date, Open, High, Low, Close, Volume) must be
# present in the CSV.
data = pd.read_csv('AAPL.csv')

# Feature engineering
data['Average'] = (data['Open'] + data['Close'] + data['High'] + data['Low']) / 4
data['HL_PCT'] = (data['High'] - data['Low']) / data['Low']
data['PCT_change'] = (data['Close'] - data['Open']) / data['Open']
data['Volume_pct_change'] = data['Volume'].pct_change()
data['RSI'] = rsi(data)
data['MACD'], data['Signal'] = macd(data)

# Shift the target variable to predict future stock prices
forecast_out = 5
data['Target'] = data['Close'].shift(-forecast_out)

# Calculate the price direction (1 for gain, 0 for loss)
# The last `forecast_out` rows have a NaN Target, so the comparison yields 0
# there; those rows are removed by the dropna below and never reach the model.
data['Price_Direction'] = (data['Target'] > data['Close']).astype(int)

# The ratio features divide by Low, Open and the previous Volume; a zero in
# any of those produces +/-inf, which dropna does not remove and which the
# scaler/classifier reject. Turn them into NaN so such rows are dropped too.
data = data.replace([np.inf, -np.inf], np.nan)
data.dropna(inplace=True)

# Split the data into features (X) and target (y) variables
# Close and Target are excluded from X because the label is derived from them.
X = data.drop(['Date', 'Close', 'Target', 'Price_Direction', 'Volume'], axis=1)
y = data['Price_Direction']

# Walk-forward splitter: every fold trains on an initial stretch of rows and
# is tested on the rows that immediately follow it.
tscv = TimeSeriesSplit(n_splits=5)

svm_model = LinearSVC(dual=True)  # dual = True --> coordinate descent

# Scaling is a pipeline step so the scaler is always fitted on exactly the
# rows the classifier is trained on, and every prediction goes through the
# same transformation. A classifier trained on raw features must never be fed
# standardized ones (or vice versa).
svm_pipeline = Pipeline([
    ('scaler', StandardScaler()),
    ('svc', svm_model),
])

param_grid = {
    'svc__C': [0.001, 0.01, 0.1, 1, 10],
}

# Perform grid search
grid_search = GridSearchCV(svm_pipeline, param_grid, cv=tscv, scoring='accuracy')
grid_search.fit(X, y)

# Set model with the best hyperparameters
# NOTE: best_estimator_ has been refit on ALL rows, so it is only used as a
# template below; scoring it directly on any fold would leak test data.
best_svm_model = grid_search.best_estimator_

# Report parameters under the classifier's own names (e.g. 'C', not 'svc__C').
best_params = {
    name.split('__', 1)[-1]: value
    for name, value in grid_search.best_params_.items()
}

future_dates = 5  # Set the number of days to predict

for train_index, test_index in tscv.split(X):
    X_train, X_test = X.iloc[train_index], X.iloc[test_index]
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]

    # Fresh, unfitted copy with the tuned hyperparameters, trained on this
    # fold's training rows only (scaler and classifier together).
    fold_model = clone(best_svm_model)
    fold_model.fit(X_train, y_train)

    # Make predictions
    y_pred = fold_model.predict(X_test)
    y_pred_decision_scores = fold_model.decision_function(X_test)

    # Calculate performance metrics
    # zero_division=0 keeps the 0.0 result but silences the undefined-metric
    # warning when a fold has no predicted (or no actual) positives.
    accuracy = accuracy_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred, zero_division=0)
    precision = precision_score(y_test, y_pred, zero_division=0)
    recall = recall_score(y_test, y_pred, zero_division=0)
    # Fix the label order so the matrix is always 2x2.
    conf_matrix = confusion_matrix(y_test, y_pred, labels=[0, 1])
    # ROC AUC is undefined when the fold contains a single class.
    if y_test.nunique() > 1:
        roc_auc = roc_auc_score(y_test, y_pred_decision_scores)
    else:
        roc_auc = float('nan')

    # Print the results for each fold
    print("-------------")
    print("Best SVM Model Parameters: ", best_params)
    print("Accuracy: ", accuracy)
    print("ROC AUC Score: ", roc_auc)
    print("F1 Score: ", f1)
    print("Precision: ", precision)
    print("Recall: ", recall)
    print("Confusion Matrix: \n", conf_matrix)

    # Future predictions
    # These are the model's calls for the most recent `future_dates` rows of
    # this fold's test block; they are already part of y_pred, so no second
    # predict pass is needed.
    future_predictions = y_pred[-future_dates:]

    # Convert predicted directions to gain/loss
    future_directions = ['gain' if direction == 1 else 'loss' for direction in future_predictions]

    print("Future Predictions: ", future_directions)
    print("-------------")
    print("")



-------------
Best SVM Model Parameters:  {'C': 10}
Accuracy:  0.5096969696969696
F1 Score:  0.0
Precision:  0.0
Recall:  0.0
Confusion Matrix: 
 [[841   0]
 [809   0]]
Future Predictions:  ['loss', 'loss', 'loss', 'loss', 'loss']
-------------

-------------
Best SVM Model Parameters:  {'C': 10}
Accuracy:  0.4703030303030303
F1 Score:  0.42348284960422167
Precision:  0.46589259796806964
Recall:  0.38814993954050786
Confusion Matrix: 
 [[455 368]
 [506 321]]
Future Predictions:  ['loss', 'loss', 'loss', 'loss', 'loss']
-------------

-------------
Best SVM Model Parameters:  {'C': 10}
Accuracy:  0.45575757575757575
F1 Score:  0.26633986928104575
Precision:  0.5077881619937694
Recall:  0.1805094130675526
Confusion Matrix: 
 [[589 158]
 [740 163]]
Future Predictions:  ['loss', 'loss', 'loss', 'loss', 'loss']
-------------

-------------
Best SVM Model Parameters:  {'C': 10}
Accuracy:  0.4224242424242424
F1 Score:  0.0
Precision:  0.0
Recall:  0.0
Confusion Matrix: 
 [[697   0]
 [953   0]

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
