# Market Regime Classification

## Random Forests

### Libraries

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (confusion_matrix, roc_curve, roc_auc_score, accuracy_score, precision_score,
                             recall_score, f1_score, classification_report)

### Data

In [None]:
# Load your data (replace 'your_data.csv' with your actual file)
data = pd.read_csv('your_data.csv')

# Expected columns: 'feature_1', 'feature_2', 'momentum_returns', 'value_returns'.
# Label each row with the style that had the higher return:
# 1 when momentum beat value, 0 otherwise (ties and NaN comparisons fall to 0).
data['target'] = (data['momentum_returns'] > data['value_returns']).astype(int)

# Select the feature variables and target variable
X = data[['feature_1', 'feature_2']]
y = data['target']

# Split chronologically instead of at random. Later cells compound the test-set
# returns into cumulative series, so the test rows must be a contiguous,
# ordered block; a shuffled split would also let the model train on rows that
# come after the ones it is evaluated on.
# NOTE(review): assumes the CSV rows are sorted oldest to newest - confirm.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, shuffle=False
)

### Model

In [None]:
# Build a 100-tree Random Forest with a fixed seed and train it in one step
# (fit() returns the fitted estimator itself).
rf_classifier = RandomForestClassifier(
    n_estimators=100,
    random_state=42,
).fit(X_train, y_train)

# Hard class predictions for the held-out rows.
y_pred = rf_classifier.predict(X_test)

# Keep only the second predict_proba column, i.e. the probability of the
# second class in rf_classifier.classes_ (label 1 when both labels were seen
# during training).
y_pred_proba = rf_classifier.predict_proba(X_test)[:, 1]

### Classification Evaluation

In [None]:
# Compute confusion matrix (rows = true class, columns = predicted class).
# Pinning the label order guarantees a 2x2 matrix laid out as
# [[TN, FP], [FN, TP]] even if one class is missing from the test set.
cm = confusion_matrix(y_test, y_pred, labels=[0, 1])
tn, fp = cm[0, 0], cm[0, 1]

# Compute ROC curve and ROC AUC score
fpr, tpr, _ = roc_curve(y_test, y_pred_proba)
roc_auc = roc_auc_score(y_test, y_pred_proba)

# Compute classification metrics
accuracy = accuracy_score(y_test, y_pred)
error_rate = 1 - accuracy
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
true_positive_rate = recall
# The false positive rate of the actual predictions comes from the confusion
# matrix. `fpr` above is the whole ROC threshold sweep, so picking a single
# element of it does not describe y_pred.
negatives = tn + fp
false_positive_rate = fp / negatives if negatives else float("nan")
specificity = 1 - false_positive_rate
true_negative_rate = specificity
f1 = f1_score(y_test, y_pred)
prevalence = np.sum(y_test) / len(y_test)

# Print classification metrics
print("Classification Metrics:")
print(f"Accuracy: {accuracy:.2f}")
print(f"Error Rate: {error_rate:.2f}")
print(f"True Positive Rate (Recall): {true_positive_rate:.2f}")
print(f"True Negative Rate (Specificity): {true_negative_rate:.2f}")
print(f"False Positive Rate: {false_positive_rate:.2f}")
print(f"Precision: {precision:.2f}")
print(f"F1 Score: {f1:.2f}")
print(f"Prevalence: {prevalence:.2f}")

# Plot confusion matrix.
# Draw on an explicit Axes: plt.matshow() opens its own figure by default,
# which would leave the figure created for sizing empty.
fig, ax = plt.subplots(figsize=(6, 6))
ax.matshow(cm, cmap=plt.cm.Blues, alpha=0.3)
for (i, j), count in np.ndenumerate(cm):
    ax.text(x=j, y=i, s=count, va='center', ha='center', fontsize=16)
ax.set_xticks([0, 1])
ax.set_xticklabels(['Value', 'Momentum'])
ax.set_yticks([0, 1])
ax.set_yticklabels(['Value', 'Momentum'])
ax.set_xlabel("Predicted")
ax.set_ylabel("True")
ax.set_title("Confusion Matrix")
plt.show()

# Plot ROC curve
plt.figure(figsize=(6, 6))
plt.plot(fpr, tpr, label=f'ROC curve (area = {roc_auc:.2f})')
plt.plot([0, 1], [0, 1], 'k--')  # chance-level diagonal
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver Operating Characteristic (ROC)')
plt.legend(loc='lower right')
plt.show()

### Performance

In [None]:
# Realised returns of both styles on the test rows, in test-set order.
test_index = X_test.index
momentum_test_returns = data.loc[test_index, 'momentum_returns']
value_test_returns = data.loc[test_index, 'value_returns']

# y_pred_proba holds only P(momentum) (one value per test row), so P(value)
# is its complement. A full two-column predict_proba matrix is accepted too.
momentum_prob = np.asarray(y_pred_proba, dtype=float)
if momentum_prob.ndim == 2:
    momentum_prob = momentum_prob[:, 1]
value_prob = 1.0 - momentum_prob

# Binary bet: hold whichever style the model considers more likely to win
# (ties go to value).
binary_bet_returns = np.where(
    momentum_prob > value_prob,
    momentum_test_returns.to_numpy(),
    value_test_returns.to_numpy(),
)

# Probability weighted bet: blend both styles using the class probabilities.
prob_weighted_returns = (
    value_prob * value_test_returns.to_numpy()
    + momentum_prob * momentum_test_returns.to_numpy()
)

# Calculate cumulative returns for each time series
cumulative_momentum_returns = (1 + momentum_test_returns).cumprod() - 1
cumulative_value_returns = (1 + value_test_returns).cumprod() - 1
cumulative_binary_bet_returns = np.cumprod(1 + binary_bet_returns) - 1
cumulative_prob_weighted_returns = np.cumprod(1 + prob_weighted_returns) - 1

# Plot the cumulative returns.
# All four curves are drawn against the same positional axis; plotting the
# benchmark Series directly would use the DataFrame index for two curves and
# 0..n-1 for the other two.
plt.figure(figsize=(12, 6))
plt.plot(cumulative_momentum_returns.to_numpy(), label='Momentum')
plt.plot(cumulative_value_returns.to_numpy(), label='Value')
plt.plot(cumulative_binary_bet_returns, label='Binary Bet')
plt.plot(cumulative_prob_weighted_returns, label='Probability Weighted Bet')
plt.xlabel('Time')
plt.ylabel('Cumulative Returns')
plt.title('Comparison of Strategy Performance')
plt.legend()
plt.show()

### Performance Evaluation

In [None]:
def calculate_performance_metrics(returns):
    """Summarise a series of periodic simple returns.

    Parameters
    ----------
    returns : array-like of float
        Periodic returns as a list, NumPy array or pandas Series. Values are
        read positionally, so any Series index is ignored.

    Returns
    -------
    tuple of float
        ``(mean_return, std_dev, sharpe_ratio, max_drawdown)`` where

        * ``mean_return`` is the arithmetic mean of ``returns``;
        * ``std_dev`` is the population standard deviation (``ddof=0``);
        * ``sharpe_ratio`` is ``mean_return / std_dev`` with a risk-free rate
          of 0, or ``nan`` when ``std_dev`` is zero or NaN;
        * ``max_drawdown`` is the largest drop of the compounded cumulative
          return below its running maximum, as a non-negative difference of
          cumulative returns (0.0 if the series never falls).

    Raises
    ------
    ValueError
        If ``returns`` is empty.
    """
    # Work on a plain float array so lists work and indexing is positional.
    returns = np.asarray(returns, dtype=float)
    if returns.size == 0:
        raise ValueError("returns must contain at least one observation")

    mean_return = float(np.mean(returns))
    std_dev = float(np.std(returns))

    # Sharpe ratio (risk-free rate of 0); undefined without volatility.
    sharpe_ratio = mean_return / std_dev if std_dev > 0 else float("nan")

    # Maximum drawdown: biggest gap between the running peak of the cumulative
    # return and its current value. Taking the maximum of the gap directly
    # avoids searching an empty slice when the worst point is the first one.
    cum_returns = np.cumprod(1 + returns) - 1
    running_peak = np.maximum.accumulate(cum_returns)
    max_drawdown = float(np.max(running_peak - cum_returns))

    return mean_return, std_dev, sharpe_ratio, max_drawdown

# Evaluate the two benchmark styles on the test rows, then the two strategies.
momentum_metrics = calculate_performance_metrics(data.loc[X_test.index, 'momentum_returns'])
value_metrics = calculate_performance_metrics(data.loc[X_test.index, 'value_returns'])
binary_bet_metrics = calculate_performance_metrics(binary_bet_returns)
prob_weighted_metrics = calculate_performance_metrics(prob_weighted_returns)

# Report every series in the same two-line layout: a header, then the four
# metrics rounded to four decimals.
for header, metrics in (
    ("Momentum Performance Metrics:", momentum_metrics),
    ("Value Performance Metrics:", value_metrics),
    ("Binary Bet Performance Metrics:", binary_bet_metrics),
    ("Probability Weighted Bet Performance Metrics:", prob_weighted_metrics),
):
    print(header)
    print(f"Mean Return: {metrics[0]:.4f} | Standard Deviation: {metrics[1]:.4f} | Sharpe Ratio: {metrics[2]:.4f} | Max Drawdown: {metrics[3]:.4f}")