In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import AdaBoostClassifier
from sklearn.metrics import accuracy_score
import random
import warnings

# Suppress FutureWarning messages for the remainder of the script.
# The filter has to be installed at module level: wrapping it in
# ``with warnings.catch_warnings():`` restores the previous filter list as
# soon as the ``with`` block exits, so nothing executed afterwards would
# actually be silenced.
warnings.simplefilter("ignore", category=FutureWarning)

# Read the price file, parse the 'Date' column into datetimes and use it as
# the row index.
df_original = pd.read_csv('all_etfs_and_indexes_data_with_descriptions_and_action_sorted.csv')
df_original = df_original.assign(Date=pd.to_datetime(df_original['Date'])).set_index('Date')

# Accumulator for the per-run result rows (one dict per evaluated window pair)
results_list = list()

# Distinct values found in the 'Symbol' column
symbols = df_original['Symbol'].unique()

# For every symbol, try 100 random (short, long) moving-average window pairs.
# Each pair builds a crossover signal, trains AdaBoost on the first year of
# rows and scores it on everything after; one result dict per evaluated pair
# is appended to results_list.
for symbol in symbols:
    # Rolling windows and the date-label slices below both rely on rows being
    # in chronological order. A stable sort leaves already-sorted data as is.
    df_symbol = df_original[df_original['Symbol'] == symbol].sort_index(kind='mergesort')

    for _ in range(100):  # Adjust as needed
        # short is drawn from [3, 130]; long is strictly larger, up to 140.
        short_window = random.randint(3, 130)
        long_window = random.randint(short_window + 1, 140)
        print(f"Symbol: {symbol}, Trying window settings: short={short_window}, long={long_window}.")

        df = df_symbol.copy()
        df['short_mavg'] = df['Adj Close'].rolling(window=short_window, min_periods=1).mean()
        df['long_mavg'] = df['Adj Close'].rolling(window=long_window, min_periods=1).mean()
        # +1 when the short average is above the long one, otherwise -1.
        df['signal'] = np.where(df['short_mavg'] > df['long_mavg'], 1, -1)
        df['actual_returns'] = df['Adj Close'].pct_change()
        # Drop only rows missing a value this analysis actually uses. A bare
        # dropna() would also discard rows (possibly every row of a symbol)
        # because of NaNs in unrelated columns of the input file.
        df.dropna(
            subset=['Adj Close', 'short_mavg', 'long_mavg', 'actual_returns'],
            inplace=True,
        )

        if df.empty:
            continue

        # Define training and testing sets: train on the first year of data
        # (label slicing is inclusive), test from the following day onward.
        training_window_end = df.index.min() + pd.DateOffset(years=1)
        testing_window_start = training_window_end + pd.DateOffset(days=1)

        # NOTE(review): 'signal' is computed directly from the two feature
        # columns, so the accuracy below measures how well the classifier
        # recovers sign(short_mavg - long_mavg) on unseen dates, not how well
        # it predicts returns -- confirm this is the intended target.
        X = df[['short_mavg', 'long_mavg']]
        y = df['signal']
        X_train = X.loc[:training_window_end]
        y_train = y.loc[:training_window_end]
        X_test = X.loc[testing_window_start:]
        y_test = y.loc[testing_window_start:]

        # Skip symbols whose history is too short to fill both windows.
        if X_train.empty or X_test.empty:
            continue

        # Fit the scaler on the training rows only and reuse it for the test rows.
        scaler = StandardScaler()
        X_train_scaled = scaler.fit_transform(X_train)
        X_test_scaled = scaler.transform(X_test)

        model = AdaBoostClassifier()
        model.fit(X_train_scaled, y_train)
        predictions = model.predict(X_test_scaled)
        accuracy = accuracy_score(y_test, predictions)

        # Collect results
        results_list.append({
            'Symbol': symbol,
            'Short Window': short_window,
            'Long Window': long_window,
            'Accuracy': accuracy,
        })

# Convert the results list to a DataFrame. The columns are declared explicitly
# so the frame still has an 'Accuracy' column when no window pair produced a
# result; without them an empty results_list yields a column-less frame and
# sort_values(by='Accuracy') raises KeyError.
results_df = pd.DataFrame(
    results_list,
    columns=['Symbol', 'Short Window', 'Long Window', 'Accuracy'],
)

# Sort the DataFrame by 'Accuracy' in descending order
sorted_results_df = results_df.sort_values(by='Accuracy', ascending=False)

# Export the sorted DataFrame to a CSV file (row index omitted)
sorted_results_df.to_csv('strategy_results_adaboost_across_symbols.csv', index=False)
print("AdaBoost results summary saved to strategy_results_adaboost_across_symbols.csv")


Symbol: LQD, Trying window settings: short=75, long=100.
Symbol: LQD, Trying window settings: short=51, long=98.
Symbol: LQD, Trying window settings: short=26, long=61.
Symbol: LQD, Trying window settings: short=36, long=80.
Symbol: LQD, Trying window settings: short=83, long=109.
Symbol: LQD, Trying window settings: short=43, long=78.
Symbol: LQD, Trying window settings: short=105, long=111.
Symbol: LQD, Trying window settings: short=27, long=91.
Symbol: LQD, Trying window settings: short=78, long=108.
Symbol: LQD, Trying window settings: short=70, long=84.
Symbol: LQD, Trying window settings: short=18, long=113.
Symbol: LQD, Trying window settings: short=125, long=130.
Symbol: LQD, Trying window settings: short=55, long=95.
Symbol: LQD, Trying window settings: short=64, long=134.
Symbol: LQD, Trying window settings: short=27, long=43.
Symbol: LQD, Trying window settings: short=43, long=54.
Symbol: LQD, Trying window settings: short=62, long=110.
Symbol: LQD, Trying window settings: s