In [707]:
import os
import numpy as np
import pandas as pd
import xgboost as xgb
import matplotlib.pyplot as plt
from xgboost import plot_importance, plot_tree
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import accuracy_score,classification_report
from sklearn.metrics import roc_curve, auc

# Time series decomposition
from statsmodels.tsa.seasonal import seasonal_decompose

# Chart drawing
import plotly as py
import plotly.io as pio
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot

# Mute sklearn warnings
from warnings import simplefilter
simplefilter(action='ignore', category=FutureWarning)
simplefilter(action='ignore', category=DeprecationWarning)

# Show charts when running kernel
init_notebook_mode(connected=True)

# Change default background color for all visualizations
layout=go.Layout(paper_bgcolor='rgba(0,0,0,0)', plot_bgcolor='rgba(250,250,250,0.8)')
fig = go.Figure(layout=layout)
templated_fig = pio.to_templated(fig)
pio.templates['my_template'] = templated_fig.layout.template
pio.templates.default = 'my_template'

In [708]:
file_path = '/Users/edocampione/Desktop/Meng Engineering Science/4YP/scripts/dati_trattati.csv' # cleaned data
df = pd.read_csv(file_path)
df['datadate'] = pd.to_datetime(df['datadate'])
df['datadate'] = df['datadate'] + pd.offsets.QuarterEnd(0)

df = df.dropna(subset=['relative_quarterly_return'])

df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 14322 entries, 1 to 15338
Data columns (total 48 columns):
 #   Column                          Non-Null Count  Dtype         
---  ------                          --------------  -----         
 0   gvkey                           14322 non-null  int64         
 1   datadate                        14322 non-null  datetime64[ns]
 2   tic                             14322 non-null  object        
 3   actq                            14322 non-null  float64       
 4   ancq                            14322 non-null  float64       
 5   epsfxq                          14302 non-null  float64       
 6   lctq                            14322 non-null  float64       
 7   ltq                             14322 non-null  float64       
 8   revtq                           14322 non-null  float64       
 9   capxy                           13783 non-null  float64       
 10  roa                             14289 non-null  float64       
 11  ro

In [709]:
drop_features = ['capxy', 'cfm', 'PEG_trailing', 'de_ratio', 'divyield']
df = df.drop(drop_features, 1)
df = df.dropna()
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 11212 entries, 32 to 15337
Data columns (total 43 columns):
 #   Column                          Non-Null Count  Dtype         
---  ------                          --------------  -----         
 0   gvkey                           11212 non-null  int64         
 1   datadate                        11212 non-null  datetime64[ns]
 2   tic                             11212 non-null  object        
 3   actq                            11212 non-null  float64       
 4   ancq                            11212 non-null  float64       
 5   epsfxq                          11212 non-null  float64       
 6   lctq                            11212 non-null  float64       
 7   ltq                             11212 non-null  float64       
 8   revtq                           11212 non-null  float64       
 9   roa                             11212 non-null  float64       
 10  roe                             11212 non-null  float64       
 11  p

In [710]:
# Find the unique number of stocks in the dataframe
unique_stocks = df['tic'].unique()

# Print the result
print(f"Number of stocks: {len(unique_stocks)}")
print(unique_stocks)


Number of stocks: 80
['AAPL' 'ABT' 'ADBE' 'ADM' 'AMD' 'AMGN' 'AMT' 'AMZN' 'APD' 'BA' 'BAX'
 'BMY' 'CAT' 'CL' 'CME' 'COP' 'COST' 'CSCO' 'CTSH' 'CVS' 'CVX' 'DIS' 'ECL'
 'EQIX' 'EXC' 'EXPE' 'F' 'FDX' 'FIS' 'GE' 'GOOGL' 'HCA' 'HD' 'IBM' 'INTC'
 'JBL' 'JNJ' 'KMB' 'KO' 'LLY' 'LMT' 'LUV' 'MA' 'MCD' 'META' 'MMC' 'MMM'
 'MRK' 'MSFT' 'MU' 'NFLX' 'NKE' 'NOC' 'NVDA' 'OXY' 'PFE' 'PG' 'PPL' 'PYPL'
 'QCOM' 'REGN' 'RJF' 'RTX' 'SBUX' 'SPGI' 'STZ' 'T' 'TGT' 'TMO' 'TSLA'
 'TSN' 'UNH' 'UPS' 'V' 'VZ' 'WBA' 'WDC' 'WMT' 'XOM' 'ZTS']


In [711]:
df = df.dropna()
df = df.sort_values(by=['datadate'])

df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 11212 entries, 172 to 3737
Data columns (total 43 columns):
 #   Column                          Non-Null Count  Dtype         
---  ------                          --------------  -----         
 0   gvkey                           11212 non-null  int64         
 1   datadate                        11212 non-null  datetime64[ns]
 2   tic                             11212 non-null  object        
 3   actq                            11212 non-null  float64       
 4   ancq                            11212 non-null  float64       
 5   epsfxq                          11212 non-null  float64       
 6   lctq                            11212 non-null  float64       
 7   ltq                             11212 non-null  float64       
 8   revtq                           11212 non-null  float64       
 9   roa                             11212 non-null  float64       
 10  roe                             11212 non-null  float64       
 11  p

In [712]:
results = pd.DataFrame(columns=['datadate', 'tic', 'next_prccq_change', 'next_relative_quarterly_return', 'y_pred'])

y_train = []
y_valid = []
y_test = []

y_train_pred = []
y_valid_pred = []
y_test_pred = []

removed_stocks = []

for stock in unique_stocks:
    try:
        # Your processing code here

        df_local = df[df['tic'] == stock].reset_index(drop=True)

        train_start_date = '1970-03-31'
        valid_start_date = '2010-03-31'
        test_start_date = '2013-03-31'
        end_date = '2023-09-30'

        train_df = df_local[(df_local['datadate'] >= train_start_date) & (df_local['datadate'] < valid_start_date)].sort_values(by=['datadate'])
        valid_df = df_local[(df_local['datadate'] >= valid_start_date) & (df_local['datadate'] < test_start_date)].sort_values(by=['datadate'])
        test_df = df_local[(df_local['datadate'] >= test_start_date) & (df_local['datadate'] < end_date)].sort_values(by=['datadate'])

        if train_df.empty:
            removed_stocks.append(stock)
            continue  # Skip this iteration if the DataFrame is empty

        if test_df.empty:
            removed_stocks.append(stock)
            continue  # Skip this iteration if the DataFrame is empty

        results_temp = test_df[['datadate', 'tic', 'next_prccq_change', 'next_relative_quarterly_return']].copy()

        drop_cols = ['gvkey', 'datadate', 'tic', 'spindx', 'spindx_change', 'next_spindx_change', 'prccq', 'prccq_change', 'next_prccq_change']

        train_df = train_df.drop(drop_cols, 1)
        valid_df = valid_df.drop(drop_cols, 1)
        test_df  = test_df.drop(drop_cols, 1)

        y_train_temp = train_df['next_relative_quarterly_return'].copy()
        X_train = train_df.drop(['next_relative_quarterly_return',], 1)

        y_valid_temp = valid_df['next_relative_quarterly_return'].copy()
        X_valid = valid_df.drop(['next_relative_quarterly_return',], 1)

        y_test_temp  = test_df['next_relative_quarterly_return'].copy()
        X_test  = test_df.drop(['next_relative_quarterly_return',], 1)

        # Scaling
        scaler = MinMaxScaler()

        # Fit the scaler on the training/validation data
        X_train_scaled = scaler.fit_transform(X_train)

        # Now apply the same scaling to the val/test data using the already fitted scaler
        #X_valid_scaled = scaler.transform(X_valid)
        X_test_scaled = scaler.transform(X_test)

        # Convert the scaled data back to DataFrame for consistency
        X_train_scaled = pd.DataFrame(X_train_scaled, columns=X_train.columns)
        X_valid_scaled = pd.DataFrame(X_valid_scaled, columns=X_valid.columns)
        X_test_scaled = pd.DataFrame(X_test_scaled, columns=X_test.columns)

        parameters = {
            'n_estimators': [100],
            'learning_rate': [0.05],
            'max_depth': [8],
            'gamma': [0.1],
            'random_state': [42]
        }

        eval_set = [(X_train_scaled, y_train_temp), (X_test_scaled, y_test_temp)]
        model = xgb.XGBRegressor(eval_set=eval_set, objective='reg:squarederror', verbose=False)
        clf = GridSearchCV(model, parameters)

        clf.fit(X_train_scaled, y_train_temp)

        print(f'Best params: {clf.best_params_}')
        print(f'Best validation score = {clf.best_score_}')

        model = xgb.XGBRegressor(**clf.best_params_, objective='reg:squarederror')
        model.fit(X_train_scaled, y_train_temp, eval_set=eval_set, verbose=False)

        # Predictions
        y_train_pred_temp = model.predict(X_train_scaled)
        y_valid_pred_temp = model.predict(X_valid_scaled)
        y_test_pred_temp = model.predict(X_test_scaled)
        y_train_pred.extend(y_train_pred_temp)
        y_valid_pred.extend(y_valid_pred_temp)
        y_test_pred.extend(y_test_pred_temp)

        y_train.extend(y_train_temp)
        y_valid.extend(y_valid_temp)
        y_test.extend(y_test_temp)

        results_temp['y_pred'] = y_test_pred_temp
        results = pd.concat([results, results_temp], ignore_index=True)

    except Exception as e:
        removed_stocks.append(stock)
        print(f"Error encountered: {e}")  # Logs the error
        continue



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.




Best params: {'gamma': 0.1, 'learning_rate': 0.05, 'max_depth': 8, 'n_estimators': 100, 'random_state': 42}
Best validation score = -0.014121836421940226



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.




Best params: {'gamma': 0.1, 'learning_rate': 0.05, 'max_depth': 8, 'n_estimators': 100, 'random_state': 42}
Best validation score = -0.02829947967081401



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.




Best params: {'gamma': 0.1, 'learning_rate': 0.05, 'max_depth': 8, 'n_estimators': 100, 'random_state': 42}
Best validation score = -0.311406155833497



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.




Best params: {'gamma': 0.1, 'learning_rate': 0.05, 'max_depth': 8, 'n_estimators': 100, 'random_state': 42}
Best validation score = 0.06758348068453537



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.




Best params: {'gamma': 0.1, 'learning_rate': 0.05, 'max_depth': 8, 'n_estimators': 100, 'random_state': 42}
Best validation score = 0.27352064246217256



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.




Best params: {'gamma': 0.1, 'learning_rate': 0.05, 'max_depth': 8, 'n_estimators': 100, 'random_state': 42}
Best validation score = 0.12373727932740004



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.




Best params: {'gamma': 0.1, 'learning_rate': 0.05, 'max_depth': 8, 'n_estimators': 100, 'random_state': 42}
Best validation score = -0.11101902573505595



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.




Best params: {'gamma': 0.1, 'learning_rate': 0.05, 'max_depth': 8, 'n_estimators': 100, 'random_state': 42}
Best validation score = 0.07865580711601447



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.




Best params: {'gamma': 0.1, 'learning_rate': 0.05, 'max_depth': 8, 'n_estimators': 100, 'random_state': 42}
Best validation score = 0.21263843515140674



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.




Best params: {'gamma': 0.1, 'learning_rate': 0.05, 'max_depth': 8, 'n_estimators': 100, 'random_state': 42}
Best validation score = 0.0949891982316962



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.




Best params: {'gamma': 0.1, 'learning_rate': 0.05, 'max_depth': 8, 'n_estimators': 100, 'random_state': 42}
Best validation score = 0.0194559277172782



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.




Best params: {'gamma': 0.1, 'learning_rate': 0.05, 'max_depth': 8, 'n_estimators': 100, 'random_state': 42}
Best validation score = 0.0637823270414398



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.




Best params: {'gamma': 0.1, 'learning_rate': 0.05, 'max_depth': 8, 'n_estimators': 100, 'random_state': 42}
Best validation score = 0.09365362294111368



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.




Best params: {'gamma': 0.1, 'learning_rate': 0.05, 'max_depth': 8, 'n_estimators': 100, 'random_state': 42}
Best validation score = -0.9879978304113303



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.




Best params: {'gamma': 0.1, 'learning_rate': 0.05, 'max_depth': 8, 'n_estimators': 100, 'random_state': 42}
Best validation score = 0.048150029648377335



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.




Best params: {'gamma': 0.1, 'learning_rate': 0.05, 'max_depth': 8, 'n_estimators': 100, 'random_state': 42}
Best validation score = -0.1113895693650413



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.




Best params: {'gamma': 0.1, 'learning_rate': 0.05, 'max_depth': 8, 'n_estimators': 100, 'random_state': 42}
Best validation score = -12.065489241365448



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.




Best params: {'gamma': 0.1, 'learning_rate': 0.05, 'max_depth': 8, 'n_estimators': 100, 'random_state': 42}
Best validation score = 0.06928986827352257



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.




Best params: {'gamma': 0.1, 'learning_rate': 0.05, 'max_depth': 8, 'n_estimators': 100, 'random_state': 42}
Best validation score = -0.05237546763125658



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.




Best params: {'gamma': 0.1, 'learning_rate': 0.05, 'max_depth': 8, 'n_estimators': 100, 'random_state': 42}
Best validation score = -0.05633318605629245



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.




Best params: {'gamma': 0.1, 'learning_rate': 0.05, 'max_depth': 8, 'n_estimators': 100, 'random_state': 42}
Best validation score = 0.10272830565980651



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.




Best params: {'gamma': 0.1, 'learning_rate': 0.05, 'max_depth': 8, 'n_estimators': 100, 'random_state': 42}
Best validation score = -2.380377038358969



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.




Best params: {'gamma': 0.1, 'learning_rate': 0.05, 'max_depth': 8, 'n_estimators': 100, 'random_state': 42}
Best validation score = 0.08587936193739058



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.




Best params: {'gamma': 0.1, 'learning_rate': 0.05, 'max_depth': 8, 'n_estimators': 100, 'random_state': 42}
Best validation score = 0.09988059654808265



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.




Best params: {'gamma': 0.1, 'learning_rate': 0.05, 'max_depth': 8, 'n_estimators': 100, 'random_state': 42}
Best validation score = -0.19567668014383052



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.




Best params: {'gamma': 0.1, 'learning_rate': 0.05, 'max_depth': 8, 'n_estimators': 100, 'random_state': 42}
Best validation score = 0.024727041068306765



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.




Best params: {'gamma': 0.1, 'learning_rate': 0.05, 'max_depth': 8, 'n_estimators': 100, 'random_state': 42}
Best validation score = -0.5411370162139022



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.




Best params: {'gamma': 0.1, 'learning_rate': 0.05, 'max_depth': 8, 'n_estimators': 100, 'random_state': 42}
Best validation score = -0.08595237170268319
Error encountered: Cannot have number of splits n_splits=5 greater than the number of samples: n_samples=4.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.




Best params: {'gamma': 0.1, 'learning_rate': 0.05, 'max_depth': 8, 'n_estimators': 100, 'random_state': 42}
Best validation score = -0.0753006725157487



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.




Best params: {'gamma': 0.1, 'learning_rate': 0.05, 'max_depth': 8, 'n_estimators': 100, 'random_state': 42}
Best validation score = 0.1444577779007365



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.




Best params: {'gamma': 0.1, 'learning_rate': 0.05, 'max_depth': 8, 'n_estimators': 100, 'random_state': 42}
Best validation score = 0.036156009766764786



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.




Best params: {'gamma': 0.1, 'learning_rate': 0.05, 'max_depth': 8, 'n_estimators': 100, 'random_state': 42}
Best validation score = 0.06415133544943497



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.




Best params: {'gamma': 0.1, 'learning_rate': 0.05, 'max_depth': 8, 'n_estimators': 100, 'random_state': 42}
Best validation score = -0.37723024844769126



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.




Best params: {'gamma': 0.1, 'learning_rate': 0.05, 'max_depth': 8, 'n_estimators': 100, 'random_state': 42}
Best validation score = -0.0028338258754393884



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.




Best params: {'gamma': 0.1, 'learning_rate': 0.05, 'max_depth': 8, 'n_estimators': 100, 'random_state': 42}
Best validation score = 0.08161011934705598



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.




Best params: {'gamma': 0.1, 'learning_rate': 0.05, 'max_depth': 8, 'n_estimators': 100, 'random_state': 42}
Best validation score = 0.02373288475930373



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.




Best params: {'gamma': 0.1, 'learning_rate': 0.05, 'max_depth': 8, 'n_estimators': 100, 'random_state': 42}
Best validation score = -0.04410487980172917



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.




Best params: {'gamma': 0.1, 'learning_rate': 0.05, 'max_depth': 8, 'n_estimators': 100, 'random_state': 42}
Best validation score = 0.09450674924152244



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.




Best params: {'gamma': 0.1, 'learning_rate': 0.05, 'max_depth': 8, 'n_estimators': 100, 'random_state': 42}
Best validation score = 0.17098706445068607



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.




Best params: {'gamma': 0.1, 'learning_rate': 0.05, 'max_depth': 8, 'n_estimators': 100, 'random_state': 42}
Best validation score = -0.3069962478963513



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.




Best params: {'gamma': 0.1, 'learning_rate': 0.05, 'max_depth': 8, 'n_estimators': 100, 'random_state': 42}
Best validation score = 0.027124811644589



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.




Best params: {'gamma': 0.1, 'learning_rate': 0.05, 'max_depth': 8, 'n_estimators': 100, 'random_state': 42}
Best validation score = -0.08145674973496426



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.




Best params: {'gamma': 0.1, 'learning_rate': 0.05, 'max_depth': 8, 'n_estimators': 100, 'random_state': 42}
Best validation score = 0.0063273320009820996



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.




Best params: {'gamma': 0.1, 'learning_rate': 0.05, 'max_depth': 8, 'n_estimators': 100, 'random_state': 42}
Best validation score = 0.016709223199584212



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.




Best params: {'gamma': 0.1, 'learning_rate': 0.05, 'max_depth': 8, 'n_estimators': 100, 'random_state': 42}
Best validation score = -4.472818408409075



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.




Best params: {'gamma': 0.1, 'learning_rate': 0.05, 'max_depth': 8, 'n_estimators': 100, 'random_state': 42}
Best validation score = -0.04451034708837163



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.




Best params: {'gamma': 0.1, 'learning_rate': 0.05, 'max_depth': 8, 'n_estimators': 100, 'random_state': 42}
Best validation score = -2.236989835340701



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.




Best params: {'gamma': 0.1, 'learning_rate': 0.05, 'max_depth': 8, 'n_estimators': 100, 'random_state': 42}
Best validation score = 0.17402458566763698



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.




Best params: {'gamma': 0.1, 'learning_rate': 0.05, 'max_depth': 8, 'n_estimators': 100, 'random_state': 42}
Best validation score = 0.19500324203072142



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.




Best params: {'gamma': 0.1, 'learning_rate': 0.05, 'max_depth': 8, 'n_estimators': 100, 'random_state': 42}
Best validation score = -0.45584929187892415



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.




Best params: {'gamma': 0.1, 'learning_rate': 0.05, 'max_depth': 8, 'n_estimators': 100, 'random_state': 42}
Best validation score = -0.00633644854973423



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.




Best params: {'gamma': 0.1, 'learning_rate': 0.05, 'max_depth': 8, 'n_estimators': 100, 'random_state': 42}
Best validation score = 0.05165048663721587



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.




Best params: {'gamma': 0.1, 'learning_rate': 0.05, 'max_depth': 8, 'n_estimators': 100, 'random_state': 42}
Best validation score = 0.02937226531703028



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.




Best params: {'gamma': 0.1, 'learning_rate': 0.05, 'max_depth': 8, 'n_estimators': 100, 'random_state': 42}
Best validation score = -0.006071334122875372



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.




Best params: {'gamma': 0.1, 'learning_rate': 0.05, 'max_depth': 8, 'n_estimators': 100, 'random_state': 42}
Best validation score = -0.073258406316527



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.




Best params: {'gamma': 0.1, 'learning_rate': 0.05, 'max_depth': 8, 'n_estimators': 100, 'random_state': 42}
Best validation score = -0.10700171425894218



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.




Best params: {'gamma': 0.1, 'learning_rate': 0.05, 'max_depth': 8, 'n_estimators': 100, 'random_state': 42}
Best validation score = -0.04951713674221945



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.




Best params: {'gamma': 0.1, 'learning_rate': 0.05, 'max_depth': 8, 'n_estimators': 100, 'random_state': 42}
Best validation score = 0.23412185007433753



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.




Best params: {'gamma': 0.1, 'learning_rate': 0.05, 'max_depth': 8, 'n_estimators': 100, 'random_state': 42}
Best validation score = 0.12015829376173952



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.




Best params: {'gamma': 0.1, 'learning_rate': 0.05, 'max_depth': 8, 'n_estimators': 100, 'random_state': 42}
Best validation score = -0.01288835520277216



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.




Best params: {'gamma': 0.1, 'learning_rate': 0.05, 'max_depth': 8, 'n_estimators': 100, 'random_state': 42}
Best validation score = -0.03131119455485978



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.




Best params: {'gamma': 0.1, 'learning_rate': 0.05, 'max_depth': 8, 'n_estimators': 100, 'random_state': 42}
Best validation score = 0.1838251599008534



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.




Best params: {'gamma': 0.1, 'learning_rate': 0.05, 'max_depth': 8, 'n_estimators': 100, 'random_state': 42}
Best validation score = 0.0520871705138309



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.




Best params: {'gamma': 0.1, 'learning_rate': 0.05, 'max_depth': 8, 'n_estimators': 100, 'random_state': 42}
Best validation score = 0.19497167462884726



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.




Best params: {'gamma': 0.1, 'learning_rate': 0.05, 'max_depth': 8, 'n_estimators': 100, 'random_state': 42}
Best validation score = 0.16069117086742074



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.




Best params: {'gamma': 0.1, 'learning_rate': 0.05, 'max_depth': 8, 'n_estimators': 100, 'random_state': 42}
Best validation score = -0.052144502528999936



Parameters: { "eval_set", "verbose" } are not used.



R^2 score is not well-defined with less than two samples.


Parameters: { "eval_set", "verbose" } are not used.



R^2 score is not well-defined with less than two samples.


Parameters: { "eval_set", "verbose" } are not used.



R^2 score is not well-defined with less than two samples.


Parameters: { "eval_set", "verbose" } are not used.



R^2 score is not well-defined with less than two samples.


Parameters: { "eval_set", "verbose" } are not used.



R^2 score is not well-defined with less than two samples.


One or more of the test scores are non-finite: [nan]


Parameters: { "eval_set", "verbose" } are not used.




Best params: {'gamma': 0.1, 'learning_rate': 0.05, 'max_depth': 8, 'n_estimators': 100, 'random_state': 42}
Best validation score = nan



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.




Best params: {'gamma': 0.1, 'learning_rate': 0.05, 'max_depth': 8, 'n_estimators': 100, 'random_state': 42}
Best validation score = -0.0297072194277578



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.




Best params: {'gamma': 0.1, 'learning_rate': 0.05, 'max_depth': 8, 'n_estimators': 100, 'random_state': 42}
Best validation score = 0.04471073165455679



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.




Best params: {'gamma': 0.1, 'learning_rate': 0.05, 'max_depth': 8, 'n_estimators': 100, 'random_state': 42}
Best validation score = -0.24923813700078137



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.




Best params: {'gamma': 0.1, 'learning_rate': 0.05, 'max_depth': 8, 'n_estimators': 100, 'random_state': 42}
Best validation score = 0.011799185075074558



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.



Parameters: { "eval_set", "verbose" } are not used.




Best params: {'gamma': 0.1, 'learning_rate': 0.05, 'max_depth': 8, 'n_estimators': 100, 'random_state': 42}
Best validation score = -0.012970262304416069


In [713]:
print(removed_stocks)

['AMT', 'CTSH', 'GOOGL', 'META', 'PYPL', 'RJF', 'TSLA', 'ZTS']


In [714]:
results = results.sort_values(by=['datadate', 'next_relative_quarterly_return'], ascending=[True, False])
results.head(10)

Unnamed: 0,datadate,tic,next_prccq_change,next_relative_quarterly_return,y_pred
158,2013-03-31,AMD,0.6,0.566207,0.306892
1642,2013-03-31,MU,0.394627,0.318003,0.038616
2044,2013-03-31,REGN,0.274808,0.241016,-0.065373
489,2013-03-31,CME,0.236769,0.202977,0.073255
2598,2013-03-31,WDC,0.234639,0.200846,0.16998
1600,2013-03-31,MSFT,0.207656,0.173863,0.042969
317,2013-03-31,BA,0.193244,0.159451,0.074426
1763,2013-03-31,NOC,0.180328,0.146535,0.024585
2192,2013-03-31,STZ,0.198237,0.121613,0.020715
2126,2013-03-31,SBUX,0.150307,0.116515,0.067577


In [715]:
# Classifier
y_train = np.array(y_train)
y_valid = np.array(y_valid)
y_test = np.array(y_test)

y_train_pred = np.array(y_train_pred)
y_valid_pred = np.array(y_valid_pred)
y_test_pred = np.array(y_test_pred)

y_train_pred_binary = (y_train_pred >= 0).astype(int)
y_valid_pred_binary = (y_valid_pred >= 0).astype(int)
y_test_pred_binary = (y_test_pred >= 0).astype(int)

y_train_binary = (y_train >= 0).astype(int)
y_valid_binary = (y_valid >= 0).astype(int)
y_test_binary = (y_test >= 0).astype(int)

print(classification_report(y_test_binary, y_test_pred_binary))

              precision    recall  f1-score   support

           0       0.68      0.50      0.58      1373
           1       0.60      0.76      0.67      1349

    accuracy                           0.63      2722
   macro avg       0.64      0.63      0.62      2722
weighted avg       0.64      0.63      0.62      2722



In [719]:
# Group by 'datadate' and calculate the count of rows and the mean of 'next_prccq_change'
df_avg = results.groupby('datadate').agg(
    stocks=('next_prccq_change', 'size'),
    baseline=('next_prccq_change', 'mean')
).reset_index()


# Add a 'top_10' column that contains the average of the top 10 highest 'y_pred' for each quarter
df_avg['top_10'] = results.groupby('datadate').apply(
    lambda x: x.nlargest(10, 'y_pred')['next_prccq_change'].mean()
).reset_index(level=0, drop=True)


df_avg['top_10_ideal'] = results.groupby('datadate').apply(
    lambda x: x.nlargest(10, 'next_prccq_change')['next_prccq_change'].mean()
).reset_index(level=0, drop=True)


df_avg['bottom_10'] = results.groupby('datadate').apply(
    lambda x: x.nsmallest(10, 'y_pred')['next_prccq_change'].mean()
).reset_index(level=0, drop=True)


df_avg['bottom_10_ideal'] = results.groupby('datadate').apply(
    lambda x: x.nsmallest(10, 'next_prccq_change')['next_prccq_change'].mean()
).reset_index(level=0, drop=True)

# Show the first few rows of the new dataframe
df_avg.head(10)


Unnamed: 0,datadate,stocks,baseline,top_10,top_10_ideal,bottom_10,bottom_10_ideal
0,2013-03-31,63,0.065437,0.152438,0.268833,0.00111,-0.084238
1,2013-06-30,64,0.059808,0.096976,0.230842,0.099633,-0.061988
2,2013-09-30,66,0.110123,0.22656,0.314411,0.014327,-0.058148
3,2013-12-31,66,0.032754,0.076488,0.183728,-0.006577,-0.082451
4,2014-03-31,66,0.039382,0.097259,0.158701,0.026445,-0.0613
5,2014-06-30,67,0.02267,0.054786,0.167058,0.011773,-0.099601
6,2014-09-30,65,0.054048,0.140578,0.196296,-0.049027,-0.109057
7,2014-12-31,64,0.02715,0.075584,0.177872,-0.004105,-0.122243
8,2015-03-31,65,0.009599,0.06087,0.175274,0.016432,-0.111613
9,2015-06-30,63,-0.056546,0.001166,0.09689,-0.117609,-0.220885


In [717]:
for portfolio in ['baseline', 'top_10']:

    mean_return = df_avg[portfolio].mean()

    # Calculate the standard deviation of the returns of the top 10 portfolio
    std_return = df_avg[portfolio].std()

    # Assuming the risk-free rate is 0 (if you have a specific risk-free rate, replace 0 with that value)
    risk_free_rate = 0.025

    # Calculate the Sharpe ratio
    sharpe_ratio = (mean_return - risk_free_rate) / std_return

    # Print the Sharpe ratio
    print(f"Mean quarterly return of the {portfolio} portfolio: {mean_return}")
    print(f"Sharpe Ratio of the {portfolio} portfolio: {sharpe_ratio}")
    print("")

Mean quarterly return of the baseline portfolio: 0.03360167684373077
Sharpe Ratio of the baseline portfolio: 0.14378314191983466

Mean quarterly return of the top_10 portfolio: 0.10251447836443624
Sharpe Ratio of the top_10 portfolio: 0.9569300654687405



In [718]:
import plotly.graph_objects as go

# Assuming you have a dataframe 'df_avg' with columns 'datadate', 'baseline_portfolio', and 'top_10'

# Calculate the cumulative return for both portfolios
df_avg['baseline_cum_return'] = (1 + df_avg['baseline']).cumprod()
df_avg['top_10_cum_return'] = (1 + df_avg['top_10']).cumprod()

# Calculate the value over time of a $1,000 investment
initial_investment = 1000
df_avg['baseline_value'] = initial_investment * df_avg['baseline_cum_return']
df_avg['top_10_value'] = initial_investment * df_avg['top_10_cum_return']

# Create the plot
fig = go.Figure()

# Add baseline portfolio trace
fig.add_trace(go.Scatter(x=df_avg['datadate'], y=df_avg['baseline_value'],
                         mode='lines', name='Baseline Portfolio'))

# Add top_10 portfolio trace
fig.add_trace(go.Scatter(x=df_avg['datadate'], y=df_avg['top_10_value'],
                         mode='lines', name='Top 10 Portfolio'))

# Update the layout
fig.update_layout(
    title='Portfolio Value Over Time ($1,000 Investment)',
    xaxis_title='Date',
    yaxis_title='Portfolio Value ($)',
)

# Show the plot
fig.show()
