In [1]:
import yfinance as yf
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
import statsmodels.api as sm

sp500_df = yf.download('^GSPC', start='2010-01-01')
monthly_sp500 = sp500_df.resample('M').mean()
monthly_sp500['Return'] = (monthly_sp500['Close'].shift(-21) - monthly_sp500['Close']) / monthly_sp500['Close']

sectors_path = '/Users/allison/Desktop/sectorsymbollist.csv'
sectors_df = pd.read_csv(sectors_path)

for sector_symbol in sectors_df['Sector']:
    print(f'Processing sector: {sector_symbol}')
    
    sector_df = yf.download(sector_symbol, start='2010-01-01')
    monthly_sector = sector_df.resample('M').mean()
    monthly_sector['Return'] = (monthly_sector['Close'].shift(-21) - monthly_sector['Close']) / monthly_sector['Close']

    monthly_sp500.dropna(inplace=True)
    monthly_sector.dropna(inplace=True)

    returns_df = pd.DataFrame({
        'SP500_Return': monthly_sp500['Return'],
        'Sector_Return': monthly_sector['Return']
    }).dropna()

    X = returns_df['SP500_Return'].values
    y = returns_df['Sector_Return'].values

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    X_train_sm = sm.add_constant(X_train)  
    model = sm.OLS(y_train, X_train_sm).fit()

    y_pred = model.predict(sm.add_constant(X_test))

    r2 = model.rsquared

    print(f'Sector: {sector_symbol}')
    print(f'Intercept: {model.params[0]}')
    print(f'Coefficients: {model.params[1:]}')
    print(f'R^2 Score: {r2}')
    print(f'P-Values: \n{model.pvalues}')



[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Processing sector: ^SP500-50
Sector: ^SP500-50
Intercept: -0.0675077122744839
Coefficients: [0.93816799]
R^2 Score: 0.40869887814050876
P-Values: 
[1.09908738e-02 1.34143515e-15]
Processing sector: ^SP500-2550


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Sector: ^SP500-2550
Intercept: 0.10785275228443295
Coefficients: [1.14219748]
R^2 Score: 0.5214308038920453
P-Values: 
[4.12785633e-05 2.96990790e-21]
Processing sector: ^SP500-3010
Sector: ^SP500-3010
Intercept: 0.06894906328056548
Coefficients: [0.63845711]
R^2 Score: 0.43477739915341573
P-Values: 
[7.81053633e-05 8.31162257e-17]
Processing sector: ^GSPE


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Sector: ^GSPE
Intercept: 0.05389824508443403
Coefficients: [0.19863582]
R^2 Score: 0.00545722956816963
P-Values: 
[0.38727697 0.41484376]
Processing sector: ^SP500-40
Sector: ^SP500-40
Intercept: -0.07959239544940346
Coefficients: [1.22275638]
R^2 Score: 0.6634250949431797
P-Values: 
[1.33733756e-04 1.25083649e-30]
Processing sector: ^SP500-35


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed



Intercept: 0.06479436100839739
Coefficients: [0.7779524]
R^2 Score: 0.5060247085116598
P-Values: 
[4.02453775e-04 2.08184228e-20]
Processing sector: ^SP500-20


[*********************100%%**********************]  1 of 1 completed


Sector: ^SP500-20
Intercept: -0.022650545788811788
Coefficients: [0.99375637]
R^2 Score: 0.8200353301585377
P-Values: 
[3.77600044e-02 2.92826987e-47]
Processing sector: ^SP500-15
Sector: ^SP500-15
Intercept: -0.09778200466255563
Coefficients: [1.16720173]
R^2 Score: 0.8339318855110787
P-Values: 
[4.67479321e-13 2.15810081e-49]
Processing sector: ^SP500-60


[*********************100%%**********************]  1 of 1 completed



Intercept: 0.024100204259458677
Coefficients: [0.48034063]
R^2 Score: 0.24306706734522532
P-Values: 
[2.41399445e-01 1.06163053e-08]
Processing sector: ^SP500-45


[*********************100%%**********************]  1 of 1 completed


Sector: ^SP500-45
Intercept: 0.08175775358978023
Coefficients: [1.31227732]
R^2 Score: 0.6617427445972022
P-Values: 
[2.60611756e-04 1.69756906e-30]
Processing sector: ^SP500-55


[*********************100%%**********************]  1 of 1 completed


Intercept: 0.08721361222198437
Coefficients: [0.12094086]
R^2 Score: 0.03751855437622742
P-Values: 
[1.03337055e-08 3.11201977e-02]



