In [13]:
import pandas as pd
import yfinance as yf
from ta import add_all_ta_features
from sklearn.impute import KNNImputer
import warnings
warnings.filterwarnings("ignore", category=RuntimeWarning)


# List of tickers
tickers = ['AAPL', 'GOOG', 'MSFT', 'AMZN', 'TSLA']  # add as many tickers as you want

dfs = []  # List to store dataframes

# Download the data for all tickers and append into the list
for ticker in tickers:
    tickerData = yf.Ticker(ticker)
    ticker_df = tickerData.history(period='1d', start='2010-1-1', end='2023-1-1')
    
    # Add all ta features
    ticker_df = add_all_ta_features(ticker_df, open="Open", high="High", low="Low", close="Close", volume="Volume")

    dfs.append(ticker_df)

# Concatenate all dataframes
df = pd.concat(dfs, keys=tickers, names=['Ticker', 'Date'])

# Save a copy of the dataframe with ticker information before dropping it for imputation
df_copy = df.copy()

# Drop 'Ticker' level before imputation
df.index = df.index.droplevel('Ticker')

# Initialize KNNImputer
imputer = KNNImputer(n_neighbors=5)

# Apply imputation
df_imputed = imputer.fit_transform(df)

# Convert result back to DataFrame
df = pd.DataFrame(df_imputed, columns=df.columns)

# Restore 'Ticker' level
df['Ticker'] = df_copy.index.get_level_values('Ticker')
df.set_index(['Ticker', df.index], inplace=True)

# Print the DataFrame
print(df)


                    Open        High         Low       Close       Volume  \
Ticker                                                                      
AAPL   0        6.478690    6.511170    6.446818    6.496296  493729600.0   
       1        6.514204    6.544256    6.473224    6.507526  601904800.0   
       2        6.507526    6.533328    6.397337    6.404016  552160000.0   
       3        6.427692    6.435281    6.345733    6.392176  477131200.0   
       4        6.383676    6.435280    6.346036    6.434672  447610800.0   
...                  ...         ...         ...         ...          ...   
TSLA   16233  126.370003  128.619995  121.019997  123.150002  166989700.0   
       16234  117.500000  119.669998  108.760002  109.099998  208643400.0   
       16235  110.349998  116.269997  108.239998  112.709999  221070500.0   
       16236  120.389999  123.570000  117.500000  121.820000  221923300.0   
       16237  119.949997  124.480003  119.750000  123.180000  157777300.0   

In [16]:
df

Unnamed: 0_level_0,Unnamed: 1_level_0,Open,High,Low,Close,Volume,Dividends,Stock Splits,volume_adi,volume_obv,volume_cmf,...,momentum_ppo,momentum_ppo_signal,momentum_ppo_hist,momentum_pvo,momentum_pvo_signal,momentum_pvo_hist,momentum_kama,others_dr,others_dlr,others_cr
Ticker,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
AAPL,0,6.478690,6.511170,6.446818,6.496296,493729600.0,0.0,0.0,2.654966e+08,4.937296e+08,0.075008,...,2.107335,2.261243,-0.153908,15.674249,1.979591,13.694658,9.861795,-86.829894,-1.434669,0.000000
AAPL,1,6.514204,6.544256,6.473224,6.507526,601904800.0,0.0,0.0,2.449217e+08,1.095634e+09,0.173217,...,1.843141,1.874978,-0.031837,18.493299,6.581385,11.911914,18.518260,0.172860,0.172711,0.172860
AAPL,2,6.507526,6.533328,6.397337,6.404016,552160000.0,0.0,0.0,-2.530072e+08,5.434744e+08,-0.040892,...,0.994402,1.149229,-0.154827,20.206481,2.840367,17.366114,14.989240,-1.590626,-1.603412,-1.420515
AAPL,3,6.427692,6.435281,6.345733,6.392176,477131200.0,0.0,0.0,-2.352198e+08,6.634320e+07,-0.132552,...,-0.233490,0.749249,-0.982739,19.415313,7.204197,12.211116,12.322199,-0.184874,-0.185046,-1.602763
AAPL,4,6.383676,6.435280,6.346036,6.434672,447610800.0,0.0,0.0,2.062968e+08,5.139540e+08,0.129795,...,2.198274,2.141476,0.056798,15.222475,4.631606,10.590869,12.783320,0.664816,0.662616,-0.948603
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
TSLA,16233,126.370003,128.619995,121.019997,123.150002,166989700.0,0.0,0.0,1.041095e+10,1.601419e+10,-0.317077,...,-10.433928,-7.869638,-2.564290,17.689086,13.348539,4.340547,134.895353,-1.755083,-1.770668,7632.313333
TSLA,16234,117.500000,119.669998,108.760002,109.099998,208643400.0,0.0,0.0,1.021531e+10,1.580554e+10,-0.373349,...,-11.838146,-8.663340,-3.174806,19.438434,14.566518,4.871916,124.019029,-11.408853,-12.113826,6750.145046
TSLA,16235,110.349998,116.269997,108.239998,112.709999,221070500.0,0.0,0.0,1.024036e+10,1.602662e+10,-0.330571,...,-12.696081,-9.469888,-3.226193,20.934566,15.840127,5.094438,120.314445,3.308892,3.255326,6976.808915
TSLA,16236,120.389999,123.570000,117.500000,121.820000,221923300.0,0.0,0.0,1.033432e+10,1.624854e+10,-0.322328,...,-12.765484,-10.129007,-2.636477,21.670103,17.006123,4.663981,120.559411,8.082691,7.772640,7548.805491


In [11]:
# Save a copy of the dataframe with ticker information before dropping it for imputation
df_copy = df.copy()

# Drop 'Ticker' level before imputation
df.index = df.index.droplevel('Ticker')

# Initialize KNNImputer
imputer = KNNImputer(n_neighbors=5)

# Apply imputation
df_imputed = imputer.fit_transform(df)

# Convert result back to DataFrame
df = pd.DataFrame(df_imputed, index=df.index, columns=df.columns)

# Restore 'Ticker' level
df['Ticker'] = df_copy.index.get_level_values('Ticker')
df.set_index(['Ticker', df.index], inplace=True)





In [12]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

# Select features and target
features = df[['momentum_rsi', 'trend_macd', 'volatility_bbh', 'volume_vwap', 'momentum_mfi', 'volume']]
target = (df['trend_macd_signal'] > 0).astype(int)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.2, random_state=42)

# Perform feature scaling
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Initialize and train the logistic regression model
model = LogisticRegression()
model.fit(X_train_scaled, y_train)

# Make predictions on the test set
y_pred = model.predict(X_test_scaled)

# Evaluate the accuracy of the model
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)


KeyError: "['momentum_mfi', 'volume'] not in index"

In [None]:
df.isnull()

In [None]:
# # Clean NaN values
# df = dropna(df)
# df

In [None]:
# Add all ta features
df = add_all_ta_features(df, open="Open", high="High", low="Low", close="Close", volume="Volume")

In [None]:
# Define features and target
features = df[['momentum_rsi', 'trend_macd', 'volatility_bbh', 'volume_vwap', 'momentum_mfi', 'volume']]
target = (df['trend_macd_signal'] > 0).astype(int)


In [None]:
# Split into train and test set
X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.2, random_state=42)


In [None]:
# Apply normalization
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [None]:
# Training the model
model = LogisticRegression()
model.fit(X_train, y_train)

In [None]:
# Check the accuracy
print("Train accuracy:", model.score(X_train, y_train))
print("Test accuracy:", model.score(X_test, y_test))