In [1]:
#Create a DataFrame
import yfinance as yf
import pandas as pd
from datetime import datetime, timedelta

# Ticker universe: ten S&P 100 constituents (edit this list to change what is downloaded)
sp100_companies = [
    'AAPL', 'MSFT', 'AMZN', 'GOOGL', 'META',  # Apple, Microsoft, Amazon, Alphabet (Google), Meta (Facebook)
    'JNJ', 'JPM', 'V', 'PG', 'DIS',           # Johnson & Johnson, JPMorgan Chase, Visa, Procter & Gamble, Disney
]

# Download window: the 3 x 365 days ending at the moment this cell runs
# (a fixed day count, so leap days are not accounted for)
end_date = datetime.today()
start_date = end_date - 3 * timedelta(days=365)

# Function to fetch data for a single stock
def fetch_stock_data(ticker, start_date, end_date):
    """Download price history for one ticker and tag every row with its symbol.

    Args:
        ticker: Symbol handed to ``yf.Ticker``.
        start_date: Start of the window, forwarded as ``history(start=...)``.
        end_date: End of the window, forwarded as ``history(end=...)``.

    Returns:
        The DataFrame produced by ``history`` with an extra ``Ticker`` column,
        or ``None`` when the download raised or produced no rows. Callers can
        therefore rely on ``is not None`` meaning "usable data".
    """
    try:
        df = yf.Ticker(ticker).history(start=start_date, end=end_date)

        # history() can hand back an empty frame instead of raising (for
        # example when the symbol is unknown); treat that as a failed fetch so
        # empty frames never reach the combined dataset.
        if df is None or df.empty:
            print(f"No data returned for {ticker}; skipping.")
            return None

        df['Ticker'] = ticker  # Add ticker symbol as a column
        return df
    except Exception as e:
        # Best-effort download: report the failure and let the caller carry on
        # with the remaining tickers.
        print(f"Error fetching data for {ticker}: {e}")
        return None

# Download each company in turn; tickers whose fetch returned None are skipped
all_data = []
for ticker in sp100_companies:
    print(f"Fetching data for {ticker}...")
    data = fetch_stock_data(ticker, start_date, end_date)
    if data is None:
        continue
    all_data.append(data)

if not all_data:
    print("No data was fetched. Please check your internet connection or ticker symbols.")
else:
    # Stack the per-ticker frames row-wise into one DataFrame
    combined_df = pd.concat(all_data)

    # Persist the combined frame to CSV
    filename = "top10_sp100_companies.csv"
    combined_df.to_csv(filename)
    print(f"\nData successfully saved to {filename}")

    # Preview the first rows of the combined frame
    print("\nSample of the fetched data:")
    print(combined_df.head())

Fetching data for AAPL...
Fetching data for MSFT...
Fetching data for AMZN...
Fetching data for GOOGL...
Fetching data for META...
Fetching data for JNJ...
Fetching data for JPM...
Fetching data for V...
Fetching data for PG...
Fetching data for DIS...

Data successfully saved to top10_sp100_companies.csv

Sample of the fetched data:
                                 Open        High         Low       Close  \
Date                                                                        
2022-04-11 00:00:00-04:00  166.019895  166.334785  162.861072  163.107086   
2022-04-12 00:00:00-04:00  165.340909  167.161401  163.982908  164.986649   
2022-04-13 00:00:00-04:00  164.720953  168.312747  164.110844  167.682953   
2022-04-14 00:00:00-04:00  167.899433  168.539078  162.408405  162.654419   
2022-04-18 00:00:00-04:00  161.306270  163.943545  160.961860  162.437943   

                             Volume  Dividends  Stock Splits Ticker  
Date                                                  

In [2]:
#Preprocess data to train the model
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

def preprocess_data(df, ticker):
    """Build scaled technical-indicator features for a single ticker.

    The rows of ``df`` whose ``Ticker`` equals ``ticker`` are copied (``df``
    itself is not modified) and extended with:

    * ``Daily_Return`` - percentage change of ``Close``
    * ``MA_5`` / ``MA_20`` - 5- and 20-row rolling means of ``Close``
    * ``Upper_Bollinger`` / ``Lower_Bollinger`` - ``MA_20`` +/- 2 rolling
      standard deviations (20-row window)
    * ``RSI`` - 14-row RSI computed from simple rolling means of gains and
      losses

    Rows are assumed to already be in chronological order; no sorting is
    done here.

    Args:
        df: DataFrame with at least ``Ticker``, ``Close`` and ``Volume`` columns.
        ticker: Symbol to select from the ``Ticker`` column.

    Returns:
        Tuple ``(scaled_data, stock_df, scaler)``: the scaler's output for the
        eight feature columns, the filtered frame with the indicator columns
        (warm-up rows removed), and the fitted ``MinMaxScaler``.

    Raises:
        ValueError: If ``df`` has no rows for ``ticker``, or if no row has a
            complete set of features (e.g. fewer rows than the 20-row window).
    """
    features = ['Close', 'Volume', 'Daily_Return', 'MA_5', 'MA_20',
                'Upper_Bollinger', 'Lower_Bollinger', 'RSI']

    # Filter data for the specific ticker
    stock_df = df[df['Ticker'] == ticker].copy()
    if stock_df.empty:
        raise ValueError(f"No rows found for ticker {ticker!r}")

    close = stock_df['Close']

    # Daily returns and moving averages
    stock_df['Daily_Return'] = close.pct_change()
    stock_df['MA_5'] = close.rolling(window=5).mean()
    stock_df['MA_20'] = close.rolling(window=20).mean()

    # Bollinger Bands: the 20-row rolling std is computed once and reused
    band_width = 2 * close.rolling(window=20).std()
    stock_df['Upper_Bollinger'] = stock_df['MA_20'] + band_width
    stock_df['Lower_Bollinger'] = stock_df['MA_20'] - band_width

    # RSI from simple rolling means of gains and losses. A window with no
    # movement at all gives 0/0 = NaN and is removed by the dropna below.
    delta = close.diff()
    gain = delta.where(delta > 0, 0).rolling(window=14).mean()
    loss = (-delta.where(delta < 0, 0)).rolling(window=14).mean()
    rs = gain / loss
    stock_df['RSI'] = 100 - (100 / (1 + rs))

    # Drop rows with incomplete features only. Checking every column would
    # discard rows because of NaNs in columns the model never sees (for
    # example a column that exists only for some tickers in a combined frame).
    stock_df = stock_df.dropna(subset=features)
    if stock_df.empty:
        raise ValueError(
            f"Not enough data for ticker {ticker!r} to compute all features"
        )

    # Scale features.
    # NOTE(review): the scaler is fitted on every remaining row; if a
    # train/test split happens later, confirm this does not leak test-range
    # information into training.
    scaler = MinMaxScaler()
    scaled_data = scaler.fit_transform(stock_df[features])

    return scaled_data, stock_df, scaler

In [3]:
# Main: reload the CSV written earlier and preprocess a single ticker
df = pd.read_csv(filepath_or_buffer='top10_sp100_companies.csv')

scaled_data, stock_df, scaler = preprocess_data(df=df, ticker='GOOGL')

# Last expression of the cell: show the first 20 rows of the preprocessed frame
stock_df.head(n=20)

Unnamed: 0,Date,Open,High,Low,Close,Volume,Dividends,Stock Splits,Ticker,Daily_Return,MA_5,MA_20,Upper_Bollinger,Lower_Bollinger,RSI
2275,2022-05-02 00:00:00-04:00,112.865395,116.19,112.064225,116.028275,35534000,0.0,0.0,GOOGL,0.021677,115.877895,126.289095,142.906822,109.671368,35.910214
2276,2022-05-03 00:00:00-04:00,115.876499,117.878433,115.482387,116.775703,24968000,0.0,0.0,GOOGL,0.006442,115.615947,125.013319,140.324765,109.701874,37.951921
2277,2022-05-04 00:00:00-04:00,116.474643,122.269935,114.568253,121.67926,49916000,0.0,0.0,GOOGL,0.041991,117.201665,124.10118,137.75708,110.445279,41.671958
2278,2022-05-05 00:00:00-04:00,119.632053,120.463079,114.458286,115.951149,45840000,0.0,0.0,GOOGL,-0.047075,116.800186,123.103823,136.025889,110.181756,39.44154
2279,2022-05-06 00:00:00-04:00,114.636425,117.012068,113.472989,115.195747,39710000,0.0,0.0,GOOGL,-0.006515,117.126027,122.101514,134.138878,110.064151,37.632443
2280,2022-05-09 00:00:00-04:00,112.711129,114.529439,111.467578,111.975647,40802000,0.0,0.0,GOOGL,-0.027953,116.315501,121.067632,132.838077,109.297187,32.193574
2281,2022-05-10 00:00:00-04:00,114.957887,115.696854,112.363289,113.850677,39900000,0.0,0.0,GOOGL,0.016745,115.730496,120.34964,132.037099,108.662181,36.090431
2282,2022-05-11 00:00:00-04:00,112.697702,115.810817,112.697702,113.061958,37534000,0.0,0.0,GOOGL,-0.006928,114.007036,119.647397,131.313113,107.981682,37.974085
2283,2022-05-12 00:00:00-04:00,110.847546,113.75116,109.301935,112.307068,53836000,0.0,0.0,GOOGL,-0.006677,113.27822,118.798954,129.974088,107.623821,41.952336
2284,2022-05-13 00:00:00-04:00,113.988033,117.314134,113.064454,115.498314,35038000,0.0,0.0,GOOGL,0.028415,113.338733,118.26752,128.97652,107.558521,41.631413
