In [1]:
import yfinance as yf
import pandas as pd
import glob
import os

In [2]:
def calculate_technical_indicators(df):
    """Return a copy of ``df`` with 20EMA, 50SMA and RSI columns added.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing at least a ``'Close'`` column. It is not modified.

    Returns
    -------
    pandas.DataFrame
        A new frame with the original columns plus:

        * ``'20EMA'`` - exponential moving average of ``Close`` (span 20,
          ``adjust=False``).
        * ``'50SMA'`` - 50-row simple moving average of ``Close`` (NaN for
          the first 49 rows).
        * ``'RSI'``   - 14-row RSI built from simple rolling means of gains
          and losses.

        All numeric columns are rounded to 2 decimals.
    """
    # Work on a copy so the caller's DataFrame is not silently given new columns.
    enriched = df.copy()
    close = enriched['Close']

    # Calculate the 20-day EMA
    enriched['20EMA'] = close.ewm(span=20, adjust=False).mean()

    # Calculate the 50-day SMA
    enriched['50SMA'] = close.rolling(window=50).mean()

    # Calculate RSI (Relative Strength Index).
    # The first diff is NaN; `where` replaces it with 0, so it counts as
    # "no change" in the first 14-row window.
    delta = close.diff()
    gain = (delta.where(delta > 0, 0)).rolling(window=14).mean()
    loss = (-delta.where(delta < 0, 0)).rolling(window=14).mean()
    rs = gain / loss
    enriched['RSI'] = 100 - (100 / (1 + rs))

    # Round off to 2 decimals (applies to every numeric column, not only the new ones)
    return enriched.round(2)

def get_vix_data(start_date="2012-01-01"):
    """Download the ``^VIX`` series from Yahoo Finance.

    Parameters
    ----------
    start_date : str
        First date to request, passed to ``yf.download`` as ``start``.

    Returns
    -------
    pandas.DataFrame
        Single-column frame named ``'VIX'`` holding the ``'Adj Close'``
        values rounded to 2 decimals, indexed as returned by ``yf.download``.
    """
    # Request unadjusted prices explicitly: the 'Adj Close' column is only
    # present when auto_adjust is False, and the library default for this
    # flag is not the same across yfinance releases.
    vix_data = yf.download('^VIX', start=start_date, auto_adjust=False)

    # Some yfinance releases return (field, ticker) MultiIndex columns even
    # for a single ticker; keep only the field level so 'Adj Close' is a
    # plain label. This is a no-op when the columns are already flat.
    if isinstance(vix_data.columns, pd.MultiIndex):
        vix_data.columns = vix_data.columns.get_level_values(0)

    # Keep only the 'Adj Close' column and rename it to 'VIX'
    return vix_data[['Adj Close']].rename(columns={'Adj Close': 'VIX'}).round(2)

def download_stock_data(ticker, start_date="2012-01-01", vix_data=None):
    """Download one ticker, add indicators and VIX, and save it as CSV.

    Parameters
    ----------
    ticker : str
        Symbol passed to ``yf.download``; also stored in the
        ``'company_name'`` column and used in the output file name.
    start_date : str
        First date to request.
    vix_data : pandas.DataFrame, optional
        Pre-downloaded frame with a ``'VIX'`` column on the same kind of index
        as the stock data. When omitted, it is fetched with
        ``get_vix_data(start_date)``; passing it lets a caller looping over
        many tickers download VIX only once.

    Returns
    -------
    pandas.DataFrame
        The enriched data with the date index moved into a column. The same
        frame is written to ``datasets/<ticker>_data.csv``.
    """
    # Request unadjusted prices explicitly so the 'Adj Close' column is
    # present regardless of the installed yfinance default for auto_adjust.
    stock_data = yf.download(ticker, start=start_date, auto_adjust=False)

    # Some yfinance releases return (field, ticker) MultiIndex columns even
    # for a single ticker; flatten to the field names. No-op for flat columns.
    if isinstance(stock_data.columns, pd.MultiIndex):
        stock_data.columns = stock_data.columns.get_level_values(0)

    # Add the company name as a new column
    stock_data['company_name'] = ticker

    # Calculate technical indicators
    stock_data = calculate_technical_indicators(stock_data)

    # Get the VIX data (unless supplied) and merge it based on the Date index.
    # Left join: stock rows without a matching VIX row get NaN in 'VIX'.
    if vix_data is None:
        vix_data = get_vix_data(start_date)
    stock_data = stock_data.merge(vix_data, how='left', left_index=True, right_index=True)

    # Reset the index to move the date into a column
    stock_data.reset_index(inplace=True)

    # Save the result to a CSV file; create the folder first so a fresh
    # checkout does not fail with a missing-directory error.
    os.makedirs('datasets', exist_ok=True)
    csv_filename = f'datasets/{ticker}_data.csv'
    stock_data.to_csv(csv_filename, index=False)

    return stock_data

# Example usage: download, enrich and save data for a fixed list of tickers.
# Each call uses the default start date and writes datasets/<ticker>_data.csv.
tickers = ['AAPL', 'GOOG', 'MSFT', 'AMZN', 'META', 'TSLA', 'NVDA']
for ticker in tickers:
    # `data` is overwritten on every iteration, so after the loop it holds
    # only the frame for the last ticker.
    data = download_stock_data(ticker)
    print(f"Data for {ticker} saved successfully.")


[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


Data for AAPL saved successfully.


[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


Data for GOOG saved successfully.


[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed

Data for MSFT saved successfully.



[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed

Data for AMZN saved successfully.



[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed

Data for META saved successfully.



[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed

Data for TSLA saved successfully.



[*********************100%***********************]  1 of 1 completed

Data for NVDA saved successfully.





In [3]:
def merge_csv_files(directory='datasets'):
    """Concatenate every per-ticker ``*_data.csv`` in ``directory`` into one CSV.

    Parameters
    ----------
    directory : str
        Folder that holds the per-ticker files and receives the merged
        output ``merged_stock_data.csv``.

    Notes
    -----
    * Rows containing any null value are dropped from the merged result.
    * Prints a status message; returns ``None``.
    * Raises ``KeyError`` if an input file lacks one of the expected columns.
    """
    output_name = 'merged_stock_data.csv'
    output_path = os.path.join(directory, output_name)

    # The output file name also matches "*_data.csv", so it must be excluded;
    # otherwise re-running the merge would append the previous merged result
    # to itself and duplicate every row. Sort for a deterministic row order
    # (glob returns files in arbitrary order).
    all_files = sorted(
        path
        for path in glob.glob(os.path.join(directory, "*_data.csv"))
        if os.path.basename(path) != output_name
    )

    # Keep only relevant columns: company name and the rest of the data
    columns = ['company_name', 'Date', 'Open', 'High', 'Low', 'Close', 'Adj Close',
               'Volume', '20EMA', '50SMA', 'RSI', 'VIX']
    dataframes = [pd.read_csv(path)[columns] for path in all_files]

    if dataframes:  # Check if there's any DataFrame to concatenate
        # Remove rows with null values (e.g. the leading rows where 50SMA is undefined)
        merged_data = pd.concat(dataframes, ignore_index=True).dropna()
        merged_data.to_csv(output_path, index=False)
        print("All data merged and saved to merged_stock_data.csv!")
    else:
        print("No data files found to merge.")

# Merge the per-ticker CSV files found in the default 'datasets' directory
# into datasets/merged_stock_data.csv.
merge_csv_files()

All data merged and saved to merged_stock_data.csv!
