In [1]:
import yfinance as yf
import pandas as pd
import os

# Symbols to fetch; '^GSPC' is the last entry and is handled like any other ticker.
tickers = ['AAPL', 'MSFT', 'AMZN', 'GOOG', 'TSLA', 'META', 'JPM', 'NFLX', '^GSPC']

# Date window passed straight through to yf.download as start/end.
start_date = '2017-07-01'
end_date = '2025-07-01'

# Output folder for every CSV written below. Created up front so that a fresh
# checkout (no ../data directory yet) does not fail on the first to_csv call.
data_path = '../data/'
os.makedirs(data_path, exist_ok=True)

# Accumulates one price column per ticker, aligned on 'Date'.
combined_df = pd.DataFrame()

for ticker in tickers:
    print(f"Downloading data for {ticker}...")
    df = yf.download(ticker, start=start_date, end=end_date, auto_adjust=False)

    if df.empty:
        print(f"⚠️ No data found for {ticker}")
        continue

    # Turn the index into a regular 'Date' column without mutating in place.
    df = df.reset_index()

    # Prefer the adjusted close when the download provides it.
    price_col = 'Adj Close' if 'Adj Close' in df.columns else 'Close'

    # Keep only the date and the chosen price, labelled with the ticker symbol.
    # NOTE(review): if the installed yfinance returns MultiIndex columns for a
    # single ticker, the CSVs written here presumably carry a second header
    # line (the later clean-up cells strip a first row) - confirm.
    df = df[['Date', price_col]].rename(columns={price_col: ticker})

    df.to_csv(os.path.join(data_path, f'{ticker}.csv'), index=False)

    # Outer merge so a date missing for one ticker is kept (as NaN) rather than dropped.
    if combined_df.empty:
        combined_df = df
    else:
        combined_df = pd.merge(combined_df, df, on='Date', how='outer')

if combined_df.empty:
    # Every download came back empty: do not write an empty file or claim success.
    print("\n⚠️ No data was downloaded; combined file not written.")
else:
    combined_df.to_csv(os.path.join(data_path, 'all_stocks_2017_2025.csv'), index=False)
    print("\n✅ Data downloaded and saved successfully.")


Downloading data for AAPL...


[*********************100%***********************]  1 of 1 completed


Downloading data for MSFT...


[*********************100%***********************]  1 of 1 completed


Downloading data for AMZN...


[*********************100%***********************]  1 of 1 completed
  combined_df = pd.merge(combined_df, df, on='Date', how='outer')


Downloading data for GOOG...


[*********************100%***********************]  1 of 1 completed


Downloading data for TSLA...


[*********************100%***********************]  1 of 1 completed


Downloading data for META...


[*********************100%***********************]  1 of 1 completed


Downloading data for JPM...


[*********************100%***********************]  1 of 1 completed


Downloading data for NFLX...


[*********************100%***********************]  1 of 1 completed


Downloading data for ^GSPC...


[*********************100%***********************]  1 of 1 completed


✅ Data downloaded and saved successfully.





In [2]:
import pandas as pd

# Combined CSV produced by the download cell; read with default settings.
combined_csv = '../data/all_stocks_2017_2025.csv'
df = pd.read_csv(combined_csv)

# Remove the first row only when it is a stray header row, recognised by a
# blank first cell (the first column is 'Date', which every real row fills).
# Dropping unconditionally would delete one more real trading day on every
# re-run of this cell, because the file is overwritten in place.
if len(df) > 0 and pd.isna(df.iloc[0, 0]):
    df_cleaned = df.iloc[1:].reset_index(drop=True)

    # Save the cleaned version over the original file.
    df_cleaned.to_csv(combined_csv, index=False)

    print("✅ First row removed and file saved.")
else:
    # Nothing to strip: keep the data as loaded and leave the file untouched.
    df_cleaned = df
    print("ℹ️ No stray header row found; file left unchanged.")


✅ First row removed and file saved.


In [3]:
import pandas as pd
import os

# List of ticker symbols (one CSV per symbol is expected in data_path)
tickers = ['AAPL', 'MSFT', 'AMZN', 'GOOG', 'TSLA', 'META', 'JPM', 'NFLX', '^GSPC']

# Path to the data folder
data_path = '../data/'

# Clean each CSV by removing its first row, but only when that row is a stray
# header row (blank first cell). The files are overwritten in place, so an
# unconditional drop would delete one more real row each time the cell is re-run.
for ticker in tickers:
    filename = f"{ticker}.csv"
    filepath = os.path.join(data_path, filename)

    # Only the read is guarded: a missing file is reported and skipped.
    try:
        df = pd.read_csv(filepath)
    except FileNotFoundError:
        print(f"❌ File not found: {filename}")
        continue

    if len(df) <= 1:
        print(f"⚠️ Skipped (not enough rows): {filename}")
    elif pd.isna(df.iloc[0, 0]):
        # First cell of the first row is blank, so this is not a data row.
        df_cleaned = df.iloc[1:].reset_index(drop=True)
        df_cleaned.to_csv(filepath, index=False)
        print(f"✅ Cleaned: {filename}")
    else:
        print(f"ℹ️ Already clean: {filename}")


✅ Cleaned: AAPL.csv
✅ Cleaned: MSFT.csv
✅ Cleaned: AMZN.csv
✅ Cleaned: GOOG.csv
✅ Cleaned: TSLA.csv
✅ Cleaned: META.csv
✅ Cleaned: JPM.csv
✅ Cleaned: NFLX.csv
✅ Cleaned: ^GSPC.csv
