In [1]:
# Script to install the required packages for the stock market analysis
#!pip install yfinance
#!pip install pandas
#!pip install matplotlib
#!pip install matplotlib mplfinance
#!pip install seaborn
#!pip install scikit-learn
#!pip install ipywidgets

In [2]:
# set the library
import yfinance as yf
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import datetime
import os
import mplfinance as mpf
import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from sklearn.linear_model import LinearRegression
import ipywidgets as widgets
from IPython.display import display
import json

In [3]:
# List of major tech stocks to analyze
tech_stocks = ["AAPL", "MSFT", "GOOGL", "AMZN", "TSLA", "NVDA", "META", "AMD", "INTC", "NFLX"]

In [None]:
# Dictionary to store the data
stocks_data = {}

# Loop through each stock in the list
for ticker in tech_stocks:
    stock = yf.Ticker(ticker)
    data = stock.history(period="10y")
    data['Stock'] = ticker  # Add a new column for the stock name
    # Keep only the date part of the datetime
    data.reset_index(inplace=True)  # Ensure 'Date' is a column, not an index
    data['Date'] = data['Date'].dt.date
    stocks_data[ticker] = data
    # Fetch market cap data
    market_cap = stock.info['marketCap']
    data['Market Cap'] = market_cap

# Convert the dictionary to a DataFrame for better visualization
all_stocks_df = pd.concat(stocks_data)
all_stocks_df

Unnamed: 0,Unnamed: 1,Date,Open,High,Low,Close,Volume,Dividends,Stock Splits,Stock,Market Cap
AAPL,0,2015-03-02,28.865106,29.095133,28.652945,28.829372,192386800,0.0,0.0,AAPL,3564744474624
AAPL,1,2015-03-03,28.800336,28.925399,28.606039,28.889666,151265200,0.0,0.0,AAPL,3564744474624
AAPL,2,2015-03-04,28.831606,28.934334,28.657410,28.706539,126665200,0.0,0.0,AAPL,3564744474624
AAPL,3,2015-03-05,28.715473,28.753438,28.085689,28.230852,226068400,0.0,0.0,AAPL,3564744474624
AAPL,4,2015-03-06,28.675268,28.891896,28.197349,28.273279,291368400,0.0,0.0,AAPL,3564744474624
...,...,...,...,...,...,...,...,...,...,...,...
NFLX,2510,2025-02-21,1029.420044,1032.380005,999.390015,1003.150024,3738700,0.0,0.0,NFLX,411959918592
NFLX,2511,2025-02-24,1008.000000,1015.169983,984.479980,988.469971,4426200,0.0,0.0,NFLX,411959918592
NFLX,2512,2025-02-25,989.400024,994.400024,955.000000,977.239990,4738300,0.0,0.0,NFLX,411959918592
NFLX,2513,2025-02-26,977.599976,1000.890015,976.900024,990.059998,3397200,0.0,0.0,NFLX,411959918592


In [None]:
# Reset the index to separate the date into its own column
all_stocks_df.reset_index(inplace=True)

In [6]:
# Rename 'level_0' to 'Stock'
# Add market cap to the DataFrame
all_stocks_df['Market Cap'] = all_stocks_df['Stock'].map(lambda x: stocks_data[x]['Market Cap'].iloc[0])
all_stocks_df.rename(columns={'level_0': 'Stock'}, inplace=True)

# Rearrange columns to have 'Date' and 'Stock' as the first two columns
columns_to_keep = ['Stock', 'Date', 'Open', 'High', 'Low', 'Close', 'Volume', 'Market Cap']
all_stocks_df = all_stocks_df[columns_to_keep]

# Display the first few rows of the DataFrame
print(all_stocks_df.head())

  Stock Stock        Date       Open       High        Low      Close  \
0  AAPL  AAPL  2015-03-02  28.865106  29.095133  28.652945  28.829372   
1  AAPL  AAPL  2015-03-03  28.800336  28.925399  28.606039  28.889666   
2  AAPL  AAPL  2015-03-04  28.831606  28.934334  28.657410  28.706539   
3  AAPL  AAPL  2015-03-05  28.715473  28.753438  28.085689  28.230852   
4  AAPL  AAPL  2015-03-06  28.675268  28.891896  28.197349  28.273279   

      Volume     Market Cap  
0  192386800  3564744474624  
1  151265200  3564744474624  
2  126665200  3564744474624  
3  226068400  3564744474624  
4  291368400  3564744474624  


In [7]:
# Remove duplicate 'Stock' column
all_stocks_df = all_stocks_df.loc[:, ~all_stocks_df.columns.duplicated()]

# Dictionary to store individual stock DataFrames
individual_stock_dfs = {}

# Directory to save the CSV files
output_dir = 'stock_data'
os.makedirs(output_dir, exist_ok=True)

# Loop through each stock and create separate DataFrames
for ticker in tech_stocks:
    individual_stock_dfs[ticker] = all_stocks_df[all_stocks_df['Stock'] == ticker].copy()
    # Add market cap to each DataFrame
    individual_stock_dfs[ticker].loc[:, 'Market Cap'] = stocks_data[ticker]['Market Cap'].iloc[0]

    # Save each DataFrame to a CSV file (optional)
    individual_stock_dfs[ticker].to_csv(os.path.join(output_dir, f'{ticker}_stock_data.csv'), index=False)

# Example: Access the DataFrame for Google (GOOGL)
print(individual_stock_dfs['GOOGL'].head())

      Stock        Date       Open       High        Low      Close    Volume  \
5030  GOOGL  2015-03-02  28.247599  28.695475  28.055794  28.647150  50406000   
5031  GOOGL  2015-03-03  28.713409  28.937597  28.421966  28.834970  50526000   
5032  GOOGL  2015-03-04  28.744299  28.976457  28.522602  28.812054  37964000   
5033  GOOGL  2015-03-05  28.876323  29.054675  28.807074  28.966496  35918000   
5034  GOOGL  2015-03-06  28.994891  29.033750  28.499686  28.541534  37592000   

         Market Cap  
5030  2064579952640  
5031  2064579952640  
5032  2064579952640  
5033  2064579952640  
5034  2064579952640  


In [8]:
# Define the folder containing stock data CSV files
stock_folder = "stock_data"

# List all CSV files in the folder
csv_files = [f for f in os.listdir(stock_folder) if f.endswith(".csv")]

# Prepare storage dictionaries
stock_data_dict = {}
stock_list = {"stocks": [], "years": []}

In [9]:
# Process each stock file
for file in csv_files:
    stock_name = file.replace("_stock_data.csv", "").upper()  # Extract stock name
    file_path = os.path.join(stock_folder, file)
    df = pd.read_csv(file_path)
    df['Date'] = pd.to_datetime(df['Date'])  # Convert to datetime format
    df['Year'] = df['Date'].dt.year  # Extract year
    # Drop NaN values to prevent JSON errors
    df = df.dropna()
    # Add stock name to the list
    if stock_name not in stock_list["stocks"]:
        stock_list["stocks"].append(stock_name)
    # Store unique years
    unique_years = df['Year'].unique().tolist()
    stock_list["years"].extend(unique_years)
    # Convert DataFrame to JSON format (nested by year)
    stock_data_dict[stock_name] = {}
    for year in unique_years:
        df_year = df[df['Year'] == year].drop(columns=["Year"])
        if not df_year.empty:  # Only add non-empty years
            stock_data_dict[stock_name][str(year)] = df_year.to_dict(orient='records')


In [10]:

# Ensure unique and sorted years
stock_list["years"] = sorted(set(stock_list["years"]))
# Convert all 'Date' fields to string format for JSON compatibility
for stock, years_data in stock_data_dict.items():
    for year, records in years_data.items():
        for record in records:
            record["Date"] = pd.to_datetime(record["Date"]).strftime('%Y-%m-%d')  # Fix Timestamp issue
# Convert to JavaScript format
js_content = "const stockData = " + json.dumps(stock_data_dict, indent=4) + ";\n"
js_content += "const stockList = " + json.dumps(stock_list, indent=4) + ";"
# Save as a JavaScript file
with open("stock_data.js", "w") as f:
    f.write(js_content)
print("JSON files created successfully!")

JSON files created successfully!
