In [13]:
import yfinance as yf
import pandas as pd

# Your existing stock data
tickers = ["NVDA"]  # Replace with your tickers
start_date = "2020-01-01"
end_date = "2024-12-31"

# Download stock data (this already fetches adjusted prices, dividends, etc.)
data = yf.download(tickers, start=start_date, end=end_date, auto_adjust=False)

# Ensure we use 'Close' price
close_prices = data["Close"]

# Fetch fundamental data (without re-calling yf.Ticker)
stock = yf.Ticker("NVDA")  # We still need this for fundamentals

# Get Net Income and Shares Outstanding (avoid redundant API calls)
net_income = stock.income_stmt.loc["Net Income"]  # Quarterly
shares_outstanding = stock.balance_sheet.loc["Ordinary Shares Number"]  # Quarterly

# Convert index to datetime and ensure alignment
net_income.index = pd.to_datetime(net_income.index)
shares_outstanding.index = pd.to_datetime(shares_outstanding.index)

# Compute Quarterly EPS
eps_quarterly = net_income / shares_outstanding

# Forward-fill EPS values to estimate daily EPS
eps_daily = eps_quarterly.reindex(close_prices.index).fillna(method="ffill")

# Compute daily P/E ratio
pe_ratios = close_prices / eps_daily
pe_ratios


[*********************100%***********************]  1 of 1 completed
  eps_daily = eps_quarterly.reindex(close_prices.index).fillna(method="ffill")
  eps_daily = eps_quarterly.reindex(close_prices.index).fillna(method="ffill")


Unnamed: 0_level_0,NVDA,2020-01-02 00:00:00,2020-01-03 00:00:00,2020-01-06 00:00:00,2020-01-07 00:00:00,2020-01-08 00:00:00,2020-01-09 00:00:00,2020-01-10 00:00:00,2020-01-13 00:00:00,2020-01-14 00:00:00,...,2024-12-16 00:00:00,2024-12-17 00:00:00,2024-12-18 00:00:00,2024-12-19 00:00:00,2024-12-20 00:00:00,2024-12-23 00:00:00,2024-12-24 00:00:00,2024-12-26 00:00:00,2024-12-27 00:00:00,2024-12-30 00:00:00
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2020-01-02,,,,,,,,,,,...,,,,,,,,,,
2020-01-03,,,,,,,,,,,...,,,,,,,,,,
2020-01-06,,,,,,,,,,,...,,,,,,,,,,
2020-01-07,,,,,,,,,,,...,,,,,,,,,,
2020-01-08,,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-12-23,,,,,,,,,,,...,,,,,,,,,,
2024-12-24,,,,,,,,,,,...,,,,,,,,,,
2024-12-26,,,,,,,,,,,...,,,,,,,,,,
2024-12-27,,,,,,,,,,,...,,,,,,,,,,


In [None]:

# Clean up and display results
pe_ratios_df = pe_ratios.bfill().to_frame(name="P/E Ratio")


In [10]:
"""
stock_classification_2009_2015.py

Script to train a 3-class classifier (-1, 0, +1) on daily forward returns of 21
non-dividend-paying stocks from 2009-01-01 through 2015-11-30, with
features including P/E ratio and rolling averages (5, 20, 90 days) of
returns, volatility, and volume.

Train period: 2009-01-01 to 2014-12-31
Test period:  2015-01-01 to 2015-11-30
"""

from sklearn.model_selection import train_test_split
import yfinance as yf
import pandas as pd
import numpy as np
from datetime import datetime
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.preprocessing import StandardScaler

#############################################
# 1. SELECT 21 TICKERS (NON-DIV PAYING)
#############################################

# These are example tickers that (mostly) had no dividend 2009–2015
# and existed in that period. Replace if needed.
tickers = [
    "AMZN", "GOOG", "TSLA", "CRM", "ADBE", "BIIB", "ISRG", "VRTX", "REGN",
    "ILMN", "EBAY", "BIDU", "NFLX", "BKNG", "LULU", "ADSK", "BRK-B", "BMRN",
    "ALGN", "META", "EA"
]

start_date = "2009-01-01"
end_date = "2015-11-30"

#############################################
# 2. DOWNLOAD DATA USING YFINANCE
#############################################
# We'll get daily data (Open/High/Low/Close/Adj Close/Volume) for each ticker.

print("Downloading data from yfinance...\n")
data = yf.download(
    tickers,
    start=start_date,
    end=end_date,
    auto_adjust=False  # so that we keep 'Adj Close' and can see raw dividends if any
)

[************          24%                       ]  5 of 21 completed

Downloading data from yfinance...



[*********************100%***********************]  21 of 21 completed


In [12]:
data.columns

MultiIndex([('Adj Close',  'ADBE'),
            ('Adj Close',  'ADSK'),
            ('Adj Close',  'ALGN'),
            ('Adj Close',  'AMZN'),
            ('Adj Close',  'BIDU'),
            ('Adj Close',  'BIIB'),
            ('Adj Close',  'BKNG'),
            ('Adj Close',  'BMRN'),
            ('Adj Close', 'BRK-B'),
            ('Adj Close',   'CRM'),
            ...
            (   'Volume',  'EBAY'),
            (   'Volume',  'GOOG'),
            (   'Volume',  'ILMN'),
            (   'Volume',  'ISRG'),
            (   'Volume',  'LULU'),
            (   'Volume',  'META'),
            (   'Volume',  'NFLX'),
            (   'Volume',  'REGN'),
            (   'Volume',  'TSLA'),
            (   'Volume',  'VRTX')],
           names=['Price', 'Ticker'], length=126)