In [None]:
# Import base dependencies
import math
import os
import time
from datetime import datetime, timedelta

import pandas as pd
import requests

from config import av_api_key as api_key

# Import dependencies for web scraping
from bs4 import BeautifulSoup
import numpy as np
# import matplotlib.pyplot as plt
# from sklearn.linear_model import LinearRegression

# Import ML dependencies
# import tensorflow as tf
# from tensorflow import keras
# from keras.models import Sequential
# from keras.layers import SimpleRNN, Flatten, TimeDistributed, LSTM

In [646]:
# Import raw data from platform export
# IRA funds
# Each entry is a dict consumed by build_analysis_table:
#   'symbol' (required) - ticker symbol passed to the price/fundamentals lookups
#   'basis'  (optional) - cost basis; build_analysis_table treats a missing value as 0
#   'is_etf' (optional) - when True, build_analysis_table skips the fundamentals lookup
# NOTE(review): several entries carry basis 0.010 - presumably a placeholder; confirm.
ira_funds = [{'symbol':"STRL"}, 
             {'symbol':"DXPE", 'basis':112.000}, 
             {'symbol':"TSM"}, 
             {'symbol':"WLDN", 'basis':36.080}, 
             {'symbol':"SSRM"}, 
             {'symbol':"LRN", 'basis':107.868}, 
             {'symbol':"UNFI"}, 
             {'symbol':"MFC"}, 
             {'symbol':"EAT", 'basis':124.038}, 
             {'symbol':"EZPW", 'basis':15.460}, 
             {'symbol':"ARQT", 'basis':15.932}, 
             {'symbol':"WFC", 'basis':57.180}, 
             {'symbol':"ITRN"}, 
             {'symbol':"CRDO", 'basis':0.010}, 
             {'symbol':"PYPL", 'basis':68.073}, 
             {'symbol':"ALL", 'basis':193.027}, 
             {'symbol':"LC", 'basis':5.542}, 
             {'symbol':"QTWO", 'basis':79.148}, 
             {'symbol':"CLS", 'basis':0.010}, 
             {'symbol':"CCL", 'basis':0.010}, 
             {'symbol':"AGX"}, 
             {'symbol':"POWL"}, 
             {'symbol':"PPC"}, 
             {'symbol':"SYF"}, 
             {'symbol':"ATGE", 'basis':116.372}, 
             {'symbol':"BRK-B"}, 
             {'symbol':"SFM"}, 
             {'symbol':"SKYW", 'basis':65.040}, 
             {'symbol':"BLBD"}, 
             {'symbol':"GM"}, 
             {'symbol':"RCL"}, 
             {'symbol':"OKTA", 'basis':92.574}, 
             {'symbol':"TWLO"}, 
             {'symbol':"PEP"}, 
             {'symbol':"APP", 'basis':119.318}, 
             {'symbol':"TMUS", 'basis':218.823}, 
             {'symbol':"GRBK"}, 
             {'symbol':"UBER", 'basis':40.612}, 
             {'symbol':"CAAP", 'basis':21.570}
             ]

# Brokerage
# Same entry layout as ira_funds: 'symbol' is required, 'basis' and 'is_etf' are optional.
# Many symbols also appear in ira_funds, so their data is fetched once per table build.
# FBTC is the only entry flagged 'is_etf', which makes build_analysis_table skip its
# fundamentals lookup.
brokerage_funds = [{'symbol':"FRSH", 'basis':13.667}, 
                   {'symbol':"PGY", 'basis':28.919}, 
                   {'symbol':"COMM", 'basis':0.010}, 
                   {'symbol':'FINV', 'basis':8.950}, 
                   {'symbol':"LX", 'basis':6.612}, 
                   {'symbol':'BCS'}, 
                   {'symbol':'PUK'}, 
                   {'symbol':'PSIX'}, 
                   {'symbol':'NGD', 'basis':4.400}, 
                   {'symbol':'GFI'}, 
                   {'symbol':'BKTI'}, 
                   {'symbol':"SSRM", 'basis':12.210}, 
                   {'symbol':"UNFI"}, 
                   {'symbol':"MFC"}, 
                   {'symbol':"EZPW", 'basis':15.773}, 
                   {'symbol':"ARQT", 'basis':15.996}, 
                   {'symbol':"WFC", 'basis':66.893}, 
                   {'symbol':"ITRN"}, 
                   {'symbol':"PYPL", 'basis':68.292}, 
                   {'symbol':"LC"}, 
                   {'symbol':"QTWO", 'basis':76.435}, 
                   {'symbol':"CCL", 'basis':0.010}, 
                   {'symbol':"PPC"}, 
                   {'symbol':"SYF"}, 
                   {'symbol':"ATGE", 'basis':97.175}, 
                   {'symbol':"SKYW", 'basis':74.583}, 
                   {'symbol':"BLBD"}, 
                   {'symbol':"GM"}, 
                   {'symbol':"OKTA", 'basis':99.535}, 
                   {'symbol':"GRBK"}, 
                   {'symbol':"UBER", 'basis':86.860}, 
                   {'symbol':"CAAP"},
                   {'symbol':"QUBT", 'basis':15.469},
                   {'symbol':"RGTI"}, 
                   {'symbol':"FBTC", 'basis':65.965, 'is_etf': True}
                   ]

In [647]:
def get_history(symbol, api_key, days=252, timeout=30):  # ~1 year default
    """Fetch daily price history for ``symbol`` from Alpha Vantage.

    Parameters
    ----------
    symbol : str
        Ticker symbol to query.
    api_key : str
        Alpha Vantage API key.
    days : int, default 252
        Number of most recent rows (trading days) to keep.
    timeout : float, default 30
        Seconds to wait for the HTTP response before giving up.

    Returns
    -------
    pandas.DataFrame or None
        Float-valued frame indexed by date in ascending order, with the API's
        numbered column labels renamed (Open, High, Low, Close, Volume, ...).
        ``None`` when the request fails, the payload carries no daily series,
        or no volume column is present; the reason is printed.
    """
    url = "https://www.alphavantage.co/query"
    # Let requests build the query string so the symbol and key are URL-encoded.
    params = {
        "function": "TIME_SERIES_DAILY_ADJUSTED",
        "symbol": symbol,
        "apikey": api_key,
        "outputsize": "full",
        "entitlement": "delayed",
    }
    # Covers both layouts: the adjusted endpoint labels volume "6. volume",
    # the unadjusted one "5. volume".
    column_map = {
        "1. open": "Open",
        "2. high": "High",
        "3. low": "Low",
        "4. close": "Close",
        "5. volume": "Volume",
        "5. adjusted close": "Adjusted Close",
        "6. volume": "Volume",
        "7. dividend amount": "Dividend",
        "8. split coefficient": "Split Coefficient",
    }
    try:
        # Without a timeout a stalled connection would block the notebook forever.
        http_response = requests.get(url, params=params, timeout=timeout)
        http_response.raise_for_status()
        response = http_response.json()
        if "Time Series (Daily)" not in response:
            # The API reports problems under one of several keys.
            error_msg = response.get(
                'Error Message',
                response.get('Note', response.get('Information', 'Unknown error')),
            )
            print(f"Error fetching price data for {symbol}: {error_msg}")
            print(f"Response keys: {list(response.keys())}")
            return None

        time_series = response["Time Series (Daily)"]
        df = pd.DataFrame.from_dict(time_series, orient="index", dtype=float)

        if "5. volume" not in df.columns and "6. volume" not in df.columns:
            print(f"No volume data for {symbol}")
            return None

        # rename() ignores labels that are absent from the frame.
        df = df.rename(columns=column_map)
        df.index = pd.to_datetime(df.index)
        return df.sort_index().tail(days)
    except Exception as e:
        # Best-effort fetch: callers treat None as "skip this ticker".
        print(f"Exception fetching price data for {symbol}: {str(e)}")
        return None

def get_fundamentals(symbol, api_key, current_price, timeout=30):
    """Fetch valuation ratios for ``symbol`` from the Alpha Vantage OVERVIEW endpoint.

    Parameters
    ----------
    symbol : str
        Ticker symbol to query.
    api_key : str
        Alpha Vantage API key.
    current_price : float
        Price used to derive EPS and book value from the reported ratios.
    timeout : float, default 30
        Seconds to wait for the HTTP response before giving up.

    Returns
    -------
    dict or None
        Keys ``pe_ratio``, ``pb_ratio``, ``dividend_yield``, ``debt_to_equity``,
        ``eps`` and ``book_value``. Missing or unparsable ratios fall back to
        ``inf`` (``0`` for the dividend yield); ``eps``/``book_value`` are ``0``
        when the underlying ratio is unusable. ``None`` when the request fails
        or the payload has no ``Symbol`` key; the reason is printed.
    """
    def safe_float(value, default):
        """Convert ``value`` to float, returning ``default`` for blanks or junk."""
        if value in [None, 'None', '']:
            return default
        try:
            return float(value)
        except (ValueError, TypeError):
            return default

    def price_over_ratio(ratio):
        """Back out a per-share figure from a price ratio; 0 when the ratio is unusable."""
        if math.isfinite(ratio) and ratio != 0:
            return current_price / ratio
        return 0

    url = "https://www.alphavantage.co/query"
    # Let requests build the query string so the symbol and key are URL-encoded.
    params = {
        "function": "OVERVIEW",
        "symbol": symbol,
        "apikey": api_key,
        "entitlement": "delayed",
    }
    try:
        # Without a timeout a stalled connection would block the notebook forever.
        http_response = requests.get(url, params=params, timeout=timeout)
        http_response.raise_for_status()
        response = http_response.json()
        if not response or "Symbol" not in response:
            payload = response or {}
            error_msg = payload.get(
                'Error Message',
                payload.get('Note', payload.get('Information', 'No data')),
            )
            print(f"Error fetching fundamentals for {symbol}: {error_msg}")
            print(f"Full response: {response}")
            return None

        pe_ratio = safe_float(response.get('PERatio'), float('inf'))
        pb_ratio = safe_float(response.get('PriceToBookRatio'), float('inf'))

        return {
            'pe_ratio': pe_ratio,
            'pb_ratio': pb_ratio,
            'dividend_yield': safe_float(response.get('DividendYield'), 0),
            # NOTE(review): if the OVERVIEW payload never includes
            # 'DebtToEquityRatio', this is always inf and any downstream
            # "debt_to_equity < 2" check can never pass - confirm against the API.
            'debt_to_equity': safe_float(response.get('DebtToEquityRatio'), float('inf')),
            # EPS and book value are derived from price and the reported ratios.
            'eps': price_over_ratio(pe_ratio),
            'book_value': price_over_ratio(pb_ratio),
        }
    except Exception as e:
        # Best-effort fetch: callers treat None as "skip this ticker".
        print(f"Exception fetching fundamentals for {symbol}: {str(e)}")
        return None

def calculate_vwap(df, days=126):  # ~2 quarters
    """Compute volume-weighted entry and exit prices over the last ``days`` rows.

    Parameters
    ----------
    df : pandas.DataFrame
        Price history with ``Low``, ``High`` and ``Volume`` columns.
    days : int, default 126
        Number of trailing rows to include. ``0`` selects no rows.

    Returns
    -------
    pandas.DataFrame or None
        A one-row copy of the final row of the window, extended with the
        cumulative columns plus ``Entry`` (volume-weighted low) and ``Exit``
        (volume-weighted high), both rounded to 2 decimals. ``None`` when a
        required column is missing, the window is empty, or total volume is
        zero; the reason is printed.
    """
    for column in ('Volume', 'Low', 'High'):
        if column not in df.columns:
            print(f"Missing {column} column in DataFrame")
            return None

    vwap_analysis = df.tail(days).copy()
    if vwap_analysis.empty:
        # An empty result would make the caller's .iloc[0] lookups fail.
        print("No rows available for VWAP calculation")
        return None

    volume = vwap_analysis['Volume']
    vwap_analysis['Cumulative_LTPV'] = (vwap_analysis['Low'] * volume).cumsum()
    vwap_analysis['Cumulative_HTPV'] = (vwap_analysis['High'] * volume).cumsum()
    vwap_analysis['Cumulative_Volume'] = volume.cumsum()

    if vwap_analysis['Cumulative_Volume'].iloc[-1] == 0:
        # Dividing by zero volume would yield NaN thresholds.
        print("Zero total volume in VWAP window")
        return None

    vwap_analysis['Entry'] = (vwap_analysis['Cumulative_LTPV'] / vwap_analysis['Cumulative_Volume']).round(2)
    vwap_analysis['Exit'] = (vwap_analysis['Cumulative_HTPV'] / vwap_analysis['Cumulative_Volume']).round(2)
    # Only the last row carries the full-window averages.
    return vwap_analysis.tail(1).copy()

# Screening limits used by the Graham-style checks below.
_GRAHAM_MAX_PE = 19
_GRAHAM_MAX_PB = 2.0
_GRAHAM_MAX_PE_PB = 38  # = 19 x 2.0; also the multiplier in the price formula


def _analyze_ticker(ticker, api_key, margin_of_safety, vwap_days, graham_margin):
    """Analyse one holding and return its ten-element output row."""
    symbol = ticker['symbol']
    basis = ticker.get('basis', 0)
    is_etf = ticker.get('is_etf', False)
    error_row = [symbol, basis, None, None, None, None, None, None, None, "Error"]

    # Get price data
    raw_data = get_history(symbol, api_key)
    if raw_data is None or raw_data.empty:
        return error_row

    # Get fundamentals (skip for ETFs); the latest close drives the EPS/book-value derivation
    close_price = raw_data['Close'].iloc[-1]
    fundamentals = None if is_etf else get_fundamentals(symbol, api_key, close_price)
    if not is_etf and fundamentals is None:
        return error_row

    # Calculate VWAP; an empty frame would break the .iloc[0] lookups below
    vwap_data = calculate_vwap(raw_data, days=vwap_days)
    if vwap_data is None or vwap_data.empty:
        return error_row

    market_price = round(close_price, 2)  # latest close
    entry_price = round(vwap_data['Entry'].iloc[0], 2)
    exit_price = round(vwap_data['Exit'].iloc[0], 2)
    buy_threshold = round(entry_price * margin_of_safety, 2)

    # Graham checks apply to stocks only; ETFs pass through with no threshold.
    graham_buy_threshold = None
    graham_ok = True
    if not is_etf:
        eps = fundamentals['eps']
        book_value = fundamentals['book_value']
        if eps > 0 and book_value > 0:
            # Price at which P/E x P/B equals the 38 limit, less the Graham margin
            desired_price = math.sqrt(_GRAHAM_MAX_PE_PB * eps * book_value)
            graham_buy_threshold = round(desired_price * graham_margin, 2)
        else:
            graham_buy_threshold = buy_threshold  # Default to VWAP threshold

        pe_ratio = fundamentals['pe_ratio']
        pb_ratio = fundamentals['pb_ratio']
        # NOTE(review): get_fundamentals falls back to debt_to_equity=inf when the
        # API omits the ratio, which forces graham_ok to False and blocks every
        # stock "Buy" - confirm the field is actually returned.
        graham_ok = (
            (pe_ratio < _GRAHAM_MAX_PE and pb_ratio < _GRAHAM_MAX_PB) or
            (pe_ratio * pb_ratio < _GRAHAM_MAX_PE_PB and pe_ratio < 100 and pb_ratio < 10)
        ) and fundamentals['dividend_yield'] >= 0 and fundamentals['debt_to_equity'] < 2

    # Volume filter: latest volume must be at least 20% of the 21-row average
    avg_volume = raw_data['Volume'][-21:].mean()
    today_volume = raw_data['Volume'].iloc[-1]
    volume_ok = today_volume >= avg_volume * 0.2

    # Buy only below both thresholds; an explicit None check keeps a 0.0
    # Graham threshold from being mistaken for "no threshold".
    if graham_buy_threshold is None:
        buy_ceiling = buy_threshold
    else:
        buy_ceiling = min(buy_threshold, graham_buy_threshold)

    decision = "Hold"
    if market_price <= buy_ceiling and volume_ok and graham_ok:
        decision = "Buy"
    elif market_price >= exit_price and volume_ok:
        decision = "Sell"

    return [
        symbol, basis, market_price,
        buy_threshold, graham_buy_threshold, exit_price,
        None if is_etf else fundamentals['pe_ratio'],
        None if is_etf else fundamentals['pb_ratio'],
        None if is_etf else fundamentals['dividend_yield'],
        decision,
    ]


def build_analysis_table(ticker_symbols, api_key, margin_of_safety=0.9, vwap_days=126,
                         graham_margin=0.95, request_delay=0.1):
    """Build one Buy/Hold/Sell row per holding.

    Parameters
    ----------
    ticker_symbols : iterable of dict
        Entries with a ``'symbol'`` key and optional ``'basis'`` (default 0)
        and ``'is_etf'`` (default False) keys.
    api_key : str
        Alpha Vantage API key passed to the data helpers.
    margin_of_safety : float, default 0.9
        Multiplier applied to the VWAP entry price to get the buy threshold.
    vwap_days : int, default 126
        Window length passed to ``calculate_vwap``.
    graham_margin : float, default 0.95
        Multiplier applied to the Graham-style price to get its buy threshold.
    request_delay : float, default 0.1
        Seconds to pause after each ticker, including tickers that errored.

    Returns
    -------
    list of list
        Rows of ``[symbol, basis, market_price, buy_threshold,
        graham_buy_threshold, exit_price, pe_ratio, pb_ratio, dividend_yield,
        decision]``. Tickers whose data could not be obtained get ``None`` for
        the numeric fields and ``"Error"`` as the decision.
    """
    portfolio = []
    for ticker in ticker_symbols:
        try:
            portfolio.append(
                _analyze_ticker(ticker, api_key, margin_of_safety, vwap_days, graham_margin)
            )
        finally:
            # Pause after every ticker - failures (often rate-limit responses)
            # need the breathing room at least as much as successes.
            if request_delay > 0:
                time.sleep(request_delay)
    return portfolio

In [648]:
# Build the brokerage analysis table (63-row VWAP window, 1% margin of safety).
# Column labels map positionally onto the rows returned by build_analysis_table:
#   'VWAP' = buy_threshold (VWAP entry price x margin_of_safety)
#   'GIIB' = graham_buy_threshold, 'exit' = VWAP exit price, 'rating' = decision
b_portfolio = build_analysis_table(brokerage_funds, api_key, margin_of_safety=.99, vwap_days=63)
brokerage_df = pd.DataFrame(b_portfolio, 
                             columns=['ticker', 'basis', 'price', 
                                      'VWAP', 'GIIB', 'exit', 
                                      'P/E', 'P/B', 'DivYield', 'rating'])

In [649]:
# Brokerage rows that have a recorded cost basis (entries without 'basis' are stored as 0).
brokerage_df[brokerage_df['basis']>0]

Unnamed: 0,ticker,basis,price,VWAP,GIIB,exit,P/E,P/B,DivYield,rating
0,FRSH,13.667,12.63,13.98,13.98,14.62,inf,3.833,0.0,Hold
1,PGY,28.919,28.67,22.64,22.64,25.06,inf,6.89,0.0,Sell
2,COMM,0.01,15.58,8.57,2.22,9.47,55.52,30.36,0.0,Sell
3,FINV,8.95,9.23,9.04,21.72,9.54,6.16,1.005,0.0324,Hold
4,LX,6.612,6.41,6.91,18.37,7.4,6.01,0.695,0.0271,Hold
8,NGD,4.4,4.88,4.42,3.13,4.62,24.05,3.457,0.0,Sell
11,SSRM,12.21,15.19,12.14,21.24,12.85,19.03,0.922,0.0,Sell
14,EZPW,15.773,15.28,13.47,26.71,14.03,11.94,0.94,0.0,Sell
15,ARQT,15.996,14.77,13.63,13.63,14.56,inf,12.83,0.0,Sell
16,WFC,66.893,77.85,76.49,102.3,78.85,13.24,1.5,0.0206,Hold


In [650]:
# Brokerage rows with no recorded cost basis (basis defaulted to 0).
brokerage_df[brokerage_df['basis']==0]

Unnamed: 0,ticker,basis,price,VWAP,GIIB,exit,P/E,P/B,DivYield,rating
5,BCS,0.0,19.94,18.05,46.41,18.49,9.19,0.689,0.0043,Sell
6,PUK,0.0,25.91,23.92,28.0,24.44,15.45,1.901,0.0089,Sell
7,PSIX,0.0,99.7,65.18,23.1,73.72,25.74,24.82,0.0,Sell
9,GFI,0.0,31.29,23.58,16.66,24.62,22.56,5.36,0.0175,Sell
10,BKTI,0.0,40.09,44.2,28.61,48.44,15.08,4.467,0.0,Hold
12,UNFI,0.0,26.11,24.45,24.45,25.98,inf,1.025,0.0,Sell
13,MFC,0.0,30.09,30.72,37.77,31.6,13.36,1.629,0.0,Hold
17,ITRN,0.0,42.19,37.31,30.61,38.88,14.99,4.346,0.0482,Sell
19,LC,0.0,14.99,12.37,16.06,13.15,23.91,1.249,0.0,Sell
22,PPC,0.0,49.96,45.78,52.96,47.49,9.62,3.172,0.0,Sell


In [651]:
# Build the IRA analysis table (63-row VWAP window, 1% margin of safety).
# Column labels map positionally onto the rows returned by build_analysis_table:
#   'VWAP' = buy_threshold (VWAP entry price x margin_of_safety)
#   'GIIB' = graham_buy_threshold, 'exit' = VWAP exit price, 'rating' = decision
r_portfolio = build_analysis_table(ira_funds, api_key, margin_of_safety=.99, vwap_days=63)
retirement_df = pd.DataFrame(r_portfolio, 
                             columns=['ticker', 'basis', 'price', 
                                      'VWAP', 'GIIB', 'exit', 
                                      'P/E', 'P/B', 'DivYield', 'rating'])

In [652]:
# IRA rows that have a recorded cost basis (entries without 'basis' are stored as 0).
retirement_df[retirement_df['basis']>0]

Unnamed: 0,ticker,basis,price,VWAP,GIIB,exit,P/E,P/B,DivYield,rating
1,DXPE,112.0,110.01,91.35,72.03,96.57,21.73,3.681,0.0,Sell
3,WLDN,36.08,113.65,68.82,40.19,73.43,52.73,5.2,0.0,Sell
5,LRN,107.868,149.45,139.58,85.07,146.94,24.92,4.247,0.0,Sell
8,EAT,124.038,151.97,161.24,36.17,169.05,22.18,27.3,0.0,Hold
9,EZPW,15.46,15.28,13.47,26.71,14.03,11.94,0.94,0.0,Sell
10,ARQT,15.932,14.77,13.63,13.63,14.56,inf,12.83,0.0,Sell
11,WFC,57.18,77.85,76.49,102.3,78.85,13.24,1.5,0.0206,Hold
13,CRDO,0.01,120.41,80.48,6.32,86.53,413.03,30.17,0.0,Sell
14,PYPL,68.073,67.65,70.65,57.22,73.25,14.86,3.226,0.0,Hold
15,ALL,193.027,206.8,195.12,244.25,201.45,9.83,2.501,0.0184,Sell


In [653]:
# IRA rows with no recorded cost basis (basis defaulted to 0).
retirement_df[retirement_df['basis']==0]

Unnamed: 0,ticker,basis,price,VWAP,GIIB,exit,P/E,P/B,DivYield,rating
0,STRL,0.0,302.69,220.14,96.59,232.59,32.57,10.34,0.0,Sell
2,TSM,0.0,241.83,213.84,94.14,220.65,27.7,8.17,0.0142,Sell
4,SSRM,0.0,15.19,12.14,21.24,12.85,19.03,0.922,0.0,Sell
6,UNFI,0.0,26.11,24.45,24.45,25.98,inf,1.025,0.0,Sell
7,MFC,0.0,30.09,30.72,37.77,31.6,13.36,1.629,0.0,Hold
12,ITRN,0.0,42.19,37.31,30.61,38.88,14.99,4.346,0.0482,Sell
20,AGX,0.0,239.02,206.15,83.29,220.21,32.06,8.81,0.0062,Sell
21,POWL,0.0,243.85,195.57,164.98,208.19,16.21,4.622,0.0047,Sell
22,PPC,0.0,49.96,45.78,52.96,47.49,9.62,3.172,0.0,Sell
23,SYF,0.0,69.43,63.36,110.78,65.49,8.32,1.619,0.0151,Sell


In [None]:
# Note: This script assumes you have a Seeking Alpha premium account and can handle login/authentication.
# Seeking Alpha's quant ratings are behind a paywall, so you'll need to manage cookies or session for authenticated requests.
# For simplicity, this uses requests with a session, but you may need Selenium for full browser simulation if login is complex.
# Credentials are never stored in the notebook: pass them to login_to_seeking_alpha or set the
# SEEKING_ALPHA_EMAIL and SEEKING_ALPHA_PASSWORD environment variables.
# Be aware of Seeking Alpha's terms of service regarding scraping; this is for educational purposes only.

def login_to_seeking_alpha(session, email=None, password=None, timeout=30):
    """Log ``session`` in to Seeking Alpha.

    Parameters
    ----------
    session : requests.Session
        Session that receives the authentication cookies.
    email, password : str, optional
        Account credentials. When omitted they are read from the
        ``SEEKING_ALPHA_EMAIL`` / ``SEEKING_ALPHA_PASSWORD`` environment variables.
    timeout : float, default 30
        Seconds to wait for each HTTP response.

    Raises
    ------
    ValueError
        If no credentials were supplied or found in the environment.
    Exception
        If the login response does not look successful.
    """
    email = email or os.environ.get('SEEKING_ALPHA_EMAIL')
    password = password or os.environ.get('SEEKING_ALPHA_PASSWORD')
    if not email or not password:
        raise ValueError(
            "Seeking Alpha credentials missing: pass email/password or set "
            "SEEKING_ALPHA_EMAIL and SEEKING_ALPHA_PASSWORD"
        )

    login_url = 'https://seekingalpha.com/account/login'
    # Get the login page to retrieve any necessary tokens (e.g., CSRF)
    response = session.get(login_url, timeout=timeout)
    soup = BeautifulSoup(response.text, 'html.parser')
    # Find CSRF token if present (adjust based on actual form)
    csrf_input = soup.find('input', {'name': '_csrf'})
    csrf_token = csrf_input.get('value') if csrf_input is not None else None

    payload = {
        'email': email,
        'password': password,
        'slugs[]': '',
    }
    if csrf_token:
        # Send the token back only when the form actually exposes one.
        payload['_csrf'] = csrf_token
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
        'Referer': login_url,
    }
    response = session.post(login_url, data=payload, headers=headers, timeout=timeout)
    if response.status_code == 200 and 'logged_in' in response.text:  # Adjust check based on actual response
        print("Login successful")
    else:
        print("Login failed")
        raise Exception("Unable to login")

def scrape_quant_ratings(symbol, session, days=63, timeout=30):
    """Scrape recent quant scores for ``symbol`` from its Seeking Alpha ratings page.

    Parameters
    ----------
    symbol : str
        Ticker symbol inserted into the ratings URL.
    session : requests.Session
        Session used for the request (expected to be logged in already).
    days : int, default 63
        Size of the look-back window, in calendar days, ending now.
    timeout : float, default 30
        Seconds to wait for the HTTP response.

    Returns
    -------
    pandas.DataFrame
        Columns ``date`` and ``quant_score``, newest first, restricted to the
        look-back window. Empty (with both columns) when no row could be parsed.

    Raises
    ------
    Exception
        If the page does not return HTTP 200 or contains no table.
    """
    url = f'https://seekingalpha.com/symbol/{symbol}/ratings/quant-ratings'
    response = session.get(url, timeout=timeout)
    if response.status_code != 200:
        raise Exception(f"Failed to fetch page for {symbol}")

    soup = BeautifulSoup(response.text, 'html.parser')

    # Find the quant rating history table
    # Expected columns: Date, Price, Quant Rating, Quant Score, Valuation, Growth, Profitability, Momentum
    table = soup.find('table')  # Adjust selector if needed, e.g., soup.find('table', {'class': 'quant-history-table'})
    if table is None:
        raise Exception("Table not found")

    records = []
    for row in table.find_all('tr')[1:]:  # Skip header
        cols = row.find_all('td')
        if len(cols) < 4:  # Need at least Date, Price, Quant Rating, Quant Score
            continue
        try:
            date = datetime.strptime(cols[0].text.strip(), '%m/%d/%Y')
        except ValueError:
            continue  # Skip invalid dates
        try:
            quant_score = float(cols[3].text.strip())
        except ValueError:
            quant_score = np.nan
        records.append({'date': date, 'quant_score': quant_score})

    # Naming the columns keeps the frame well-formed even with zero rows;
    # otherwise sorting by 'date' would raise KeyError.
    df = pd.DataFrame(records, columns=['date', 'quant_score'])
    if df.empty:
        return df

    df = df.sort_values('date', ascending=False)  # Newest first

    # Keep only rows inside the look-back window
    end_date = datetime.now()
    start_date = end_date - timedelta(days=days)
    return df[(df['date'] >= start_date) & (df['date'] <= end_date)]

def compute_stats(df):
    """Summarise the ``quant_score`` column of ``df``.

    Missing scores are dropped first. Returns a dict with ``'average'`` (mean)
    and ``'median'``; both are NaN when no score remains.
    """
    valid_scores = df['quant_score'].dropna()
    has_scores = not valid_scores.empty
    return {
        'average': valid_scores.mean() if has_scores else np.nan,
        'median': valid_scores.median() if has_scores else np.nan,
    }

# Main function
def main(companies):
    """Log in, collect quant-score statistics for each symbol, and report them.

    Parameters
    ----------
    companies : iterable of str
        Ticker symbols to look up.

    Returns
    -------
    pandas.DataFrame
        One row per symbol with ``average`` and ``median`` columns; symbols
        whose scrape failed get NaN in both. The frame is also printed.
    """
    results = {}
    # The context manager closes the session's connections when done.
    with requests.Session() as session:
        login_to_seeking_alpha(session)

        for symbol in companies:
            try:
                ratings = scrape_quant_ratings(symbol, session)
                results[symbol] = compute_stats(ratings)
            except Exception as e:
                # Best effort: report the failure and keep going with the rest.
                print(f"Error for {symbol}: {e}")
                results[symbol] = {'average': np.nan, 'median': np.nan}

    # Output results
    results_df = pd.DataFrame.from_dict(results, orient='index')
    print(results_df)
    return results_df

# Example usage
# NOTE: running this cell performs the Seeking Alpha login and one page request per symbol.
companies = ['DXPE', 'AAPL', 'GOOG']  # Replace with your list of company symbols
main(companies)

In [660]:
# Machine Learning

In [661]:
# Data processing and cleaning
# Must be in numpy array or tf.Dataset object format

In [662]:
# Feature selection and normalization

In [663]:
# Build model

In [664]:
# Train model

In [665]:
# Evaluate model

In [666]:
# Refine model through hyperparameter tuning