In [236]:
# Import base dependencies
import pandas as pd
import requests
import time
from datetime import datetime, timedelta
import math
import pyodbc
from config import av_api_key as api_key, seeking_alpha_name, seeking_alpha_pw

# Import dependencies for web scraping
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from bs4 import BeautifulSoup
import numpy as np
import os

# import matplotlib.pyplot as plt
# from sklearn.linear_model import LinearRegression

# Import ML dependencies
# import tensorflow as tf
# from tensorflow import keras
# from keras.models import Sequential
# from keras.layers import SimpleRNN, Flatten, TimeDistributed, LSTM

In [220]:
# Import raw data from platform export
# IRA funds
ira_funds = [{'symbol':"STRL"}, 
             {'symbol':"DXPE", 'basis':105.590}, 
             {'symbol':"TSM"}, 
             {'symbol':"WLDN", 'basis':36.080}, 
             {'symbol':"SSRM"}, 
             {'symbol':"LRN", 'basis':107.868}, 
             {'symbol':"UNFI"}, 
             {'symbol':"MFC"}, 
             {'symbol':"EAT", 'basis':124.038}, 
             {'symbol':"EZPW", 'basis':15.460}, 
             {'symbol':"ARQT", 'basis':15.932}, 
             {'symbol':"WFC", 'basis':57.180}, 
             {'symbol':"ITRN"}, 
             {'symbol':"CRDO", 'basis':0.010}, 
             {'symbol':"PYPL", 'basis':68.073}, 
             {'symbol':"ALL", 'basis':193.027}, 
             {'symbol':"LC", 'basis':5.542}, 
             {'symbol':"QTWO", 'basis':79.148}, 
             {'symbol':"CLS", 'basis':0.010}, 
             {'symbol':"CCL", 'basis':0.010}, 
             {'symbol':"AGX"}, 
             {'symbol':"POWL"}, 
             {'symbol':"PPC"}, 
             {'symbol':"SYF"}, 
             {'symbol':"ATGE", 'basis':116.372}, 
             {'symbol':"BRK-B"}, 
             {'symbol':"SFM"}, 
             {'symbol':"SKYW", 'basis':65.040}, 
             {'symbol':"BLBD"}, 
             {'symbol':"GM"}, 
             {'symbol':"RCL"}, 
             {'symbol':"OKTA", 'basis':92.574}, 
             {'symbol':"TWLO"}, 
             {'symbol':"APP", 'basis':119.318}, 
             {'symbol':"TMUS", 'basis':218.823}, 
             {'symbol':"GRBK"}, 
             {'symbol':"UBER", 'basis':40.612}, 
             {'symbol':"CAAP", 'basis':21.570}
             ]

# Brokerage
brokerage_funds = [{'symbol':'NBIS'}, 
                   {'symbol':"FRSH", 'basis':13.667}, 
                   {'symbol':"PGY", 'basis':28.919}, 
                   {'symbol':"COMM", 'basis':0.010}, 
                   {'symbol':'FINV', 'basis':8.950}, 
                   {'symbol':"LX", 'basis':6.612}, 
                   {'symbol':'BCS'}, 
                   {'symbol':'PUK'}, 
                   {'symbol':'PSIX', 'basis':85.700}, 
                   {'symbol':'NGD', 'basis':4.400}, 
                   {'symbol':'GFI'}, 
                   {'symbol':'BKTI'}, 
                   {'symbol':"SSRM", 'basis':12.210}, 
                   {'symbol':"UNFI"}, 
                   {'symbol':"MFC"}, 
                   {'symbol':"EZPW", 'basis':15.773}, 
                   {'symbol':"ARQT", 'basis':15.996}, 
                   {'symbol':"WFC", 'basis':66.893}, 
                   {'symbol':"ITRN"}, 
                   {'symbol':"PYPL", 'basis':68.292}, 
                   {'symbol':"LC"}, 
                   {'symbol':"QTWO", 'basis':76.435}, 
                   {'symbol':"CCL", 'basis':0.010}, 
                   {'symbol':"PPC"}, 
                   {'symbol':"SYF"}, 
                   {'symbol':"ATGE", 'basis':97.175}, 
                   {'symbol':"SKYW", 'basis':74.583}, 
                   {'symbol':"BLBD"}, 
                   {'symbol':"GM"}, 
                   {'symbol':"OKTA", 'basis':99.535}, 
                   {'symbol':"GRBK"}, 
                   {'symbol':"UBER", 'basis':86.860}, 
                   {'symbol':"CAAP"},
                   {'symbol':"QUBT", 'basis':15.469},
                   {'symbol':"RGTI"}, 
                   {'symbol':"FBTC", 'basis':65.965, 'is_etf': True}
                   ]

In [237]:
def load_access_table_to_df(access_file_path, table_name):
    """
    Loads a table from a Microsoft Access database into a Pandas DataFrame.

    Parameters:
    access_file_path (str): The full path to the Microsoft Access database file (.mdb or .accdb).
    table_name (str): The name of the table to load from the database.

    Returns:
    pd.DataFrame: The DataFrame containing the data from the specified table.

    Notes:
    - Requires the pyodbc library and the Microsoft Access Driver installed on your system.
    - Ensure the bit version (32-bit or 64-bit) of Python matches the installed Access Driver.
    """
    # Connection string for Microsoft Access
    conn_str = (
        r"Driver={Microsoft Access Driver (*.mdb, *.accdb)};"
        r"DBQ=" + access_file_path + ";"
    )
    
    # Establish connection
    conn = pyodbc.connect(conn_str)
    
    # SQL query to select all data from the table
    query = f"SELECT * FROM {table_name}"
    
    # Read data into DataFrame
    df = pd.read_sql(query, conn)
    
    # Close the connection
    conn.close()
    
    return df

In [None]:
def create_summary_dataframe_with_weighted_date(df, reference_date=None):
    """
    Creates a summary DataFrame by aggregating transaction data for each company (TICKER),
    including a weighted average purchase date based on total cost (quantity * purchase price).

    The summary includes:
    - TICKER: The company ticker.
    - ASSET_CLASS: The asset class (assumed consistent per TICKER, takes the first occurrence).
    - SECTOR: The sector (assumed consistent per TICKER, takes the first occurrence).
    - WEIGHTED_AVG_PURCHASE_DATE: The cost-weighted average purchase date.
    - TOTAL_COST: The total cost of all purchases.
    - TOTAL_QUANTITY: The total quantity of shares purchased.
    - AVG_PURCHASE_PRICE: The weighted average purchase price (total cost / total quantity).

    Parameters:
    df (pd.DataFrame): The input DataFrame with columns: ID, TICKER, ASSET_CLASS, SECTOR,
                      ACQUIRED, PURCHASE_PRICE, QUANTITY.
    reference_date (str): Reference date for date-to-numeric conversion (default: '2025-01-01').

    Returns:
    pd.DataFrame: The summary DataFrame with aggregated data per TICKER, including weighted purchase date.
    """
    # Set reference_date to current date if None
    if reference_date is None:
        reference_date = datetime.now().date()

    # Ensure ACQUIRED is in datetime format
    df['ACQUIRED'] = pd.to_datetime(df['ACQUIRED'])
    
    # Calculate the cost for each transaction
    df['COST'] = df['PURCHASE_PRICE'] * df['QUANTITY']
    
    # Convert ACQUIRED date to numeric (days since reference_date)
    reference_date = pd.to_datetime(reference_date)
    df['DAYS_SINCE_REF'] = (df['ACQUIRED'] - reference_date).dt.days
    
    # Calculate weighted days (cost * days since reference)
    df['WEIGHTED_DAYS'] = df['COST'] * df['DAYS_SINCE_REF']
    
    # Group by TICKER and aggregate
    summary_df = df.groupby('TICKER').agg({
        'ASSET_CLASS': 'first',
        'SECTOR': 'first',
        'IS_ETF': 'first',
        'ACQUIRED': 'min',
        'QUANTITY': 'sum',
        'COST': 'sum',
        'WEIGHTED_DAYS': 'sum'
    }).reset_index()
    
    # Rename columns for clarity
    summary_df = summary_df.rename(columns={
        'ACQUIRED': 'FIRST_ACQUIRED',
        'QUANTITY': 'TOTAL_QUANTITY',
        'COST': 'TOTAL_COST'
    })
    
    # Calculate weighted average purchase price
    summary_df['AVG_PURCHASE_PRICE'] = summary_df['TOTAL_COST'] / summary_df['TOTAL_QUANTITY']
    
    # Calculate weighted average days
    summary_df['WEIGHTED_AVG_DAYS'] = summary_df['WEIGHTED_DAYS'] / summary_df['TOTAL_COST']
    
    # Convert weighted average days back to a datetime
    summary_df['WEIGHTED_AVG_PURCHASE_DATE'] = reference_date + pd.to_timedelta(summary_df['WEIGHTED_AVG_DAYS'], unit='D')
    
    # Round the weighted average date to the nearest day for clarity
    summary_df['WEIGHTED_AVG_PURCHASE_DATE'] = summary_df['WEIGHTED_AVG_PURCHASE_DATE'].dt.round('D')
    
    # Reorder columns as requested
    columns_order = [
        'TICKER', 'ASSET_CLASS', 'SECTOR', 'IS_ETF', 'WEIGHTED_AVG_PURCHASE_DATE',
        'TOTAL_COST', 'TOTAL_QUANTITY', 'AVG_PURCHASE_PRICE'
    ]
    summary_df = summary_df[columns_order]
    
    # Drop temporary columns from input DataFrame
    df.drop(['COST', 'DAYS_SINCE_REF', 'WEIGHTED_DAYS'], axis=1, inplace=True)
    
    return summary_df

In [238]:
access_file_path = 'c:/users/culle/onedrive/documents/finances/investments.accdb'
table_name = 'brokerage'

In [268]:
raw_df = load_access_table_to_df(access_file_path,table_name)
test_df = create_summary_dataframe_with_weighted_date(raw_df)

  df = pd.read_sql(query, conn)


In [269]:
def get_history(symbol, api_key, days=252):  # ~1 year default
    url = f"https://www.alphavantage.co/query?function=TIME_SERIES_DAILY_ADJUSTED&symbol={symbol}&apikey={api_key}&outputsize=full&entitlement=delayed"
    try:
        response = requests.get(url).json()
        if "Time Series (Daily)" not in response:
            error_msg = response.get('Note', response.get('Information', 'Unknown error'))
            print(f"Error fetching price data for {symbol}: {error_msg}")
            print(f"Response keys: {list(response.keys())}")
            return None
        
        time_series = response["Time Series (Daily)"]
        df = pd.DataFrame.from_dict(time_series, orient="index", dtype=float)
        
        # Debug: Print available columns
        # print(f"Columns for {symbol}: {list(df.columns)}")
        
        # Rename columns dynamically
        column_map = {
            col: name for col, name in [
                ("1. open", "Open"), ("2. high", "High"), ("3. low", "Low"),
                ("4. close", "Close"), ("5. volume", "Volume"), ("6. volume", "Volume"),
                ("7. adjusted close", "Adjusted Close"), ("8. dividend amount", "Dividend")
            ] if col in df.columns
        }
        if "5. volume" not in df.columns and "6. volume" not in df.columns:
            print(f"No volume data for {symbol}")
            return None
        
        df = df.rename(columns=column_map)
        df.index = pd.to_datetime(df.index)
        df = df.sort_index().tail(days)
        return df
    except Exception as e:
        print(f"Exception fetching price data for {symbol}: {str(e)}")
        return None

def get_fundamentals(symbol, api_key, current_price):
    url = f"https://www.alphavantage.co/query?function=OVERVIEW&symbol={symbol}&apikey={api_key}&entitlement=delayed"
    try:
        response = requests.get(url).json()
        if not response or "Symbol" not in response:
            error_msg = response.get('Note', response.get('Information', 'No data'))
            print(f"Error fetching fundamentals for {symbol}: {error_msg}")
            print(f"Full response: {response}")
            return None
        
        def safe_float(value, default):
            if value in [None, 'None', '']:
                return default
            try:
                return float(value)
            except (ValueError, TypeError):
                return default
        
        pe_ratio = safe_float(response.get('PERatio'), float('inf'))
        pb_ratio = safe_float(response.get('PriceToBookRatio'), float('inf'))
        
        # Calculate EPS and Book Value
        eps = current_price / pe_ratio if pe_ratio != float('inf') and pe_ratio != 0 else 0
        book_value = current_price / pb_ratio if pb_ratio != float('inf') and pb_ratio != 0 else 0
        
        fundamentals = {
            'pe_ratio': pe_ratio,
            'pb_ratio': pb_ratio,
            'dividend_yield': safe_float(response.get('DividendYield'), 0),
            'debt_to_equity': safe_float(response.get('DebtToEquityRatio'), float('inf')),
            'eps': eps,
            'book_value': book_value
        }
        
        return fundamentals
    except Exception as e:
        print(f"Exception fetching fundamentals for {symbol}: {str(e)}")
        return None

def calculate_vwap(df, days=126):  # ~2 quarters
    if 'Volume' not in df.columns:
        print("Missing Volume column in DataFrame")
        return None
    
    vwap_analysis = df[-days:].copy()
    vwap_analysis['Cumulative_LTPV'] = (vwap_analysis['Low'] * vwap_analysis['Volume']).cumsum()
    vwap_analysis['Cumulative_HTPV'] = (vwap_analysis['High'] * vwap_analysis['Volume']).cumsum()
    vwap_analysis['Cumulative_Volume'] = vwap_analysis['Volume'].cumsum()
    vwap_analysis['Entry'] = round(vwap_analysis['Cumulative_LTPV'] / vwap_analysis['Cumulative_Volume'], 2)
    vwap_analysis['Exit'] = round(vwap_analysis['Cumulative_HTPV'] / vwap_analysis['Cumulative_Volume'], 2)
    return vwap_analysis[-1:].copy()

def build_analysis_table(summary_df, api_key, margin_of_safety=0.9, vwap_days=126, graham_margin=0.95):
    """
    Builds a portfolio analysis table using financial data from Alpha Vantage API,
    with ticker symbols extracted from the provided summary DataFrame.

    Parameters:
    summary_df (pd.DataFrame): Summary DataFrame with at least a 'TICKER' column.
    api_key (str): Alpha Vantage API key for fetching financial data.
    margin_of_safety (float): Margin of safety for VWAP buy threshold (default: 0.9).
    vwap_days (int): Number of days for VWAP calculation (default: 126).
    graham_margin (float): Margin for Graham buy threshold (default: 0.95).

    Returns:
    list: List of lists containing analysis data for each ticker:
          [symbol, market_price, buy_threshold, graham_buy_threshold, exit_price,
           pe_ratio, pb_ratio, dividend_yield, decision]
    """
    # Extract unique tickers from summary_df
    ticker_symbols = [{'symbol': ticker, 'is_etf': False} for ticker in summary_df['TICKER'].unique()]
    
    portfolio = []
    
    for ticker in ticker_symbols:
        symbol = ticker['symbol']
        is_etf = ticker['is_etf']
        
        # Get price data
        raw_data = get_history(symbol, api_key)
        if raw_data is None:
            portfolio.append([symbol, None, None, None, None, None, None, None, "Error"])
            continue
        
        # Get fundamentals (skip for ETFs)
        current_price = raw_data['Close'].iloc[-1]  # Use Close for fundamental calcs
        fundamentals = None if is_etf else get_fundamentals(symbol, api_key, current_price)
        if not is_etf and fundamentals is None:
            portfolio.append([symbol, None, None, None, None, None, None, None, "Error"])
            continue
        
        # Calculate VWAP
        vwap_data = calculate_vwap(raw_data, days=vwap_days)
        if vwap_data is None:
            portfolio.append([symbol, None, None, None, None, None, None, None, "Error"])
            continue
        
        # Extract data
        market_price = round(raw_data['Close'].iloc[-1], 2)  # Use Close for buys
        entry_price = round(vwap_data['Entry'].iloc[0], 2)
        exit_price = round(vwap_data['Exit'].iloc[0], 2)
        buy_threshold = round(entry_price * margin_of_safety, 2)  # 10% margin
        
        # Graham buy threshold (for stocks only)
        graham_buy_threshold = None
        if not is_etf:
            if fundamentals['eps'] > 0 and fundamentals['book_value'] > 0:
                # Calculate desired price where P/E × P/B = 38
                desired_price = math.sqrt(38 * fundamentals['eps'] * fundamentals['book_value'])
                graham_buy_threshold = round(desired_price * graham_margin, 2)  # 5% margin
            else:
                graham_buy_threshold = buy_threshold  # Default to VWAP threshold
        
        # Volume filter: 20% of 21-day average
        avg_volume = raw_data['Volume'][-21:].mean()
        today_volume = raw_data['Volume'].iloc[-1]
        volume_ok = today_volume >= avg_volume * 0.2
        
        # Graham's fundamental checks (for stocks only)
        graham_ok = True
        if not is_etf:
            graham_ok = (
                (fundamentals['pe_ratio'] < 19 and fundamentals['pb_ratio'] < 2.0) or
                (fundamentals['pe_ratio'] * fundamentals['pb_ratio'] < 38 and 
                 fundamentals['pe_ratio'] < 100 and fundamentals['pb_ratio'] < 10)
                 ) and fundamentals['dividend_yield'] >= 0 and fundamentals['debt_to_equity'] < 2
        
        # Decision logic
        decision = "Hold"
        if market_price <= min(buy_threshold, graham_buy_threshold or float('inf')) and volume_ok and graham_ok:
            decision = "Buy"
        elif market_price >= exit_price and volume_ok:
            decision = "Sell"
        
        # Prepare fundamentals for output
        pe_ratio = None if is_etf else fundamentals['pe_ratio']
        pb_ratio = None if is_etf else fundamentals['pb_ratio']
        dividend_yield = None if is_etf else fundamentals['dividend_yield']
        
        portfolio.append([
            symbol, market_price, buy_threshold, graham_buy_threshold, exit_price,
            pe_ratio, pb_ratio, dividend_yield, decision
        ])

        # Convert portfolio list to DataFrame with specified column names
        portfolio_df = pd.DataFrame(portfolio, columns=[
            'ticker', 'price', 'entry_low', 'entry_val', 'exit',
            'P/E', 'P/B', 'DivYield', 'rating'
            ])
        
        # Minimal delay for server stability (75 calls/minute = ~0.8 seconds/call)
        time.sleep(0.1)
    
    return portfolio_df

In [248]:
b_analysis = build_analysis_table(test_df, api_key, margin_of_safety=.99, vwap_days=32)

Error fetching fundamentals for FBTC: No data
Full response: {}
Exception fetching price data for FRSH: ("Connection broken: ConnectionResetError(10054, 'An existing connection was forcibly closed by the remote host', None, 10054, None)", ConnectionResetError(10054, 'An existing connection was forcibly closed by the remote host', None, 10054, None))


In [252]:
b_analysis = pd.DataFrame(b_analysis, columns=[
            'ticker', 'price', 'entry_low', 'entry_val', 'exit',
            'P/E', 'P/B', 'DivYield', 'rating'
            ])

In [272]:
test_df

Unnamed: 0,TICKER,ASSET_CLASS,SECTOR,WEIGHTED_AVG_PURCHASE_DATE,TOTAL_COST,TOTAL_QUANTITY,AVG_PURCHASE_PRICE
0,ARQT,Small Cap,Health Care,2025-03-24,1327.6437,83.0,15.995707
1,ATGE,Small Cap,Consumer Discretionary,2025-05-30,874.575,9.0,97.175
2,CCL,Mid Cap,Consumer Discretionary,2026-11-27,-223.627,3.0,-74.542333
3,COMM,Small Cap,Information Technology,2025-08-20,-463.431,63.0,-7.356048
4,EZPW,Small Cap,Financials,2025-05-24,2208.153384,140.0,15.772524
5,FBTC,Crypto,Crypto,2024-06-27,395.79,6.0,65.965
6,FINV,International,Financials,2025-08-01,1007.93,113.0,8.919735
7,FRSH,Small Cap,Information Technology,2025-08-01,1161.6663,85.0,13.666662
8,LX,Small Cap,Financials,2025-07-25,674.429,102.0,6.612049
9,NGD,International,Materials,2025-07-25,651.2,148.0,4.4


In [273]:
test_df.iloc[5]

TICKER                                       FBTC
ASSET_CLASS                                Crypto
SECTOR                                     Crypto
WEIGHTED_AVG_PURCHASE_DATE    2024-06-27 00:00:00
TOTAL_COST                                 395.79
TOTAL_QUANTITY                                6.0
AVG_PURCHASE_PRICE                         65.965
Name: 5, dtype: object

In [274]:
b_analysis.iloc[5]

ticker        FBTC
price          NaN
entry_low      NaN
entry_val      NaN
exit           NaN
P/E            NaN
P/B            NaN
DivYield       NaN
rating       Error
Name: 5, dtype: object

In [None]:
b_analysis[b_analysis['basis']>0]

Unnamed: 0,ticker,basis,price,VWAP,GIIB,exit,P/E,P/B,DivYield,rating
1,FRSH,13.667,12.96,13.35,13.35,14.03,inf,3.928,0.0,Hold
2,PGY,28.919,33.2,27.58,27.58,30.97,inf,7.09,0.0,Sell
3,COMM,0.01,15.02,11.21,2.12,12.44,56.44,30.36,0.0,Sell
4,FINV,8.95,8.89,9.17,19.64,9.66,6.55,1.073,0.0306,Hold
5,LX,6.612,6.81,6.57,19.99,7.05,5.48,0.726,0.0441,Hold
8,PSIX,85.7,83.36,79.78,27.96,91.09,18.81,16.2,0.0,Hold
9,NGD,4.4,5.14,4.47,3.15,4.7,25.2,3.622,0.0,Sell
12,SSRM,12.21,16.14,13.06,20.48,13.86,20.96,1.017,0.0,Sell
15,EZPW,15.773,16.0,13.92,26.9,14.54,12.36,0.981,0.0,Sell
16,ARQT,15.996,15.98,14.1,14.1,15.17,inf,13.49,0.0,Sell


In [224]:
brokerage_df[brokerage_df['basis']==0]

Unnamed: 0,ticker,basis,price,VWAP,GIIB,exit,P/E,P/B,DivYield,rating
0,NBIS,0.0,68.55,54.84,20.11,59.63,83.7,4.763,0.0,Sell
6,BCS,0.0,20.5,18.73,46.66,19.2,9.38,0.706,0.0042,Sell
7,PUK,0.0,26.92,24.76,28.25,25.29,15.92,1.957,0.0087,Sell
10,GFI,0.0,29.77,25.66,16.33,26.89,21.91,5.2,0.018,Sell
11,BKTI,0.0,59.14,42.46,38.24,46.95,16.64,4.93,0.0,Sell
13,UNFI,0.0,26.62,25.06,25.06,26.41,inf,1.022,0.0,Sell
14,MFC,0.0,30.56,30.2,37.83,31.02,13.55,1.651,0.0557,Hold
18,ITRN,0.0,41.41,39.01,29.45,40.57,15.32,4.427,0.047,Sell
20,LC,0.0,15.6,13.68,16.22,14.66,24.64,1.287,0.0,Sell
23,PPC,0.0,47.6,45.89,50.76,47.61,9.56,3.154,0.0,Hold


In [225]:
r_portfolio = build_analysis_table(ira_funds, api_key, margin_of_safety=.975, vwap_days=63)
retirement_df = pd.DataFrame(r_portfolio, 
                             columns=['ticker', 'basis', 'price', 
                                      'VWAP', 'GIIB', 'exit', 
                                      'P/E', 'P/B', 'DivYield', 'rating'])

In [226]:
retirement_df[retirement_df['basis']>0]

Unnamed: 0,ticker,basis,price,VWAP,GIIB,exit,P/E,P/B,DivYield,rating
1,DXPE,105.59,118.96,91.86,72.64,98.68,22.9,4.016,0.0,Sell
3,WLDN,36.08,119.89,72.86,39.1,78.88,49.3,6.54,0.0,Sell
5,LRN,107.868,156.76,137.51,82.36,146.62,26.61,4.669,0.0,Sell
8,EAT,124.038,159.25,158.86,38.66,169.57,21.89,26.58,0.0,Hold
9,EZPW,15.46,16.0,13.29,26.91,14.05,12.36,0.981,0.0,Sell
10,ARQT,15.932,15.99,13.49,13.49,14.63,inf,13.49,0.0,Sell
11,WFC,57.18,79.41,75.5,103.23,79.06,13.38,1.517,0.0201,Sell
13,CRDO,0.01,117.8,82.32,6.11,89.91,417.69,30.5,0.0,Sell
14,PYPL,68.073,69.22,69.34,57.05,73.03,15.12,3.34,0.0,Hold
15,ALL,193.027,208.08,192.36,247.1,201.59,9.83,2.474,0.0186,Sell


In [227]:
retirement_df[retirement_df['basis']==0]

Unnamed: 0,ticker,basis,price,VWAP,GIIB,exit,P/E,P/B,DivYield,rating
0,STRL,0.0,283.72,223.88,92.82,240.76,31.76,10.09,0.0,Sell
2,TSM,0.0,239.97,214.36,96.38,224.49,25.96,8.19,0.0138,Sell
4,SSRM,0.0,16.15,12.28,20.48,13.19,20.96,1.017,0.0,Sell
6,UNFI,0.0,26.63,24.03,24.03,25.95,inf,1.022,0.0,Sell
7,MFC,0.0,30.54,30.17,37.82,31.5,13.55,1.651,0.0557,Hold
12,ITRN,0.0,41.41,36.98,29.45,39.11,15.32,4.427,0.047,Sell
20,AGX,0.0,225.81,204.95,79.7,222.55,32.01,8.6,0.006,Sell
21,POWL,0.0,256.0,197.7,150.78,214.06,18.04,5.48,0.0039,Sell
22,PPC,0.0,47.6,45.27,50.76,47.67,9.56,3.154,0.0,Hold
23,SYF,0.0,72.78,63.17,109.52,66.3,8.82,1.717,0.0145,Sell


In [228]:
'''def setup_driver():
    chrome_options = Options()
    chrome_options.add_experimental_option("debuggerAddress", "127.0.0.1:9222")  # Connect to existing Chrome
    driver = webdriver.Chrome(options=chrome_options)  # No need for user-data-dir here
    return driver

def scrape_quant_ratings(symbol, driver, days=63):
    url = f'https://seekingalpha.com/symbol/{symbol}/ratings/quant-ratings'
    driver.get(url)
    time.sleep(5)  # Extra delay for dynamic content
    
    try:
        # Wait for table to load
        WebDriverWait(driver, 30).until(EC.presence_of_element_located((By.TAG_NAME, 'table')))
    except Exception as e:
        driver.save_screenshot(f'error_{symbol}.png')
        with open(f'page_source_{symbol}.html', 'w', encoding='utf-8') as f:
            f.write(driver.page_source)
        print(f"Page load failed for {symbol}: {e}. Saved screenshot and page source for debugging.")
        raise Exception(f"Failed to load page for {symbol}")
    
    soup = BeautifulSoup(driver.page_source, 'html.parser')
    
    # Find the quant rating history table
    table = soup.find('table')
    
    if not table:
        raise Exception("Table not found")
    
    rows = table.find_all('tr')[1:]  # Skip header
    data = []
    for row in rows:
        cols = row.find_all('td')
        if len(cols) >= 4:
            date_str = cols[0].text.strip()
            try:
                date = datetime.strptime(date_str, '%m/%d/%Y')
            except ValueError:
                continue
            quant_score_str = cols[3].text.strip()
            try:
                quant_score = float(quant_score_str)
            except ValueError:
                quant_score = np.nan
            data.append({'date': date, 'quant_score': quant_score})
    
    df = pd.DataFrame(data)
    df = df.sort_values('date', ascending=False)
    
    # Filter last 63 days (current date as 02:24 PM MDT, Aug 10, 2025)
    end_date = datetime(2025, 8, 10, 14, 24)  # 2:24 PM MDT
    start_date = end_date - timedelta(days=days)
    df_filtered = df[(df['date'] >= start_date) & (df['date'] <= end_date)]
    
    return df_filtered

def compute_stats(df):
    scores = df['quant_score'].dropna()
    if len(scores) == 0:
        return {'average': np.nan, 'median': np.nan}
    return {
        'average': scores.mean(),
        'median': scores.median()
    }

# Main function
def main(companies):
    driver = setup_driver()
    try:
        results = {}
        for symbol in companies:
            try:
                df = scrape_quant_ratings(symbol, driver)
                stats = compute_stats(df)
                results[symbol] = stats
            except Exception as e:
                print(f"Error for {symbol}: {e}")
                results[symbol] = {'average': np.nan, 'median': np.nan}
        
        # Output results
        results_df = pd.DataFrame.from_dict(results, orient='index')
        print(results_df)
    finally:
        driver.quit()

# Example usage
companies = ['DXPE', 'AAPL', 'GOOG']  # Replace with your list
main(companies)
'''

'def setup_driver():\n    chrome_options = Options()\n    chrome_options.add_experimental_option("debuggerAddress", "127.0.0.1:9222")  # Connect to existing Chrome\n    driver = webdriver.Chrome(options=chrome_options)  # No need for user-data-dir here\n    return driver\n\ndef scrape_quant_ratings(symbol, driver, days=63):\n    url = f\'https://seekingalpha.com/symbol/{symbol}/ratings/quant-ratings\'\n    driver.get(url)\n    time.sleep(5)  # Extra delay for dynamic content\n    \n    try:\n        # Wait for table to load\n        WebDriverWait(driver, 30).until(EC.presence_of_element_located((By.TAG_NAME, \'table\')))\n    except Exception as e:\n        driver.save_screenshot(f\'error_{symbol}.png\')\n        with open(f\'page_source_{symbol}.html\', \'w\', encoding=\'utf-8\') as f:\n            f.write(driver.page_source)\n        print(f"Page load failed for {symbol}: {e}. Saved screenshot and page source for debugging.")\n        raise Exception(f"Failed to load page for {symbol

In [229]:
# Machine Learning

In [230]:
# Data processing and clearning
# Must be in numpy array or tf.Dataset object format

In [231]:
# Feature selection and normalization

In [232]:
# Build model

In [233]:
# Train model

In [234]:
# Evaluate model

In [235]:
# Refine model through hyperparameter tuning