In [1]:
from google.genai import types
from google import genai
import os
import time
import pandas as pd
import random
import yfinance as yf
import numpy as np
from scipy.stats import uniform, beta
from datetime import date,datetime,timedelta
import pickle
from pathlib import Path

# Mirror ../../shared_infrastructure into the current directory before importing
# from it. `--delete` removes files from the local copy that no longer exist in
# the source. The exit status returned by os.system is not checked here, so a
# failed sync only surfaces (if at all) through the imports below.
os.system("rsync -aE --delete '../../shared_infrastructure' './'")
# Star imports: the names these modules export are not visible in this notebook.
from shared_infrastructure.WilliamXXu_numerical import *
from shared_infrastructure.WilliamXXu_core import *


# Suffix appended to a ticker, per nation, when building the yfinance symbol
# (see stock_list_annotation, which looks up nation_ending[nation]).
nation_ending={'UK':'.L','US':''}
# Gemini client; the API key is read from the GOOGLE_API_KEY environment
# variable (os.getenv yields None when the variable is unset).
client = genai.Client(api_key=os.getenv("GOOGLE_API_KEY"))

In [None]:
# Watch-list grouped by theme; the grouping only documents why each ticker is
# here, because the mapping is flattened into a plain list right below.
symbols = {
    'tech': ['AAPL', 'PYPL', 'GOOGL', 'QCOM', 'MTCH'],
    'petrochem': ['DOW', 'XOM', 'OXY', 'CVX', 'SLB'],
    'retail': ['TGT', 'UPS', 'NKE'],
    'pharma': ['AZN', 'PFE', 'MRK', 'BMY', 'JNJ', 'TMO'],
    'insurance': ['UNH', 'HUM'],
    'metal': ['VALE', 'RIO'],
    'zhong': ['JD'],
    'food': ['KHC', 'PEP', 'DEO'],
    'hype': ['TSLA', 'AVGO'],
    'bill': ['TLT', 'IEF'],
    'transport': ['DAL', 'CCL'],
    'utilities': ['NEE', 'VZ', 'PG'],
    'cars': ['F', 'GM'],
}

# Flatten to one list of tickers, preserving group order and in-group order.
symbols = [ticker for group in symbols.values() for ticker in group]
#uk_symbols=[k+'.L' for k in ['UTG','DOM','GLEN','DGE','POLR','IPX','WPP','SAFE']]


In [3]:

def summary(full_name, nation, client):
    """Ask Gemini for a one-sentence summary of a company's business.

    Parameters
    ----------
    full_name : str
        Company name inserted into the prompt.
    nation : str
        Nation label inserted into the prompt in front of the word "company"
        (callers in this notebook pass keys of ``nation_ending``, e.g. 'US').
    client : genai.Client
        Client whose ``models.generate_content`` is called.

    Returns
    -------
    The ``text`` attribute of the model response.
    """
    question = (
        f'Please summarise the business of {nation} company {full_name}. '
        'Return one sentence within 500 characters.'
    )
    # Grounding with Google Search: the `google_search` field of Tool expects a
    # GoogleSearch *instance*. The previous code passed the GoogleSearchRetrieval
    # class object itself, which is both the wrong type for this field and not
    # an instance.
    search_tool = types.Tool(google_search=types.GoogleSearch())
    response = client.models.generate_content(
        model="gemini-2.0-flash",
        contents=question,
        config=types.GenerateContentConfig(tools=[search_tool]),
    )
    return response.text


In [4]:

def stock_list_annotation(tickers, client, nation, sorting=False):
    """Annotate tickers with company name, sector, market cap and an LLM summary.

    For each ticker the suffix ``nation_ending[nation]`` is appended, the
    yfinance ``info`` mapping is read, and ``summary`` is called with the
    company name. After every ticker the loop sleeps for a gamma(1, 1)
    distributed number of seconds to space out the API calls.

    Parameters
    ----------
    tickers : iterable of str, pandas.Series or pandas.DataFrame
        Tickers without exchange suffix. For a Series/DataFrame the *index*
        supplies the tickers and the annotation columns are appended to the
        input's own columns, row by row.
    client : genai.Client
        Forwarded to ``summary``.
    nation : str
        Key of ``nation_ending``; an unknown key raises ``KeyError``.
    sorting : bool, default False
        When true, rows are ordered by ascending numeric market cap (rows with
        no market cap last). The index is not renumbered after sorting.

    Returns
    -------
    pandas.DataFrame
        Columns 'Ticker', 'Company Name', 'Sector', 'Market Cap', 'Summary'
        (preceded by the input's columns when a Series/DataFrame was given).
        A ticker whose lookup fails still gets a row: the error is printed,
        'Ticker' is filled, and any field not obtained before the failure is NaN.
    """
    columns = ['Ticker', 'Company Name', 'Sector', 'Market Cap', 'Summary']

    def get_market_cap_formatted(market_cap):
        """Convert market cap to readable format"""
        if not market_cap:
            return 'N/A'
        billion = 1_000_000_000
        if market_cap >= billion:
            return f"${round(market_cap/billion, 1)}B"
        return f"${round(market_cap/1_000_000, 1)}M"

    # Normalise the input: remember the frame (if any) and work on a plain list.
    source_frame = None
    if isinstance(tickers, pd.Series):
        tickers = pd.DataFrame(tickers)
    if isinstance(tickers, pd.DataFrame):
        source_frame = tickers
        tickers = list(source_frame.index)
    else:
        tickers = list(tickers)

    ending = nation_ending[nation]
    records = []
    raw_caps = []  # numeric market caps, parallel to `records`, used for sorting
    for ticker in tickers:
        print(ticker)
        record = {'Ticker': ticker}
        raw_cap = float('nan')
        try:
            # Read `info` once and reuse it for all three fields.
            info = yf.Ticker(ticker + ending).info
            market_cap = info.get('marketCap', None)
            record['Company Name'] = info.get('longName', 'N/A')
            record['Sector'] = info.get('sector', 'N/A')
            record['Market Cap'] = get_market_cap_formatted(market_cap)
            if market_cap:
                raw_cap = float(market_cap)
            record['Summary'] = summary(record['Company Name'], nation, client)
            print(record['Summary'])
        except Exception as e:
            # Best effort: keep going, but say what failed instead of hiding it.
            print(f"{ticker}: annotation failed ({e!r})")
        records.append(record)
        raw_caps.append(raw_cap)
        # Random pause between API calls to reduce the chance of rate limiting.
        sleep_time = np.random.gamma(1, 1)
        print(sleep_time)
        time.sleep(sleep_time)

    # Passing `columns` guarantees all five columns exist even when `records`
    # is empty or every lookup failed (previously a KeyError).
    annotated = pd.DataFrame(records, columns=columns)
    if source_frame is not None:
        # One record per input row, in the same order: reuse the input index so
        # the column-wise concat lines rows up instead of aligning a ticker
        # index against a RangeIndex.
        annotated.index = source_frame.index
        annotated = pd.concat([source_frame, annotated], axis=1)
    if sorting:
        # Sort on the raw numbers; the formatted strings ("$1.2B") would sort
        # lexicographically. NaN entries end up last.
        order = np.argsort(np.array(raw_caps, dtype=float), kind='stable')
        annotated = annotated.iloc[order]
    return annotated


# Load the CSV and index it by its 'Ticker' column.
symbols = pd.read_csv(
    '../data/active_options_stocks/active_stock_options.csv'
).set_index('Ticker')

symbols

# Annotate only the first ten rows.
res = stock_list_annotation(symbols.iloc[:10], client, 'US')
#res.to_csv('../data/active_stock_options_annotation.csv', index=False)


def stock_list_annotation(tickers, client, nation):
    """Annotate tickers with company name, sector, market cap and an LLM summary.

    NOTE: this redefines (and, once executed, shadows) the earlier
    ``stock_list_annotation`` in this notebook. Unlike that one it takes no
    ``sorting`` flag, has no special handling for Series/DataFrame input, and
    it pauses longer between tickers (gamma(4, 5) seconds).

    For each ticker the suffix ``nation_ending[nation]`` is appended, the
    yfinance ``info`` mapping is read, and ``summary`` is called with the
    company name.

    Parameters
    ----------
    tickers : iterable of str
        Tickers without exchange suffix.
    client : genai.Client
        Forwarded to ``summary``.
    nation : str
        Key of ``nation_ending``; an unknown key raises ``KeyError``.

    Returns
    -------
    pandas.DataFrame
        Columns 'Ticker', 'Company Name', 'Sector', 'Market Cap', 'Summary'
        with a fresh 0..n-1 index. A ticker whose lookup fails still gets a
        row: the error is printed, 'Ticker' is filled, and any field not
        obtained before the failure is NaN.
    """
    columns = ['Ticker', 'Company Name', 'Sector', 'Market Cap', 'Summary']

    def get_market_cap_formatted(market_cap):
        """Convert market cap to readable format"""
        if not market_cap:
            return 'N/A'
        billion = 1_000_000_000
        if market_cap >= billion:
            return f"${round(market_cap/billion, 1)}B"
        return f"${round(market_cap/1_000_000, 1)}M"

    ending = nation_ending[nation]
    records = []
    for ticker in tickers:
        print(ticker)
        record = {'Ticker': ticker}
        try:
            # Read `info` once and reuse it for all three fields.
            info = yf.Ticker(ticker + ending).info
            record['Company Name'] = info.get('longName', 'N/A')
            record['Sector'] = info.get('sector', 'N/A')
            record['Market Cap'] = get_market_cap_formatted(info.get('marketCap', None))
            record['Summary'] = summary(record['Company Name'], nation, client)
            print(record['Summary'])
        except Exception as e:
            # Best effort: keep going, but say what failed instead of hiding it.
            print(f"{ticker}: annotation failed ({e!r})")
        records.append(record)
        # Random pause between API calls to reduce the chance of rate limiting.
        sleep_time = np.random.gamma(4, 5)
        print(sleep_time)
        time.sleep(sleep_time)

    # Passing `columns` guarantees all five columns exist even when `records`
    # is empty or every lookup failed (previously a KeyError).
    return pd.DataFrame(records, columns=columns).reset_index(drop=True)
