In [1]:
from google.genai import types
from google import genai
import os
import time
import pandas as pd
import random
import yfinance as yf

import csv
import numpy as np
from scipy.stats import uniform, beta
# Ticker suffix per nation; stock_list_annotation appends it to each symbol
# before handing the result to yf.Ticker.
nation_ending={'UK':'.L','US':''}

In [2]:
def gamma_param(mean, std):
    """
    Convert a desired mean (μ) and standard deviation (σ) into gamma parameters.

    Uses the moment relations of the gamma distribution in shape/scale form:
    mean = alpha * theta and variance = alpha * theta ** 2.

    Parameters:
        mean (float): Desired mean of the distribution (must be > 0).
        std (float): Desired standard deviation of the distribution (must be > 0).

    Returns:
        tuple: ``(alpha, theta)`` where ``alpha`` is the shape parameter and
        ``theta`` is the scale parameter. No distribution object is built; the
        pair can be passed on, e.g. ``np.random.gamma(alpha, theta)``.

    Raises:
        ValueError: If mean or std is non-positive or NaN.
    """
    # `not (x > 0)` is also true for NaN, which a plain `x <= 0` test lets through.
    if not (mean > 0 and std > 0):
        raise ValueError("Mean and standard deviation must be positive.")

    variance = std ** 2
    alpha = (mean ** 2) / variance  # Shape parameter (k or a)
    theta = variance / mean         # Scale parameter

    return alpha, theta

In [49]:
def beta_param(mu,sigma2):
    """
    Convert a desired mean and variance into beta-distribution parameters.

    Method-of-moments solution of mean = a / (a + b) and
    variance = a * b / ((a + b) ** 2 * (a + b + 1)).

    Parameters:
        mu (float): Desired mean, strictly between 0 and 1.
        sigma2 (float): Desired *variance* (not standard deviation); must be
            positive and strictly less than ``mu * (1 - mu)``, otherwise no
            beta distribution with these moments exists.

    Returns:
        tuple: ``(a, b)``, the two positive shape parameters.

    Raises:
        ValueError: If mu is outside (0, 1) or sigma2 is outside
            (0, mu * (1 - mu)). NaN inputs are rejected as well.
    """
    # Chained comparisons are False for NaN, so NaN is rejected here too.
    if not (0 < mu < 1):
        raise ValueError("Mean must lie strictly between 0 and 1.")
    if not (0 < sigma2 < mu * (1 - mu)):
        raise ValueError("Variance must be positive and less than mu * (1 - mu).")

    a = mu * (mu * (1 - mu) / sigma2 - 1)
    b = a * (1 - mu) / mu
    return a, b

In [77]:
# Spot check: mean 10 with std 10 yields shape alpha = 1.0 and scale theta = 10.0.
gamma_param(10,10)

(1.0, 10.0)

In [2]:
# Gemini API client; the key is read from the GOOGLE_API_KEY environment
# variable (os.getenv returns None when it is unset).
client = genai.Client(api_key=os.getenv("GOOGLE_API_KEY"))
def summary(full_name,nation,client):
    """Ask Gemini for a one-sentence business summary of a company.

    Parameters:
        full_name (str): Company name inserted into the prompt.
        nation (str): Nation label inserted into the prompt (e.g. 'US', 'UK').
        client: google.genai client used to issue the request.

    Returns:
        The ``text`` attribute of the model response.

    Raises:
        TypeError: If ``full_name`` or ``nation`` is not a string (the prompt is
            built by string concatenation).
    """
    question='Please summarise the business of '+nation+' company '+full_name+'. Return one sentence within 500 characters.'
    # The `google_search` tool field takes a GoogleSearch *instance*. The earlier
    # code passed the GoogleSearchRetrieval class object itself, which belongs to
    # the separate `google_search_retrieval` field.
    search_tool = types.Tool(google_search=types.GoogleSearch())
    response = client.models.generate_content(
        model="gemini-2.0-flash",
        contents=question,
        config=types.GenerateContentConfig(tools=[search_tool]),
    )
    return response.text


In [3]:
# Load the existing annotation table; the first CSV column becomes the index.
annotated_list=pd.read_csv('data/sp500_annotation.csv',index_col=0)


In [6]:
# Preview the rows from position 374 onward (on a deep copy, so annotated_list is untouched).
annotated_list.copy(deep=True).iloc[374:,]


Unnamed: 0_level_0,Exchange,Shortname,Longname,Sector,Industry,Currentprice,Marketcap,Ebitda,Revenuegrowth,City,State,Country,Fulltimeemployees,Longbusinesssummary,Weight
Symbol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
FSLR,NMS,"First Solar, Inc.","First Solar, Inc.",Technology,Solar,182.39,19526307840,1.716966e+09,0.108,Tempe,AZ,United States,6700.0,"First Solar, Inc., a solar technology company,...",0.000351
BF-B,NYQ,Brown Forman Inc,Brown-Forman Corporation,Consumer Defensive,Beverages - Wineries & Distilleries,41.28,19369400320,1.198000e+09,-0.084,Louisville,KY,United States,5700.0,"Brown-Forman Corporation, together with its su...",0.000348
LDOS,NYQ,"Leidos Holdings, Inc.","Leidos Holdings, Inc.",Technology,Information Technology Services,144.81,19322431488,2.057000e+09,0.069,Reston,VA,United States,47000.0,"Leidos Holdings, Inc., together with its subsi...",0.000348
CFG,NYQ,"Citizens Financial Group, Inc.","Citizens Financial Group, Inc.",Financial Services,Banks - Regional,43.45,19148457984,,-0.061,Providence,RI,United States,17329.0,"Citizens Financial Group, Inc. operates as the...",0.000345
LH,NYQ,Labcorp Holdings Inc.,Labcorp Holdings Inc.,Healthcare,Diagnostics & Research,228.61,19120779264,1.673500e+09,0.074,Burlington,NC,United States,67000.0,Labcorp Holdings Inc. provides laboratory serv...,0.000344
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
CZR,NMS,"Caesars Entertainment, Inc.","Caesars Entertainment, Inc.",Consumer Cyclical,Resorts & Casinos,32.82,6973593600,3.668000e+09,-0.040,Reno,NV,United States,51000.0,"Caesars Entertainment, Inc. operates as a gami...",0.000125
BWA,NYQ,BorgWarner Inc.,BorgWarner Inc.,Consumer Cyclical,Auto Parts,31.88,6972155904,1.882000e+09,-0.048,Auburn Hills,MI,United States,39900.0,"BorgWarner Inc., together with its subsidiarie...",0.000125
QRVO,NMS,"Qorvo, Inc.","Qorvo, Inc.",Technology,Semiconductors,70.85,6697217024,6.731300e+08,-0.052,Greensboro,NC,United States,8700.0,"Qorvo, Inc. engages in development and commerc...",0.000120
FMC,NYQ,FMC Corporation,FMC Corporation,Basic Materials,Agricultural Inputs,50.15,6260525568,7.033000e+08,0.085,Philadelphia,PA,United States,5800.0,"FMC Corporation, an agricultural sciences comp...",0.000113


In [7]:
# Maps an output column name to a function that reads that value from a yfinance Ticker's info.
properties={'Company Name':lambda x:x.info.get('longName'),'Sector':lambda x:x.info.get('sector'),'Market Cap':lambda x:x.info.get('marketCap')}

def stock_list_annotation(annotated_list, client, nation, output_file,to_add=['Company Name','Summary', 'Market Cap'],sleep_params=(1,10)):
    """Annotate each ticker in ``annotated_list`` and append the rows to a CSV as they are produced.

    For every row the function builds a yfinance Ticker, fills the requested
    ``to_add`` columns, merges in the row's existing columns and writes the
    record immediately, so an interrupted run keeps everything written so far.

    Parameters:
        annotated_list (pd.DataFrame): One row per ticker; the index value is
            used as the ticker symbol and every column is copied to the output.
        client: google.genai client forwarded to ``summary``.
        nation (str): Key into the module-level ``nation_ending`` suffix map.
        output_file (str): CSV path opened in append mode. The header is written
            only when the file is missing or empty, so an existing file must
            already use the same column layout.
        to_add (list[str]): Extra columns to fetch. Names found in
            ``properties`` are read from yfinance; 'Summary' is produced by
            ``summary``; any other name is left blank. The list is not mutated.
        sleep_params (tuple): ``(shape, scale)`` passed to ``np.random.gamma``
            for the pause after each ticker (mean pause = shape * scale seconds).

    Returns:
        None. Results are written to ``output_file``.

    Raises:
        KeyError: If ``nation`` is not a key of ``nation_ending`` (raised before
            the file is opened). Per-ticker failures are printed and recorded
            as a row holding only the ticker.
    """
    def get_market_cap_formatted(market_cap):
        """Convert market cap to readable format"""
        if not market_cap:
            return 'N/A'
        billion = 1_000_000_000
        if market_cap >= billion:
            return f"${round(market_cap/billion, 1)}B"
        else:
            return f"${round(market_cap/1_000_000, 1)}M"

    # Resolve the suffix once so an unknown nation fails immediately instead of
    # producing one error (and one sleep) per ticker.
    suffix = nation_ending[nation]
    extra_columns = list(to_add)
    fieldnames = ['Ticker'] + extra_columns + list(annotated_list.columns)
    # An existing but empty file still needs its header.
    write_header = not os.path.exists(output_file) or os.path.getsize(output_file) == 0

    # Explicit UTF-8: summaries are free text and may hold characters that a
    # platform-default encoding cannot represent.
    with open(output_file, 'a', newline='', encoding='utf-8') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        if write_header:
            writer.writeheader()

        for ticker, info in annotated_list.iterrows():
            try:
                stock = yf.Ticker(ticker + suffix)

                record = {'Ticker': ticker}
                for k in extra_columns:
                    if k in properties:
                        record[k] = properties[k](stock)

                if 'Summary' in extra_columns:
                    # The prompt needs a company name even when 'Company Name'
                    # itself was not requested as an output column.
                    company_name = record.get('Company Name') or properties['Company Name'](stock)
                    if not company_name:
                        raise ValueError('no company name available for summary')
                    record['Summary'] = summary(company_name, nation, client)
                    print(record['Summary'])

                if 'Market Cap' in record:
                    record['Market Cap'] = get_market_cap_formatted(record['Market Cap'])

                # Existing annotation columns win over fetched ones on a name clash.
                record.update(info.to_dict())
                writer.writerow(record)  # Immediate write

            except Exception as e:
                # Best effort: log the failure and keep a placeholder row so the
                # ticker can be identified and retried later.
                print(f"Error {ticker}: {str(e)}")
                writer.writerow({'Ticker': ticker})  # Partial write

            csvfile.flush()  # Push both success and failure rows to disk now

            # Random pause between tickers to stay under API rate limits.
            sleep_time=np.random.gamma(*sleep_params)
            print(sleep_time)
            time.sleep(sleep_time)

In [None]:
# Annotate the rows from position 374 onward, appending to data/sp500_annotation_1.csv.
stock_list_annotation(annotated_list.iloc[374:,], client,'US', 'data/sp500_annotation_1.csv')

First Solar, Inc. is an American solar technology company that designs, manufactures, and markets photovoltaic solar modules.

11.656417048960437
Brown-Forman manufactures, bottles, and markets a range of alcoholic beverages, including whiskies, tequila, vodka, and wine.

18.079567392438456
Leidos Holdings, Inc. provides technology and engineering services and solutions, including data analytics, cybersecurity, and enterprise IT, to various industries.

4.223072953171488
Citizens Financial Group, headquartered in Providence, RI, offers retail and commercial banking products/services to individuals, small businesses, and institutions.

17.348856214199774
Labcorp provides clinical laboratory services and end-to-end drug development support, offering a range of diagnostic technologies and testing services.

36.543242700006964
VeriSign is a global provider of internet infrastructure and domain name registry services, ensuring the security, stability, and resiliency of the Domain Name Syste

In [26]:
# NOTE(review): no earlier visible cell assigns `res`; this display appears to
# rely on leftover kernel state and would fail after Restart & Run All — confirm.
res

Unnamed: 0,Ticker,Company Name,Sector,Market Cap,Summary
0,TESTER,,,,I lack the information to answer. I need the c...
1,MMM,3M Company,Industrials,$82.8B,3M is a multinational conglomerate that manufa...
2,AOS,A. O. Smith Corporation,Industrials,$9.5B,A. O. Smith is a global manufacturer of reside...
3,ABT,Abbott Laboratories,Healthcare,$229.1B,Abbott Laboratories is a global healthcare com...
4,ABBV,AbbVie Inc.,Healthcare,$338.8B,AbbVie is a global biopharmaceutical company t...
...,...,...,...,...,...
382,PRU,"Prudential Financial, Inc.",Financial Services,$40.4B,"Prudential Financial, Inc. is a global financi..."
383,PEG,Public Service Enterprise Group Incorporated,Utilities,$41.9B,Public Service Enterprise Group (PSEG) is a di...
384,PSA,Public Storage,Real Estate,$52.9B,"Public Storage is the world's largest owner, o..."
385,PHM,"PulteGroup, Inc.",Consumer Cyclical,$22.8B,"PulteGroup, Inc. is a homebuilder in the Unite..."


In [7]:
# NOTE(review): `list_name` is only assigned in commented-out cells and `res`
# has no earlier visible assignment; this cell depends on hidden kernel state — confirm.
res.to_csv(list_name+'_annotation.csv', index=False)

In [None]:
# list_name='ftse250'


# # ending=''
# filel=open('../gathering/data/'+list_name+'.txt','a+')
# filel.seek(0)
# stocklist=filel.read().splitlines()
# filel.close()

In [None]:
# NOTE(review): stale call. The current stock_list_annotation requires an
# `output_file` argument, iterates its first argument with .iterrows(), and has
# no return statement, so this raises TypeError as written. `stocklist` is also
# only assigned in commented-out cells. Update or remove before re-running.
res=stock_list_annotation(stocklist,client,'UK')

In [None]:

# def stock_list_annotation(tickers, client,nation):
#     """Get top N intraday losers from a predefined S&P 500 ticker list with company details
    
#     Returns DataFrame with company name, sector, market cap, current price, and percentage change.
#     Includes random sleep intervals between API calls to prevent rate limiting.
#     """
#     losers = []
#     ending=nation_ending[nation]

    
#     for ticker in tickers:
#         print(ticker)
#         try:
#             #Random sleep between 0 and 3 seconds
            


#             stock = yf.Ticker(ticker+ending)
#             data = stock.history(period='1d', interval='1m')
#             dic={
#                 'Ticker': ticker,
#                 'Company Name': stock.info.get('longName', 'N/A'),
#                 'Sector': stock.info.get('sector', 'N/A'),
#                 'Market Cap': get_market_cap_formatted(stock.info.get('marketCap', None)),
#             }

#             dic['Summary']=summary(dic['Company Name'],nation,client)
#             print(dic['Summary'])
            
#         except Exception as e:
#             dic=dict()
#         losers.append(dic)
#         sleep_time=np.random.gamma(4,5)
#         print(sleep_time)
#         time.sleep(sleep_time)
#     losers_df = pd.DataFrame(losers)
#     if len(losers_df) == 0:
#         return pd.DataFrame(columns=['Ticker', 'Company Name', 'Sector', 'Market Cap', 'Summary'])
    
#     #losers_df = losers_df.sort_values(by='% Change')
#     return losers_df[['Ticker', 'Company Name', 'Sector', 'Market Cap', 'Summary']].reset_index(drop=True)


In [None]:
# annotated_list=pd.read_csv('data/sp500_annotation.csv')
# annotated_list=annotated_list.set_index('Symbol')
# annotated_list.to_csv('data/sp500_annotation.csv')
# list_name='ftse100'

# list_name='SP500'
# # ending=''
# filel=open('../gathering/data/'+list_name+'.txt','a+')
# filel.seek(0)
# stocklist=filel.read().splitlines()
# filel.close()