In [None]:
import financedatabase as fd
import random
import jinja2
import datetime
from operator import attrgetter
from utils import get_yfinance_data
import json 


# Shared Jinja2 environment: the prompt, action, response and date-format
# templates in this notebook are compiled through environment.from_string(...).
environment = jinja2.Environment()

In [None]:
# Jinja2 prompt templates for single-stock price questions.
# Placeholders:
#   {{ stock }}     - filled with the ticker symbol or one of the company names
#   {{ date }}      - filled with a randomly formatted calendar date
#   {{ free_date }} - templates using it are paired with an entry of free_date_list
# The generation loop decides how to pick the date by testing for the literal
# substrings '{{ date }}' / '{{ free_date }}', so the spacing inside those two
# placeholders matters. Templates with neither placeholder get the date 'now'.
simple_price_templates = [
    "What was the price of {{ stock }} on {{ date }}?",
    "What is {{ stock }} currently trading at?",
    "Can you tell me the value of {{ stock }} on {{ date }}?",
    "I'm curious about the price of {{ stock }} on {{ date }}. Any information?",
    "What was the closing price of {{ stock }} on {{ date }}?",
    "Could you provide the historical price of {{ stock }} on {{ date }}?",
    "Give me the stock price for {{ stock }} on {{ date }}.",
    "What's the recorded value of {{ stock }} on {{ date }}?",
    "Do you have data on the stock price of {{ stock}} on {{ date }}?",
    "I'm interested in the price of {{ stock }} on {{ date }}. What do you know?",
    "Provide me with details about {{ stock }} on {{ date }}, specifically its price.",
    "Looking for information on {{ stock }} on {{ date }}. Price details, please.",
    "Tell me the market value of {{ stock }} on {{ date }}.",
    "Could you inform me about the trading price of {{ stock }} on {{ date }}?",
    "What was the opening price of {{ stock }} on {{ date }}?",
    "I'd like to know the stock price of {{ stock }} on {{ date }}. Any insights?",
    "Please share the {{ stock }} price on {{ date }}.",
    "Looking for historical data: {{ stock }} price on {{ date }}.",
    "Could you uncover the stock price of {{ stock }} on {{ date }}?",
    "Inform me about the {{ stock }} stock's value on {{ date }}.",
    "What's the recorded value of {{ stock }} on the specific date, {{ date }}?",
    "Tell me about the stock valuation of {{ stock }} on {{ date }}.",
    "what is the price of {{ stock }}?",
    "price of {{ stock }}",
    "What is the current stock price of {{ stock }}?",
    "What is the current price of {{ stock }}?",
    "What was {{ stock }} trading at on {{ date }}?",
    "What was {{ stock }} worth on {{ date }}?",
    "What was the price of {{ stock }} {{ free_date }}?",
    "What was {{ stock }} worth {{ free_date }}?",
]

# Relative ("free-form") date expressions; the generation loop picks one at
# random for every template that contains the {{ free_date }} placeholder.
free_date_list = [
    'last week',
    '1 year ago',
    'yesterday',
    'last month',
    'last year',
    '2 weeks ago',
    'monday',
    'tuesday',
]    

In [None]:
# strftime patterns used to render a sampled date in a variety of human styles.
# '{{ suffix }}' is a Jinja2 placeholder that random_date_string replaces with the
# ordinal suffix (st/nd/rd/th) before the pattern is handed to strftime.
# NOTE: '%-d' (day of month without zero padding) is a platform extension of
# strftime (glibc / macOS); it is not available on Windows.
date_string_formats = [
    '%A, %B %-d, %Y', # Sunday, January 3, 2021
    '%B %Y', # January 2021
    '%B %-d, %Y', # January 3, 2021
    '%B %-d{{ suffix }}', # January 3rd
    '%b %-d %Y', # Jan 3 2021
    # The month used to be the literal text "January", which made the rendered
    # string disagree with the sampled date for every other month.
    '%-d{{ suffix }} of %B %Y', # 3rd of January 2021
    '%d/%m/%y', # 03/01/21
    '%Y-%m-%d', # 2021-01-03
]

In [None]:
def random_date(end_date=(2023, 8, 15), start_date=(2020, 10, 1)):
    '''
    Return a uniformly random ``datetime.date`` in the range (start_date, end_date].

    Args:
        end_date: ``(year, month, day)`` sequence; the latest date that can be
            returned (inclusive).
        start_date: ``(year, month, day)`` sequence; the lower bound, which is
            itself never returned. Defaults to 2020-10-01, the value that was
            previously hard-coded.

    Returns:
        datetime.date: the sampled day.

    Raises:
        ValueError: if ``end_date`` is not strictly after ``start_date``.
    '''
    first_day = datetime.date(start_date[0], start_date[1], start_date[2])
    last_day = datetime.date(end_date[0], end_date[1], end_date[2])

    num_days = (last_day - first_day).days
    if num_days < 1:
        raise ValueError(
            f"end_date {last_day.isoformat()} must be after start_date {first_day.isoformat()}"
        )

    # randint is inclusive on both ends; starting at 1 keeps start_date itself
    # out of the result, exactly as before.
    offset = random.randint(1, num_days)
    return first_day + datetime.timedelta(days=offset)

In [None]:
def get_day_suffix(day):
    '''
    Return the English ordinal suffix ('st', 'nd', 'rd' or 'th') for a day of the month.
    '''
    takes_th = 4 <= day <= 20 or 24 <= day <= 30
    if takes_th:
        return 'th'
    # Remaining days of a month (1-3, 21-23, 31) end in 1, 2 or 3.
    return ["st", "nd", "rd"][day % 10 - 1]

In [None]:
def random_date_string(context_date):
    '''
    Sample a date no later than `context_date` and render it in a random style.

    `context_date` only needs `year`, `month` and `day` attributes. The return
    value is a `(date_string, date)` tuple: the formatted text and the
    `datetime.date` it was produced from.
    '''
    upper_bound = (context_date.year, context_date.month, context_date.day)
    picked = random_date(end_date=upper_bound)

    # Choose one of the strftime patterns; patterns carrying the Jinja2
    # '{{ suffix }}' placeholder get the ordinal suffix substituted first.
    pattern = random.choice(date_string_formats)
    if '{{ suffix }}' in pattern:
        ordinal = get_day_suffix(picked.day)
        pattern = environment.from_string(pattern).render(suffix=ordinal)

    return (picked.strftime(pattern), picked)

In [None]:
# Build the pool of US large-cap stocks that prompts are sampled from.

equities = fd.Equities()

e_df = equities.search(country="United States", market_cap='Large Cap', exclude_exchanges=True)

# One record per row of the search result: the row index is used as the ticker
# symbol, and two shortened variants of the company name are derived by cutting
# at the first '.' and at the first ','.
ticker_name_list = [
    {
        'symbol': symbol,
        'name': row['name'].split('.')[0],
        'short_name': row['name'].split(',')[0],
        'currency': 'USD',
    }
    for symbol, row in e_df.iterrows()
]


In [None]:
# Build the evaluation set. Every iteration samples one template / stock / date
# combination, asks get_yfinance_data for the matching price and appends the
# resulting record (successful or not) as one JSON line to OUTPUT_PATH.
NUM_SAMPLES = 1300  # a commented-out variant of this loop used 13000
OUTPUT_PATH = './data_out/eval_stock_prices.jsonl'

action_template = environment.from_string('{"action": "qStock", "params": {"symbol": "{{ stock_symbol }}", "date": "{{ date_string }}"}}')
response_template = environment.from_string("The stock price of {{ stock_name }}({{ stock_ticker }}) is {{ closing_price }} on {{ date_string }}.")

for _sample_index in range(NUM_SAMPLES):
    template_string = random.choice(simple_price_templates)

    # context_date is handed to get_yfinance_data together with the action and
    # is the upper bound for explicit dates; presumably it plays the role of
    # "today" when relative dates are resolved - confirm against utils.
    context_day = random_date()
    context_date = datetime.datetime(context_day.year, context_day.month, context_day.day)

    stock = random.choice(ticker_name_list)
    template = environment.from_string(template_string)

    if '{{ date }}' in template_string:
        date_string, _sampled_date = random_date_string(context_date)
    elif '{{ free_date }}' in template_string:
        date_string = random.choice(free_date_list)
    else:
        date_string = 'now'

    # Refer to the stock by symbol, name or short name. The keys are named
    # explicitly so the choice does not depend on the dict's key order.
    stock_string = random.choice((stock['symbol'], stock['name'], stock['short_name']))

    # Both placeholders are supplied: Jinja2 renders an undefined variable as
    # an empty string, so passing only `date` silently dropped the relative
    # date from every {{ free_date }} prompt.
    prompt = template.render(stock=stock_string, date=date_string, free_date=date_string)
    action = action_template.render(stock_symbol=stock['symbol'], date_string=date_string)
    print(action)

    error = False
    try:
        # get the yahoo finance data
        knowledge, parsed_date, last_close = get_yfinance_data(action, context_date)
        parsed_date_string = parsed_date.strftime('%A, %B %-d, %Y')  # like: Wednesday, March 2, 2022
        response = response_template.render(stock_name=stock['name'], stock_ticker=stock['symbol'], closing_price=last_close, date_string=parsed_date_string)
    except Exception as e:
        # Best effort: a failed lookup is still recorded (with error=True and
        # null results) so that it can be filtered out afterwards.
        print(e)
        knowledge = None
        response = None
        parsed_date = None
        last_close = None
        error = True

    item = {
        'prompt': prompt,
        'action': action,
        'knowledge': knowledge,
        'response': response,
        'meta_data': {
            "context_date": context_date.isoformat(),
            "date_string": date_string,
            'parsed_date': parsed_date.isoformat() if parsed_date else None,
            'last_close': last_close,
            "stock": stock,
            'error': error,
        },
    }
    # The file is reopened in append mode for every record, so each record is
    # written and closed before the next lookup starts.
    with open(OUTPUT_PATH, 'a', encoding='utf-8') as f:
        f.write(json.dumps(item) + '\n')

In [None]:
# a little cleaning on the data
import duckdb as dd

# Keep only the records that have a response; failed lookups are stored by the
# generation loop with a null response.
# The query always reads the complete source file, so the cleaned file is
# overwritten (mode='w'): appending would add every row again on each re-run.
# to_json writes to the given path, so its result is not bound to a name.
dd.sql("""
    SELECT * FROM './data_out/eval_stock_prices.jsonl'
    WHERE response IS NOT NULL
""").fetchdf().to_json('./data_out/cleaned_eval_stock_prices.jsonl', orient='records', lines=True, mode='w')
# .fetchdf().to_json('./data_out/stock_prices_cleaned.jsonl', orient='records', lines=True, mode='a')

# stop here... random notes below

Top secret... proceed with caution :p You've been warned. :p

## misc categories:

Stock Performance:

"How is {stock} performing?"
"Tell me about {stock}'s recent price movements."
"Is {stock} trending upwards or downwards?"
"What's the recent performance of {stock}?"

Stock Prices:

"What is the current stock price of {stock}?"
"Can you provide me with {stock}'s historical price data?"
"Give me the opening and closing prices for {stock} on {date}."

Financial Metrics:

"What's the market capitalization of {stock}?"
"Tell me about {stock}'s earnings per share."
"What's the price-to-earnings ratio of {stock}?"

Dividends and Returns:

"Has {stock} paid any dividends recently?"
"What's the dividend yield for {stock}?"
"How have the returns on {stock} compared to the market?"

Market News and Analysis:

"Tell me about the latest news related to {stock}."
"Can you provide a summary of analyst opinions on {stock}?"
"Give me insights into recent developments affecting {stock}."

Comparisons and Analysis:

"Compare the performance of {stock} and {another stock}."
"What are the main differences between {stock} and {another stock}?"
"Which sector does {stock} belong to, and how is it faring?"

Company Information:

"Tell me about the history and background of {stock}'s company."
"What are {stock}'s main products or services?"
"Give me an overview of {stock}'s leadership team."

Market Trends:

"Are there any emerging trends in {stock}'s industry?"
"Tell me about the recent performance of stocks in the {industry} sector."
"How has {stock} been affected by recent market trends?"

Technical Analysis:

"Provide a technical analysis of {stock}'s chart."
"What are the key support and resistance levels for {stock}?"
"Can you identify any recent patterns in {stock}'s price movements?"

Investment Strategies:

"Is {stock} considered a growth or value stock?"
"Tell me about the long-term potential of investing in {stock}."
"What are some common investment strategies for {stock}?"

In [None]:
# Rebuild the ticker records from the search result `e_df`: the row index is
# the symbol, and the company name is shortened at the first '.' and at the
# first ','.
ticker_name_list = [
    {
        'symbol': symbol,
        'name': row['name'].split('.')[0],
        'short_name': row['name'].split(',')[0],
        'currency': 'USD',
    }
    for symbol, row in e_df.iterrows()
]

In [None]:
# Preview the first few records rather than dumping the whole list into the cell output.
ticker_name_list[:5]

In [None]:
# Question templates about news for a stock. Unlike the Jinja2 price templates,
# these use a single-brace {stock} placeholder.
stock_news_templates = [
    "Tell me about the recent news related to {stock}.",
    "What are some recent developments that might impact {stock}?",
]
# Alias kept so code using the original, misspelled name keeps working.
stock_news_tremplates = stock_news_templates

In [None]:
# Miscellaneous (non-price) question templates using single-brace {stock} /
# {another stock} placeholders.
# Each entry now ends with a comma: without them Python's implicit string
# concatenation merged all of the questions into a single list element.
misc_tempaltes = [
    "How is {stock} performing in the market?",
    "Is {stock} trending upwards or downwards?",
    "What's the 52-week high and low for {stock}?",
    "Give me a summary of {stock}'s recent earnings report.",
    "Has {stock} paid any dividends recently?",
    "What's the market capitalization of {stock}?",
    "Can you provide me with a brief overview of {stock}?",
    "What are the top competitors of {stock}?",
    "How has {stock} performed over the past month?",
    "Tell me about {stock}'s price-to-earnings ratio.",
    "Is {stock} included in any major stock indices?",
    "What's the trading volume of {stock} today?",
    "Can you compare the performance of {stock} and {another stock}?",
    "What is the analyst consensus on {stock}?",
    "Has {stock} shown any significant volatility recently?",
    "Tell me about any recent insider trading activities for {stock}.",
    "Is {stock} considered a growth or value stock?",
    "What's the short interest for {stock}?",
    "Can you provide a historical chart for {stock}'s performance?",
    "How has {stock}'s price changed over the past year?",
    "What are some recent developments that might impact {stock}?",
    # NOTE(review): "comapnies" looks like a typo for "companies"; the string is
    # left untouched here - confirm before changing the prompt text.
    "What are the largest comapnies by market cap?",
]
# Correctly spelled alias; the original (misspelled) name stays available.
misc_templates = misc_tempaltes