# Strategy Construction Tool
This notebook produces the trade reports that are used in the evaluation section for backtesting.

In [1]:
!pip -q install --force-reinstall langgraph langchain_aws langchain-huggingface langchain

[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
jupyter-ai 2.28.5 requires faiss-cpu!=1.8.0.post0,<2.0.0,>=1.8.0, which is not installed.
bqservices 1.586.0 requires pybql>=0.125.0, which is not installed.
bqservices 1.586.0 requires pytables>=3.6.1, which is not installed.
bqservices 1.586.0 requires python>=3.7.0, which is not installed.
bqpopt 0.148.0 requires bloomberg.bquant._internal.serializer_utils<1.0,>=0.2, which is not installed.
bqpopt 0.148.0 requires pybql<1.0,>=0.129, which is not installed.
bqpopt 0.148.0 requires python<4.0,>=3.9, which is not installed.
bqplayback 0.29.0 requires pybql, which is not installed.
bqplayback 0.29.0 requires python<3.6,>=3.5, which is not installed.
bqnb 2.8.0 requires conda, which is not installed.
bqlogger 0.39.0a0 requires python<4.0,>=3.9, which is not installed.
bql-enterprise 0.16.0 requires pybql, which

In [3]:
import copy
import pandas as pd
import datarequest.company_data as cd
import agents.financial_agent as fa
import agents.committee_agent as ca
import importlib
from tqdm import tqdm
import json

from utils.s3_helper import S3Helper
from datetime import datetime

import concurrent.futures


In [33]:
# Re-import the agent modules so that edits made to their source files are
# picked up without restarting the kernel.
importlib.reload(fa)
importlib.reload(ca)

<module 'agents.committee_agent' from '/project/agents/committee_agent.py'>

### Collect the data needed to run the Agents
The Agents will require financial statement datasets and news datasets in order to conduct their analysis.

In [5]:
# Get the company reports
# NOTE(review): the two arguments look like a storage location and a dataset
# file name — confirm against cd.SecurityData.
security_datasets = cd.SecurityData('tmp/fs', 'dow_quarterly_ltm_v2.json')
# Get the saved news dataset: fetch it through the S3 helper to a local path
s3_helper = S3Helper('tmp/fs')
s3_helper.get_file(filename='dow_headlines.parquet', local_filename='/tmp/dow_headlines.parquet')
# Load the local parquet file into a pandas DataFrame
news_headlines = pd.read_parquet('/tmp/dow_headlines.parquet')

### Run the Agentic Models
Running the inference tasks across all 893 date/security combinations took about 1 h 32 min.

In [36]:
# Instantiate each agent once. run_single() reads these module-level objects,
# and backtest() calls run_single() from several threads, so both instances
# are shared across all runs.
financial_agent = fa.FinancialAnalystAgent()
committee_agent = ca.CommitteeAgent()

In [16]:
def run_single(security: str, as_of_date: str) -> dict:
    """
    Execute one analyst + committee pass for a single security on a single date.

    Reads the module-level ``security_datasets``, ``news_headlines``,
    ``financial_agent`` and ``committee_agent`` objects.

    Args:
        security: Security identifier handed to the data store.
        as_of_date: Date handed to the data store and to the financial analyst agent.

    Returns:
        dict with the keys ``date``, ``security``, ``earning_decision``,
        ``earning_magnitude``, ``earning_confidence``, ``recommendation``,
        ``responses`` (the raw output of both agents) and ``time`` (elapsed
        time of the two agent calls, as a string).
    """
    inputs = security_datasets.get_security_all_data(as_of_date, security)

    # Only the two agent calls are timed; the data lookup above is excluded.
    started = datetime.now()

    analyst_output = financial_agent.run(
        security_data=inputs,
        news_data=news_headlines,
        as_of_date=as_of_date,
    )
    committee_output = committee_agent.run(
        senior_analyst_report=analyst_output['senior_report'],
        financial_statement_analysis=analyst_output['financial_report'],
        security_data=inputs,
    )

    elapsed = datetime.now() - started

    # Flatten the headline fields and keep the full agent responses alongside.
    result = {'date': as_of_date, 'security': security}
    result['earning_decision'] = analyst_output['final_output'].direction
    result['earning_magnitude'] = analyst_output['final_output'].magnitude
    result['earning_confidence'] = analyst_output['final_output'].confidence
    result['recommendation'] = committee_output['results'].recommendation
    result['responses'] = {
        'financial_analyst': analyst_output,
        'committee_report': committee_output,
    }
    result['time'] = str(elapsed)
    return result
    
    

In [20]:
# Every date/security combination to evaluate; entries are read below via
# their 'date' and 'security' keys.
dates_and_securities = security_datasets.date_security_timeseries()
# Pick one combination (index 200, an arbitrary choice) to smoke-test run_single.
test_security = dates_and_securities[200]

In [21]:
test_security

{'date': '2021-05-04', 'security': 'PFE UN Equity'}

In [25]:
%%time
# Smoke test: one full analyst + committee run for the sample combination.
test_output = run_single(test_security['security'],test_security['date'])

CPU times: user 4.35 s, sys: 57.1 ms, total: 4.41 s
Wall time: 3min 2s


In [30]:
# Keys available on the result of run_single, for reference:
#dict_keys(['date', 'security', 'earning_decision', 'earning_magnitude', 'earning_confidence', 'recommendation', 'responses', 'time'])
test_output['security']

'PFE UN Equity'

### Run the backtest

In [44]:
# Accumulator for backtest results. It is created only when missing, so a
# fresh kernel works and re-running this cell between partial runs keeps the
# results gathered so far.
try:
    data_output
except NameError:
    data_output = []


def backtest(dates_and_securities, max_workers=10):
    """
    Run run_single for every date/security combination on a thread pool.

    Successful results are appended to the module-level ``data_output`` list
    in completion order (not input order).

    Args:
        dates_and_securities: sequence of dicts with 'security' and 'date' keys.
        max_workers: size of the thread pool (default 10).

    Raises:
        Exception: the first exception (in completion order) raised by a
            run_single call. It is re-raised only after every submitted run
            has finished and all successful results have been stored, so one
            failing run no longer discards the rest of the batch.
    """
    failures = []
    # The context manager closes the progress bar even if the run is interrupted.
    with tqdm(total=len(dates_and_securities), position=0, leave=True) as progress:
        with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
            # Map each future back to its input so failures can be reported.
            futures = {
                executor.submit(run_single, single['security'], single['date']): single
                for single in dates_and_securities
            }
            for f in concurrent.futures.as_completed(futures):
                progress.update(n=1)
                try:
                    data_output.append(f.result())
                except Exception as exc:
                    failures.append((futures[f], exc))

    if failures:
        print(f"{len(failures)} of {len(futures)} runs failed:")
        for single, exc in failures:
            print(f"  {single['date']} {single['security']}: {exc!r}")
        # Keep the original exception type for callers.
        raise failures[0][1]
        

In [None]:
# Run the backtest
# NOTE(review): the [470:] slice skips the first 470 combinations — presumably
# resuming a partially completed run; use the full list when starting from scratch.
backtest(dates_and_securities[470:])

  5%|▌         | 31/579 [1:02:51<18:31:05, 121.65s/it]
  9%|▉         | 39/423 [11:58<1:40:01, 15.63s/it] 

### Save the raw output to S3

In [None]:
# Work on a deep copy so the in-place string conversion applied to
# data_analysis_final does not alter the raw results held in data_output.
data_analysis_final = copy.deepcopy(data_output)

In [None]:
# Some of the response fields hold objects that json cannot serialize.
# Replace each of them, in place, with its str() form before dumping to JSON.
for item in data_analysis_final:
    item['responses']['financial_analyst']['final_output'] = str(item['responses']['financial_analyst']['final_output'])
    item['responses']['committee_report']['results'] = str(item['responses']['committee_report']['results'])
    item['responses']['committee_report']['history'] = str(item['responses']['committee_report']['history'])
    # run_single already stores 'time' as a string, so this is a harmless no-op for its output.
    item['time']=str(item['time'])

In [None]:
# Write the stringified results to a local JSON file.
with open('/tmp/agentic_output_llama_v3_1.json', 'w') as f:
    json.dump(data_analysis_final, f)

In [None]:
# Hand the local JSON file to the S3 helper for upload.
# NOTE(review): the destination is decided inside S3Helper and is not visible here.
s3_helper.add_file(local_filename='/tmp/agentic_output_llama_v3_1.json')

### Create the Trade Report

In [None]:
def trade_report_generation(data, trade_only=False) -> "pd.DataFrame | list[dict]":
    """
    Build the trade report that is passed into the Strategy Analysis tool.

    Args:
        data: iterable of decision dicts. Each must contain 'date', 'security',
            'recommendation' and 'earning_confidence'; 'earning_decision' and
            'earning_magnitude' are also required when trade_only is False.
        trade_only: when True, keep only the trade fields and return a
            DataFrame. When False (default), also include the earnings
            forecast fields and return a plain list of dicts.

    Returns:
        pd.DataFrame with the columns date / security / decision / confidence
        when trade_only is True (the columns are present even for empty input);
        otherwise a list of dicts, one per input item.

    Raises:
        KeyError: if an item lacks one of the required keys.
    """
    # Output field name -> key in the source decision dict.
    fields = {
        'date': 'date',
        'security': 'security',
        'decision': 'recommendation',
        'confidence': 'earning_confidence',
    }
    if not trade_only:
        fields['earning_decision'] = 'earning_decision'
        fields['earning_magnitude'] = 'earning_magnitude'

    trade_report = [
        {target: item[source] for target, source in fields.items()}
        for item in data
    ]

    if trade_only:
        # Naming the columns keeps the frame's schema stable for empty input.
        return pd.DataFrame(data=trade_report, columns=list(fields))
    return trade_report

In [None]:
# trade_only is left at its default (False), so `trades` is a plain list of
# dicts rather than a DataFrame.
trades = trade_report_generation(data_analysis_final)

In [None]:
# Write the trade report as JSON, relative to the notebook's working directory.
# NOTE(review): open() does not create missing directories, so Results/Agentic
# must already exist.
with open('Results/Agentic/trades_llama_v3.json', 'w') as f:
    json.dump(trades, f)

In [20]:
len(data_analysis_final)

893

In [50]:
len(data_output)

470

In [14]:
# Fetch a previously saved output file to /tmp through the S3 helper.
s3_helper.get_file('agentic_output_claude_1.json','/tmp/agentic_output_claude_1.json')

In [15]:
# Load the earlier results; json.load accepts a binary file handle, so 'rb' works.
with open('/tmp/agentic_output_claude_1.json', 'rb') as f:
    old_output = json.load(f)

In [16]:
len(old_output)

228

In [17]:
# Prepend the earlier results to the current ones.
# NOTE(review): not idempotent — once data_analysis_final has been rebound to
# final_output, re-running this cell adds old_output a second time.
final_output = old_output + data_analysis_final

In [None]:
len(final_output)

In [19]:
# Rebind the working name so cells that use data_analysis_final see the merged list.
data_analysis_final = final_output

In [49]:
data_output[0]

{'date': '2020-05-13',
 'security': 'CSCO UW Equity',
 'earning_decision': 'decrease',
 'earning_magnitude': '2-3%',
 'earning_confidence': 'high',
 'recommendation': 'SELL',
 'responses': {'financial_analyst': {'company_details': {'name': 'Cisco Systems Inc',
    'figi_name': 'BBG000C3J3C9',
    'sector': 'Technology',
    'sec_fs': 'Income Statement:                                                        t           t-1           t-2           t-3           t-4           t-5\nitems                                                                                                                          \nRevenue                                      5.057500e+10  5.155000e+10  5.199100e+10  5.190400e+10  5.132000e+10  5.082500e+10\nCost of Revenue                              1.800200e+10  1.857500e+10  1.900700e+10  1.923800e+10  1.930600e+10  1.922500e+10\nGross Profit                                 3.257300e+10  3.297500e+10  3.298400e+10  3.266600e+10  3.201400e+10  3.160000e+10\nO

In [None]:
# NOTE(review): looks like a leftover scratch cell — confirm it can be removed.
print("Hello")