In [16]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM


In [17]:
import torch
# Run on the GPU when CUDA is available; otherwise stay on the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# Loading the FLAN-T5 financial agent

In [18]:
from peft import PeftModel, PeftConfig

# Identifier of the PEFT adapter passed to the from_pretrained calls below.
peft_model_id = 'Chaitanya14/Financial_Agent_flant5'

# The adapter config names the base checkpoint the adapter belongs to.
config = PeftConfig.from_pretrained(peft_model_id)

# Load that base seq2seq checkpoint and wrap it with the adapter in one step.
model = PeftModel.from_pretrained(
    AutoModelForSeq2SeqLM.from_pretrained(config.base_model_name_or_path),
    peft_model_id,
)

In [19]:
# Load the tokenizer from the same base checkpoint named in the adapter config,
# i.e. the checkpoint the base model was loaded from.
tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)

# Testing the model output on a query

In [20]:
# Natural-language question to send to the model.
query = 'What was the net income of Google in 2019?'

# Prompt = fixed instruction text + the query + a trailing newline.
# NOTE(review): `input` shadows the Python builtin of the same name; the name is
# kept unchanged here because other cells may refer to it.
input = ''.join([
    'Analyse the following query and provide the names of the companies, the relevant information and their related concepts, and the dates for which the information is required:\nQuery: ',
    query,
    '\n',
])

# Tokenize the prompt (prefixed with a single space) into PyTorch tensors.
inputs = tokenizer(" " + input, return_tensors="pt")

In [21]:
# Put the model on the selected device before generating.
model.to(device)

# Move every tensor produced by the tokenizer to the same device as the model.
inputs = {name: tensor.to(device) for name, tensor in inputs.items()}

# Inference only: no_grad() avoids tracking gradients during generation.
with torch.no_grad():
    # Pass the attention mask together with the token ids; the previous call
    # forwarded only input_ids and silently dropped the tokenizer's mask.
    outputs = model.generate(
        input_ids=inputs["input_ids"],
        attention_mask=inputs["attention_mask"],
        max_new_tokens=512,
    )

# batch_decode accepts the generated tensor directly, so no manual
# detach()/cpu()/numpy() conversion is needed before decoding.
print(tokenizer.batch_decode(outputs, skip_special_tokens=True))


['Companies: Google Inc. Information: Net income (Related Concepts: Revenue, Cost of Goods Sold (COGS), Operating Expenses, Depreciation and Amortization, Interest Expense, Income Tax Expense, Extraordinary Items) Dates: in 2019']


# Loading the financial data

In [22]:
import pickle
import re
import pandas as pd

# Load the three pickled datasets from the local "data" directory.
#
# Forward slashes are used so the paths resolve on Windows, macOS and Linux;
# the previous 'data\\...' literals only worked on Windows.
# Each file is opened in a `with` block so the handle is closed even if
# unpickling raises.
# NOTE(security): pickle.load can execute arbitrary code while loading --
# only load these files if they come from a trusted source.
with open('data/companyData-us.pkl', 'rb') as dbFile:
    allCompanyData = pickle.load(dbFile)

with open('data/conceptData-us.pkl', 'rb') as dbFile:
    allConceptData = pickle.load(dbFile)

with open('data/indexData.pkl', 'rb') as dbFile:
    allIndexData = pickle.load(dbFile)

# Gather the three loaded datasets under a single dictionary.
appGlobalData = {
    'allCompanyData': allCompanyData,
    'allIndexData': allIndexData,
    'allConceptData': allConceptData,
}


def _normalize_info_key(key):
    """Return ``key`` rewritten as lower-case, space-separated words.

    Examples: "oneTwoThree" -> "one two three",
              "one_two_three" -> "one two three".
    """
    # Insert a space between a lower-case letter and a following upper-case
    # letter or digit, then lower-case the result. The character class used to
    # be [A-Z|0-9]; inside [...] the '|' is a literal pipe, not alternation,
    # so it wrongly treated "a|b" as a word boundary as well.
    spaced = re.sub(r"([a-z])([A-Z0-9])", r"\1 \2", key).lower()
    # Underscore-separated keys become space-separated too.
    return spaced.replace("_", " ")


# Rewrite every company's "info" mapping so its keys are in the readable form.
companyTickers = appGlobalData['allCompanyData']['df'].keys()
for tick in companyTickers:
    tickInfo = appGlobalData['allCompanyData']['df'][tick]["info"]
    keys = [_normalize_info_key(key) for key in tickInfo.keys()]
    vals = tickInfo.values()
    # Values keep their original order, so zipping pairs each value with its
    # reformatted key. Keys that normalize to the same text collapse into one
    # entry (the last value wins), as dict() construction always does.
    appGlobalData['allCompanyData']['df'][tick]["info"] = dict(zip(keys, vals))

# Examples of relevant data being fetched from the data dictionary

In [23]:
# Display the 'Net Income' entry of GOOGL's 'cashflow' data as a DataFrame.
pd.DataFrame(appGlobalData['allCompanyData']['df']['GOOGL']['cashflow']['Net Income'])

Unnamed: 0,Net Income
,
2021-12-31 00:00:00,76033000000.0
2020-12-31 00:00:00,40269000000.0
2019-12-31 00:00:00,34343000000.0
2018-12-31 00:00:00,30736000000.0


In [24]:
# Display the 'Revenue' entry of GOOGL's 'earnings' data as a DataFrame.
pd.DataFrame(appGlobalData['allCompanyData']['df']['GOOGL']['earnings']['Revenue'])

Unnamed: 0_level_0,Revenue
Year,Unnamed: 1_level_1
2018,136819000000
2019,161857000000
2020,182527000000
2021,257637000000


In [25]:
# Display the 'Interest Expense' entry of GOOGL's 'financials' data as a DataFrame.
pd.DataFrame(appGlobalData['allCompanyData']['df']['GOOGL']['financials']['Interest Expense'])

Unnamed: 0,Interest Expense
,
2021-12-31 00:00:00,-346000000.0
2020-12-31 00:00:00,-135000000.0
2019-12-31 00:00:00,-100000000.0
2018-12-31 00:00:00,-114000000.0
