# Imports

In [366]:
import os

import pandas as pd

from dotenv import load_dotenv, find_dotenv
from openai import OpenAI

import utility.utility as util

# Below import and instructions simply for display
from IPython.display import display, HTML
display(HTML("<style>.container { width:95% !important; }</style>"))

# resets import once changes have been applied
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# Directory and File Paths

In [149]:
path_data = os.path.join(os.getcwd(), "raw_data")

path_stmts = os.path.join(path_data, "predict")

#file_meta = os.path.join(path_raw_data, "manual_cases.csv")
#file_data = os.path.join(path_raw_data, "data.csv")
file_excel = os.path.join(path_raw_data, "sample_28feb.xlsx")

# Load Environment Variables

In [52]:
_ = load_dotenv(find_dotenv())

# Load Data

In [160]:
df_meta = pd.read_excel(file_excel, sheet_name="manual cases") #pd.read_csv(file_meta)
df_data = pd.read_excel(file_excel, sheet_name="Sheet1") #pd.read_csv(file_data)

# Preliminary Data Overview

In [161]:
df_meta.dropna(axis=1, inplace=True, how="all")
df_meta.drop(columns=["checked by", "firm", "path_txt", "path_doc", "manual"], inplace=True)
df_meta["filename"] = df_meta["filename"].astype("str") + ".txt"
df_meta["filepath"] = df_meta["filename"].apply(lambda x: os.path.join(path_stmts, x))

In [155]:
df_data.drop(columns=["path_txt", "path_doc","selected","easy",], inplace=True)
df_data.dropna(axis=1, inplace=True, thresh = int(df_data.shape[0]*.2))
df_data["filename"] = df_data["filename"].astype("str") + ".txt"

# Initialize Client

In [25]:
client = OpenAI()

In [515]:
df_meta

Unnamed: 0,cc_iso3,year,filename,paragraph (context),sentence,term,classification,page (txt),note,source manual,...,cat2,term3,full_term3,term4,full_term4,term5,full_term5,term6,full_term6,filepath
0,GBR,2011,22192022.txt,In our opinion:\n· the financial statements gi...,In our opinion:\n· the financial statements gi...,IFRS as adopted by the European Union,EU,28,,audit,...,,,,,,,,,,C:\Users\ilias\Desktop\UniMaResearch2023\Extra...
1,GBR,2011,22192022.txt,2 Summary of significant accounting policies T...,The consolidated financial statements of Anglo...,International Financial Reporting Standards as...,EU,37,,notes,...,,,,,,,,,,C:\Users\ilias\Desktop\UniMaResearch2023\Extra...
2,AUS,2016,23196675.txt,The consolidated entity has the ability and in...,The financial report complies with Australian ...,Australian Accounting Standards and Internatio...,dual Aus,29,,notes,...,,,,,,,,,,C:\Users\ilias\Desktop\UniMaResearch2023\Extra...
3,AUS,2016,23196675.txt,"In conducting our audit, we have complied with...",In our opinion:\n(a) the financial report of N...,Australian Accounting Standards and Internatio...,dual Aus,54,,audit,...,,,,,,,,,,C:\Users\ilias\Desktop\UniMaResearch2023\Extra...
4,GBR,2005,60654932.txt,In our opinion:\n– the financial statements gi...,In our opinion:\n– the financial statements gi...,-,general,49,2 columns. Text is not read well but we were s...,audit,...,,,,,,,,,,C:\Users\ilias\Desktop\UniMaResearch2023\Extra...
5,GBR,2005,60654932.txt,Basis of preparation\nThe consolidated account...,The consolidated accounts are prepared under t...,the accounting policies set out below,general,55,,notes,...,,,,,,,,,,C:\Users\ilias\Desktop\UniMaResearch2023\Extra...
6,GBR,2006,60807516.txt,accounting policies basis of preparation the g...,the group financial statements have been prep...,international financial reporting standards a...,EU,33,,notes,...,,uk gaap,by the eu ifrs the group do not expect there t...,,,,,,,C:\Users\ilias\Desktop\UniMaResearch2023\Extra...
7,GBR,2006,60807516.txt,Opinion In our opinion: · the group financial ...,In our opinion: · the group financial stateme...,IFRSs as adopted by the EU,EU,29,,audit,...,,ifrss as adopted by the eu,irregularity or error in forming our opinion w...,,,,,,,C:\Users\ilias\Desktop\UniMaResearch2023\Extra...
8,GBR,2011,61515211.txt,Opinion on financial statements\nIn our opinio...,In our opinion:\n••the financial statements gi...,IFRSs as adopted by\nthe European Union,EU,27,2 columns. Text is not read well but we were s...,audit,...,,,,,,,,,,C:\Users\ilias\Desktop\UniMaResearch2023\Extra...
9,GBR,2011,61515211.txt,1. Principal accounting policies\nGeneral\nPol...,The consolidated financial statements have bee...,International Financial Reporting Standards (...,EU and act,34,,notes,...,,,,,,,,,,C:\Users\ilias\Desktop\UniMaResearch2023\Extra...


# Baustelle

In [516]:
i = 27

In [517]:
text = util.parse_txt(df_meta["filepath"][i])

In [518]:
cleaned_text = util.clean_text(text)

# Model Overview
* https://platform.openai.com/docs/models/overview
# Message Types
* system: messages describe the behavior of the AI assistant. A useful system message for data science use cases is "You are a helpful assistant who understands data science."
* user: messages describe what you want the AI assistant to say. We'll cover examples of user messages throughout this tutorial
* assistant messages describe previous responses in the conversation. We'll cover how to have an interactive conversation in later tasks

The first message should be a system message. Additional messages should alternate between the user and the assistant.

# Model Response:
GPT models return a status code with one of four values, documented in the Response format section of the Chat documentation.
* stop: API returned complete model output
* length: Incomplete model output due to max_tokens parameter or token limit
* content_filter: Omitted content due to a flag from our content filters
* null: API response still in progress or incompleteplete

In [345]:
def get_completion(prompt, model = "gpt-4-0125-preview"):
    messages = [{"role":"system", "content": "You are a financial accountant"},
                {"role": "user", "content": prompt}]
    response = client.chat.completions.create(model=model,
                                     messages=messages,
                                     temperature=0,
                                    )
    return response

In [524]:
prompt = f"""
Your task is to extract  information from a provided financial statement text. \
More Specifically you are tasked to extract according to or in compliance with what accounting standard \
the financial statement has been prepared. It is possible that a financial statement has\
been constructed in accordance with more than one standard. Make sure you find all that apply.\
They should be mentioned right after each other.

I will provide you with a long text sequence and you should respond with the \
specific wording applied in the text.

I provide you with a segment delimited by tags (<text>, <\text>) and you should extract the desired information.
Only answer with word sequences you find in the provided delimited text!

Here is an example text from which you would be expected to extract one item:

Answer: International Financial Reporting Standards as adopted by the European Union

Text: 2 Summary of significant accounting policies The principal accounting policies applied\
in the preparation of these consolidated financial statements are set out below. These\
policies have been consistently applied to all the years presented, unless otherwise\
stated.\n2.1 Basis of preparation The consolidated financial statements of Anglo Pacific\
Group PLC have been prepared in accordance with International Financial Reporting Standards\
as adopted by the European Union (IFRSs as adopted by the EU), IFRIC interpretations and the\
Companies Act 2006 (United Kingdom) applicable to companies reporting under IFRS. The\
consolidated financial statements have been prepared under the historical cost convention,\
as modified by the revaluation of coal royalties, available-for-sale financial assets, and\
financial assets and financial liabilities (including derivative instruments) at fair value\
through profit or loss.\nThe preparation of financial statements in conformity with IFRS requires\
the use of certain critical accounting estimates. It also requires management to exercise its judgement\
in the process of applying the Group's accounting policies. The areas involving a higher degree of judgement\
or complexity, or areas where assumptions and estimates are significant to the consolidated financial\
statements are disclosed in note 4.\n2.1.1 Changes in accounting policies and disclosures (a) New and amended standards adopted by the Group

Here is another example from which you would be able to extract two items:

Answer:  Australian Accounting Standards
Answer:  International Financial Reporting Standards

Text: In conducting our audit, we have complied with the independence requirements of the Corporations Act 2001. We confirm\
that the independence declaration required by the Corporations Act 2001, which has been given to the directors of Nevada Iron\
Limited, would be in the same terms if given to the directors as at the time of this auditor's report.\nOpinion\nIn our\
opinion:\n(a) the financial report of Nevada Iron Limited is in accordance with the Corporations Act 2001, including:\n(i)\
giving a true and fair view of the consolidated entity's financial position as at 30 June 2015 and of its performance for\
the year ended on that date; and\n(ii) complying with Australian Accounting Standards and the Corporations Regulations 2001;\
and\n(b) the financial report also complies with International Financial Reporting Standards as disclosed in Note 2.\nEmphasis\
of Matter\nAs disclosed in the financial statements, the Company and consolidated entity had net current liabilities of $621,730\
and $1,563,984 at 30 June 2015, respectively, and incurred net after tax losses of $24,943,785 and $29,669,696, respectively,\
and the consolidated entity had net operating cash outflows of $778,045 and net cash outflows of $2,862,345 for the year then ended.\
These conditions, along with other matters as set forth in Note 2, indicate the existence of a material uncertainty which may cast\
significant doubt about the Company and consolidated entity's ability to continue as going concerns and therefore, the Company and\
consolidated entity may be unable to realise their assets and discharge their liabilities in the normal course of business.\nReport on\
the Remuneration Report\nWe have audited the Remuneration Report contained within the directors' report for the year ended 30 June 2015.\
The directors of the company are responsible for the preparation and presentation of the Remuneration Report in accordance with section\
300A of the Corporations Act 2001. Our responsibility is to express an opinion on the Remuneration Report, based on our audit conducted\
in accordance with Australian Auditing Standards.\nOpinion\nIn our opinion the Remuneration Report of Nevada Iron Limited for the year\
ended 30 June 2015 complies with section 300A of the Corporations Act 2001.\nRSM BIRD CAMERON PARTNERS


<text>{cleaned_text}<\text>
"""

In [525]:
response = get_completion(prompt)

In [526]:
response.choices[0].message.content

'International Financial Reporting Standards'

In [527]:
response

ChatCompletion(id='chatcmpl-8xmpqUwFhnpnGU7s1O5UKZe8SOrFl', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='International Financial Reporting Standards', role='assistant', function_call=None, tool_calls=None))], created=1709259766, model='gpt-4-0125-preview', object='chat.completion', system_fingerprint='fp_91aa3742b1', usage=CompletionUsage(completion_tokens=4, prompt_tokens=40788, total_tokens=40792))

In [453]:
response.choices[0]

Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='International Financial Reporting Standards as adopted by the European Union', role='assistant', function_call=None, tool_calls=None))

In [520]:
util.count_tokens(prompt)

40772

In [521]:
cleaned_text

'ANNUAL REPORT TRANSFORMATION CONTENTS ABTERRA LTD. (Incorporated in Singapore) AND SUBSIDIARIES Corporate Philosophy Corporate Profile Chairman\'s Statement Board of Directors Senior Management Operating Companies Corporate Structure Year Financial Statements Operating & Financial Performance Review Corporate Governance & Financial Report Statistics of Shareholdings Notice of Annual General Meeting Proxy Form ABTERRA LTD. ANNUAL REPORT CORPORATE PHILOSOPHY Abterra\'s fundamental approach to business is to create sustainable long-term growth for our customers, our business partners, our employees and our shareholders through our distinctive core values: AGILITY We constantly train and develop our employees into active and nimble individuals to cater to our customers, our business partners and our stakeholders\' needs. RELIABILITY We aim to provide services that are unsurpassed in quality and reliability through the vertical integration of resources into a competitive cost execution for

In [522]:
df_meta["term"][i]

'singapore financial reporting standards'

In [523]:
df_meta["paragraph (context)"][i]

'opinion in our opinion the financial statements of the group and the statements of financial position and changes in equity of the company are properly drawn up in accordance with the provisions of the act and singapore financial reporting standards so as to give a true and fair view of the state of affairs of the group and of the company as at 31 december 2014 and the results changes in equity and cash flows of the group and changes in equity of the company for the'

In [293]:
df_meta.iloc[i,:]

cc_iso3                                                              GBR
year                                                                2006
filename                                                    60807516.txt
paragraph (context)    accounting policies basis of preparation the g...
 sentence               the group financial statements have been prep...
term                    international financial reporting standards a...
classification                                                        EU
page (txt)                                                            33
note                                                                 NaN
source manual                                                      notes
source_cats_term       [of period 4 694 10 104 cash and cash equivale...
terms_notes            method of accounting|convention|basis of prepa...
terms_audit                                                          NaN
source                                             