In [1]:
%pip install openai backoff

Collecting openai
  Downloading openai-0.27.7-py3-none-any.whl (71 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m72.0/72.0 kB[0m [31m1.5 MB/s[0m eta [36m0:00:00[0m00:01[0m
Collecting aiohttp (from openai)
  Using cached aiohttp-3.8.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.0 MB)
Collecting multidict<7.0,>=4.5 (from aiohttp->openai)
  Using cached multidict-6.0.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (114 kB)
Collecting async-timeout<5.0,>=4.0.0a3 (from aiohttp->openai)
  Using cached async_timeout-4.0.2-py3-none-any.whl (5.8 kB)
Collecting yarl<2.0,>=1.0 (from aiohttp->openai)
  Using cached yarl-1.9.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (268 kB)
Collecting frozenlist>=1.1.1 (from aiohttp->openai)
  Using cached frozenlist-1.3.3-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (149 kB)
Collecting aiosignal>=1.1.2 (from aiohttp->openai)
  Using c

In [2]:
from pathlib import Path
import backoff 
import openai
import pickle
import time
import pandas as pd

from dotenv import dotenv_values

config = dotenv_values("./../../config/.env") # take environment variables from .env.
openai.organization = config["OPEN_AI_ORG"]
openai.api_key = config["OPEN_AI_TOKEN"]

annotations = pd.read_pickle(Path(config["BASE_PATH"])/"data/labeling/active-learning-labeling-gpt.pkl")
annotations = annotations[annotations.labeled]
if "gpt_loss" not in annotations.columns:
    annotations["gpt_loss"] = None
    annotations["gpt_unexpected"] = None

model = "gpt-3.5-turbo"

@backoff.on_exception(backoff.expo, openai.error.RateLimitError, on_backoff=lambda x: print(f"""Backing off: {round(x['wait'])} seconds"""), )
def completions_with_backoff(**kwargs):
    return openai.ChatCompletion.create(**kwargs)

In [3]:
query_template = """Does the following sequence contain (True) or not (False) a statement which describes or refers to {label}?
Sequence: {sequence}
Label: """

labels = {
    "loss": "a financial/business loss, or adverse business development experienced by the reporting entity",
    "unexpected": "an unexpected event or development experienced by, or related to the reporting entity"
}

In [7]:
responses = []
for i, x in annotations[annotations.gpt_loss.apply(lambda x: x is None)][:40].iterrows():
    for l in labels:
        response = completions_with_backoff(
            model=model,
            messages=[
                {"role": "system", "content": "You are a research assistant paid for labeling the following statements as accurately as possible. Please return the label as True or False."},
                {"role": "user", "content": query_template.format(sequence=x.text, label=labels[l])},
            ],
            temperature=0.2,
            max_tokens=10)
        annotations.loc[i, f"gpt_{l}"] = response.choices[0].message.content
        responses.append(
            {
                "index": i,
                "label": l,
                "response": response
            }
        )
        with open(Path(config["BASE_PATH"])/"data/labeling/gpt-responses.pkl", "wb") as f:
            pickle.dump(responses, f)
        annotations.to_pickle(Path(config["BASE_PATH"])/"data/labeling/active-learning-labeling-gpt.pkl")
        time.sleep(20)
    print(i)


37910
38345
38409
38464
38570
38653
39081
39181
39371
39949
40169
40368
40544
40595
40754
40799
40826
40959
40975
41417
41477
41533
41568
41675
41936
42032
42145
42260
42410
42430
42436
42551
42591
42600
42613
42646
42880
43702
43716
43787


In [9]:
annotations[annotations.gpt_loss.apply(lambda x: x is not None)][[]]#.shape

Unnamed: 0,report_id,paragraph_nr,text,n_words,filing_type,logits_loss,logits_unexpected,loss,unexpected,labeled,strategy,p_loss,p_unexpected,gpt_loss,gpt_unexpected
1874,1946,844,million. These actions were taken as a result ...,203,10K,"[-0.11371928, -0.12775159]","[-1.8307474851608276, 1.9387129545211792]",0,0,True,lc_loss,0.503508,0.022545,False,False
1962,5879,1436,"(2) Underwriting income (loss), adjusted under...",71,10K,"[-0.09082798, -0.09680451]","[-2.8704769611358643, 3.2351996898651123]",0,0,True,lc_loss,0.501494,0.002225,False,False
2100,2987,2109,"Reserves are reestimated quarterly, by combini...",263,10K,"[-1.2206439, 1.1299531]","[0.1147109791636467, -0.06223767250776291]",0,0,True,lc_unexpected,0.087018,0.544122,False,False
2534,AdmiralGroupPLC-AR_2013,184,"2013 was yet another good year for Admiral, bu...",28,annual_report,"[-3.4491937, 3.7907827]","[0.11399621516466141, 0.15437042713165283]",0,0,True,lc_unexpected,0.000717,0.489908,False,False
2577,1080,627,"In December 1997, the Company began a comprehe...",278,10K,"[1.8032632, -2.389743]","[-1.3694382905960083, 1.5590710639953613]",1,0,True,hc_t_loss,0.985124,0.050762,True,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
42646,2844,870,The increase in net loss for Interest Expense ...,56,10K,"[0.85338163, -1.1634406]","[0.16455049812793732, -0.05269180238246918]",0,0,True,lc_unexpected,0.882552,0.554098,True,False
42880,StorebrandASA-AR_2000,115,A greenhouse for growth New technology and cha...,126,annual_report,"[-1.9969531, 2.1821964]","[0.08060984313488007, 0.2991379201412201]",0,0,True,lc_unexpected,0.015081,0.445584,False,False
43702,5023,1616,The methodology and revised assumptions the Co...,80,10K,"[-0.023726357, -0.039020926]","[-2.0807933807373047, 2.091275453567505]",0,0,True,lc_loss,0.503824,0.015186,True,False
43716,5183,787,Overall industry-wide loss experience data and...,174,10K,"[-0.042522077, -0.052653357]","[-1.6770411729812622, 1.8301029205322266]",1,0,True,lc_loss,0.502533,0.029110,True,True


In [25]:
from sklearn.metrics import classification_report

In [19]:
df = annotations[annotations.gpt_loss.apply(lambda x: x is not None)]
df.gpt_loss = df.gpt_loss.apply(lambda x: int(x == "True"))
df.gpt_unexpected = df.gpt_unexpected.apply(lambda x: int(x == "True"))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.gpt_loss = df.gpt_loss.apply(lambda x: int(x == "True"))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.gpt_unexpected = df.gpt_unexpected.apply(lambda x: int(x == "True"))


In [37]:
print(classification_report(df.loss.to_list(), df.gpt_loss.to_list()))
print(classification_report(df.loss.to_list(), (df.p_loss > 0.5).astype(int).to_list()))

              precision    recall  f1-score   support

           0       0.97      0.66      0.79       137
           1       0.57      0.95      0.71        63

    accuracy                           0.76       200
   macro avg       0.77      0.81      0.75       200
weighted avg       0.84      0.76      0.76       200

              precision    recall  f1-score   support

           0       0.93      0.74      0.82       137
           1       0.60      0.87      0.71        63

    accuracy                           0.78       200
   macro avg       0.77      0.81      0.77       200
weighted avg       0.83      0.78      0.79       200



In [36]:
print(classification_report(df.unexpected.to_list(), df.gpt_unexpected.to_list()))
print(classification_report(df.unexpected.to_list(), (df.p_unexpected > 0.5).astype(int).to_list()))

              precision    recall  f1-score   support

           0       0.95      0.48      0.64       183
           1       0.11      0.71      0.19        17

    accuracy                           0.50       200
   macro avg       0.53      0.59      0.42       200
weighted avg       0.88      0.50      0.60       200

              precision    recall  f1-score   support

           0       0.99      0.56      0.71       183
           1       0.16      0.94      0.28        17

    accuracy                           0.59       200
   macro avg       0.58      0.75      0.50       200
weighted avg       0.92      0.59      0.68       200

              precision    recall  f1-score   support

           0       0.53      0.59      0.56        93
           1       0.61      0.55      0.58       107

    accuracy                           0.57       200
   macro avg       0.57      0.57      0.57       200
weighted avg       0.57      0.57      0.57       200



In [22]:
df[(df.gpt_loss != df.loss)][["text","gpt_loss", "loss"]]

Unnamed: 0,text,gpt_loss,loss
3082,We expect to continue to experience unpredicta...,1,0
3196,The primary sources of funds for Unitrin’s ins...,1,0
3726,The most important line of business in propert...,1,0
3745,We entered into a deferred compensation agreem...,1,0
3954,and cash equivalents in 2017 was due to paymen...,1,0
4018,"While the Company has obtained insurance, impl...",1,0
4372,In the beginning of the year the tax treatment...,1,0
4977,P eople are living ever longer and can afford ...,1,0
5469,Mandatum Life to change its focus from new sal...,1,0
5632,portfolio for declines in value that are other...,1,0


In [23]:
df[(df.gpt_unexpected != df.unexpected)][["text","gpt_loss", "loss"]]

Unnamed: 0,text,gpt_loss,loss
2577,"In December 1997, the Company began a comprehe...",1,1
2770,"In 2000, the provision for incurred losses and...",1,1
3082,We expect to continue to experience unpredicta...,1,0
3196,The primary sources of funds for Unitrin’s ins...,1,0
3726,The most important line of business in propert...,1,0
...,...,...,...
41936,A CHF 633 million operating loss for 2002 refl...,1,1
42145,"In Q4 2015, the Polish 10-year treasury bonds ...",0,0
42430,· Our controls and procedures may fail or be c...,1,0
42436,"Against this backdrop, it is to be considered ...",0,1
