# Init

In [130]:
import json
import os
import pandas as pd
import requests

# pprint is used to format the JSON response
from pprint import pprint
from tqdm.auto import tqdm

key = "7f08d854fe9d4919b321c74378c984ce"
endpoint = "https://earnings-call.cognitiveservices.azure.com"

sentiment_url = endpoint + "/text/analytics/v3.0/sentiment"
headers = {'Content-Type': 'application/json', 'Ocp-Apim-Subscription-Key': key}

# parse

## get response

In [143]:
targets_df = pd.read_feather('data/f_sue_keydevid_car_finratio_vol_transcriptid_sim_inflow_revision_text_norm.feather')

text_present = targets_df[['docid', 'text_present']]
text_qa = targets_df[['docid', 'text_qa']]

In [None]:
%%time

def get_response(text_df):

    try: 
        results = []
        request_docs = []
        
        for doc_i, (_, docid, text) in enumerate(tqdm(text_df.itertuples(), total=len(text_df))):
            text_chunks = list(chunks(text, 5120))
            for chunk_idx, text_chunk in enumerate(text_chunks):

                # add requests if less than 10 documents
                request_docs.append({'id':f'{docid},{chunk_idx}', 'language': 'en', 'text': text_chunk}) 

                # sanity check
                assert len(request_docs)<=10, f'There are more than 10 docs in a request at docid={docid}, chunk_idx={chunk_idx}!'

                # otherwise, send request
                if len(request_docs)==10 or (doc_i==len(text_present)-1 and chunk_idx==len(text_chunks)-1):

                    # get response
                    response = requests.post(sentiment_url, headers=headers, json={'documents': request_docs})
                    response = response.json()

                    # check parse errors
                    assert len(response['errors']) == 0, f"There are errors, please check!\n{response['errors']}"

                    # collect results
                    results.extend(response['documents'])

                    # reset requests
                    request_docs = []

    except Exception as e:
        print(f'Exception caught at docid={docid}, chunk_idx={chunk_idx}!\n')
        print(e)
        
    finally:
        return results
    

present_response = get_response(text_present)
# qa_response = get_response(text_qa)

sv('present_response')

HBox(children=(FloatProgress(value=0.0, max=21763.0), HTML(value='')))

In [114]:
len(results)

42

## format results

In [None]:
results[0]

In [126]:
sentiment = []
for re in results:
    # re = json.dumps(re)
    docid, chunk_i = re['id'].split(',')
    chunk_sentiment = re['sentiment']
    chunk_positive = re['confidenceScores']['positive']
    chunk_neutral = re['confidenceScores']['neutral']
    chunk_negative = re['confidenceScores']['negative']
    
    for sentence_i, sentence in enumerate(re['sentences']):
        sentence_sentiment = sentence['sentiment']
        sentence_positive = sentence['confidenceScores']['positive']
        sentence_neutral = sentence['confidenceScores']['neutral']
        sentence_negative = sentence['confidenceScores']['negative']
        sentence = sentence['text']
        
        sentiment.append((docid, chunk_i, chunk_sentiment, chunk_positive, chunk_neutral, chunk_negative, \
                          sentence_i, sentence_sentiment, sentence_positive, sentence_neutral, sentence_negative, \
                          sentence))

In [128]:
sentiment = pd.DataFrame(sentiment, columns=['docid', 'chunk_i', 'chunk_sentiment', 'chunk_positive', 'chunk_neutral', 'chunk_negative', 'sentence_i', 'sentence_sentiment', 'sentence_positive', 'sentence_neutral', 'sentence_negative', 'sentence'])

Unnamed: 0,docid,chunk_i,chunk_sentiment,chunk_positive,chunk_neutral,chunk_negative,sentence_i,sentence_sentiment,sentence_positive,sentence_neutral,sentence_negative,sentence
0,001013-2008-03-05,0,mixed,0.75,0.14,0.11,0,positive,1.00,0.00,0.00,Good afternoon and thank you for joining us on...
1,001013-2008-03-05,0,mixed,0.75,0.14,0.11,1,neutral,0.02,0.97,0.01,"Bob Switz, ADC's President and CEO as well as ..."
2,001013-2008-03-05,0,mixed,0.75,0.14,0.11,2,neutral,0.02,0.53,0.45,Before we get started I need to caution you th...
3,001013-2008-03-05,0,mixed,0.75,0.14,0.11,3,negative,0.00,0.00,1.00,Actual results may be affected by many importa...
4,001013-2008-03-05,0,mixed,0.75,0.14,0.11,4,neutral,0.00,0.99,0.01,This earnings release can be accessed at the i...
...,...,...,...,...,...,...,...,...,...,...,...,...
1314,001045-2008-10-15,3,mixed,0.57,0.11,0.32,28,negative,0.39,0.07,0.54,"To conclude, while fuel prices have used off t..."
1315,001045-2008-10-15,3,mixed,0.57,0.11,0.32,29,positive,0.70,0.01,0.29,This said we are working diligently to right-s...
1316,001045-2008-10-15,3,mixed,0.57,0.11,0.32,30,positive,1.00,0.00,0.00,These are the right things to do for our compa...
1317,001045-2008-10-15,3,mixed,0.57,0.11,0.32,31,neutral,0.01,0.99,0.00,"With that, G"
