Copyright 2025 Province of British Columbia

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and limitations under the License.

## Access the Language Service API

This notebook contains example code for calling the Azure Language Service endpoint once a model has been trained and deployed.

In [None]:
# system stuff
import sys
import os

# requests stuff
import requests
import time

# standard stuff
import pandas as pd

# scoring
from sklearn.metrics import precision_score, f1_score, recall_score, accuracy_score

# my stuff
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), "..")))
from src.config import azure_language_endpoint, azure_language_key, azure_language_deployment, azure_language_project, data_path_rvm, out_folder
from src.prepare_data import create_train_test_dataframes

In [None]:
def clean_class(x):
    """Return a category name that is safe to use as a Language Service class.

    Colons and forward slashes are stripped out, and the result is cut off
    after the first 50 characters.
    """
    without_separators = x.translate(str.maketrans('', '', ':/'))
    return without_separators[:50]

In [None]:
# read in a list of documents to classify
df = pd.read_excel(data_path_rvm, sheet_name='Q07a')
# drop the first four columns and the last one; what remains is treated as
# the free-text response followed by one column per category (see the rename)
df = df.iloc[:, 4:-1]

# rename columns
df.columns = ['Response'] + [clean_class(x) for x in df.columns[1:]]

# remove NA responses; .copy() gives an independent frame so the assignments
# below do not trigger pandas' chained-assignment warning
df = df[df['Response'].notna()].copy()
# remove multi-line characters; cast to str first so a purely numeric
# response does not crash the replacement
df['Response'] = df['Response'].astype(str).str.replace('\n', ' ', regex=False)

# map categories to 0/1 instead of NaN/X
labels = (df.iloc[:, 1:] == 'X').astype(int)
df.iloc[:, 1:] = labels

# create None category: 1 when no category at all was marked for the row
df['None'] = (labels.sum(axis=1) == 0).astype(int)

# create a column flagging if this was held back as test data or not
# (rows whose index label is 1000 or higher are the test rows)
df['test_flag'] = df.index >= 1000
df.head()

In [None]:
# create a list of documents that can be sent to the endpoint
# Each document carries the DataFrame index label as its id so that results
# can be matched back to rows later. The id is sent as a string because the
# analyze-text request schema defines document ids as strings.
documents = [
    {'id': str(idx), 'language': 'en-us', 'text': text}
    for idx, text in df['Response'].items()
]

In [None]:
# small sample (the first five documents) for a quick trial run
documents_test = documents[0:5]

In [None]:
# submit classification request
api_version = "2022-05-01"
post_url = f"{azure_language_endpoint}/language/analyze-text/jobs?api-version={api_version}"
headers = {
    "Content-Type": "application/json",
    "Ocp-Apim-Subscription-Key": azure_language_key
}

# Documents are submitted 20 at a time.
# NOTE(review): the original author saw only 20 results come back when 25
# documents were submitted; this may be result paging on the job-status
# endpoint -- confirm against the API reference before raising the batch size.
batch_size = 20
request_timeout = 60      # seconds allowed for each HTTP call
poll_interval = 5         # seconds to wait between status checks
max_wait_seconds = 1800   # give up on a single job after this long

# label columns: everything between 'Response' and 'test_flag'
# (computed once here; later cells reuse `categories`)
categories = list(df.columns[1:-1])

# one dict per classified document, accumulated across all batches
all_results = []

# loop over all batches
for i_start in range(0, len(documents), batch_size):
    i_end = min(i_start + batch_size, len(documents))
    print(f'Retrieving Documents {i_start} - {i_end}')
    document_batch = documents[i_start:i_end]
    payload = {
        "displayName": "Multi Label Classification Job",
        "analysisInput": {"documents": document_batch},
        "tasks": [
            {
                "kind": "CustomMultiLabelClassification",
                "taskName": "Multi Label Classification",
                "parameters": {
                    "projectName": azure_language_project,
                    "deploymentName": azure_language_deployment
                }
            }
        ]
    }

    response = requests.post(
        post_url, headers=headers, json=payload, timeout=request_timeout
    )

    # Stop immediately on a rejected submission: without an accepted job
    # there is nothing to poll.
    if response.status_code != 202:
        raise RuntimeError(f"Error: {response.status_code}, {response.text}")

    # Extract result URL from response
    result_url = response.headers.get("Operation-Location")
    if not result_url:
        raise RuntimeError("Error: No operation URL returned.")

    print(f"Job submitted. Polling results at: {result_url}")

    # Poll for Results
    deadline = time.monotonic() + max_wait_seconds
    while True:
        result_response = requests.get(
            result_url, headers=headers, timeout=request_timeout
        )
        result_response.raise_for_status()
        result_json = result_response.json()

        status = result_json.get("status", "").lower()
        if status == "succeeded":
            print("Job completed successfully!")
            break
        # Both spellings are checked; presumably the service uses one of
        # them -- verify against the job state values in the API reference.
        if status in ("failed", "canceled", "cancelled"):
            # raise instead of exit(): exit() would shut down the notebook kernel
            raise RuntimeError(f"Job failed: {result_json}")
        if time.monotonic() > deadline:
            raise TimeoutError(
                f"Job still '{status}' after {max_wait_seconds} seconds: {result_url}"
            )
        print(f"Job status: {status}. Waiting...")
        time.sleep(poll_interval)  # Wait before polling again

    # Extract Results
    task_results = result_json["tasks"]["items"][0]["results"]

    # Surface per-document failures instead of dropping them silently
    # (reads the "errors" list if the response contains one).
    for doc_error in task_results.get("errors", []):
        print(f"Warning: document not classified: {doc_error}")

    for doc in task_results["documents"]:
        predicted = {x['category'] for x in doc['class']}
        out_dict = {'id': doc["id"]}
        # 1 when the category was predicted for this document, else 0
        out_dict.update(
            {category: int(category in predicted) for category in categories}
        )
        all_results.append(out_dict)

# Build the output frame once (category columns first, then 'id'); this
# avoids repeatedly concatenating onto an initially empty DataFrame.
df_out = pd.DataFrame(all_results, columns=categories + ['id'])

In [None]:
# clean up ids: make them integers and use them as the (unnamed) row index
df_out['id'] = df_out['id'].astype('int')
# drop=False keeps the 'id' column alongside the index
df_out = df_out.set_index('id', drop=False).rename_axis(None)

# rows whose id is 1000 or higher are flagged as test data
df_out['test_flag'] = df_out.index >= 1000

df_out.head()

In [None]:
# compare to known outputs
# Align predictions with the ground truth by document id (the index of both
# frames) rather than by row position, so a different return order or a
# missing document cannot silently pair a prediction with the wrong row.
missing_ids = df.index.difference(df_out.index)
if len(missing_ids) > 0:
    raise ValueError(
        f"No predictions returned for {len(missing_ids)} document(s), "
        f"e.g. ids {list(missing_ids[:10])}"
    )
df_pred = df_out.loc[df.index]

y = df[categories].values.astype('float')
y_out = df_pred[categories].values.astype('float')

# the same row mask is applied to both arrays so the test subsets stay aligned
test_mask = df['test_flag'].to_numpy(dtype=bool)
y_test = y[test_mask]
y_out_test = y_out[test_mask]

In [None]:
# get acc, f1, prec, recall
def _score_column(y_true, y_pred):
    """Return [accuracy, f1, precision, recall]; the last three are micro-averaged."""
    micro = {'average': 'micro'}
    return [
        accuracy_score(y_true, y_pred),
        f1_score(y_true, y_pred, **micro),
        precision_score(y_true, y_pred, **micro),
        recall_score(y_true, y_pred, **micro),
    ]


# one row per metric: scores over all rows ('pct') and over the test rows only ('pct_test')
extra = pd.DataFrame({
    'metric': ['acc', 'f1', 'prec', 'recall'],
    'pct': _score_column(y, y_out),
    'pct_test': _score_column(y_test, y_out_test),
})

In [None]:
# write the metric summary table to the output folder, without the index column
summary_path = f"{out_folder}/rbcm_q7_summary_language_service.csv"
extra.to_csv(summary_path, index=False)