In [1]:
import os
import httpx
import pandas as pd

# "Map" sheet of the GRID workbook, with every cell read as text (dtype=str).
# NOTE(review): grid_data is not referenced in the cells visible here —
# confirm whether it is still needed.
grid_data = pd.read_excel(
    io="../../../../data/external/GRID_Data.xlsm",
    sheet_name="Map",
    dtype=str,
)

# Cases to classify; later cells read its "Issue Title" and "Description"
# columns and add a "Binning" column.
data = pd.read_csv(filepath_or_buffer="extracted.csv")

In [2]:
def create_client():
    """Build an ``httpx.Client`` whose HTTP and HTTPS traffic goes through a proxy.

    The proxy URL is read from the ``FORD_PROXY`` environment variable. The
    client uses a 10 second default timeout with a 5 second connect timeout.

    Returns:
        httpx.Client: a client with one proxied transport mounted for
        ``http://`` and another for ``https://``. The caller is responsible
        for closing it (e.g. by using it as a context manager).

    Raises:
        ValueError: if ``FORD_PROXY`` is unset or empty.
    """
    ford_proxy = os.getenv("FORD_PROXY")
    if not ford_proxy:
        # Previously the value was wrapped in str(), which turned a missing
        # variable into the literal proxy URL "None".
        raise ValueError(
            "FORD_PROXY environment variable is not set; cannot configure the proxy"
        )

    timeout_config = httpx.Timeout(10.0, connect=5.0)
    proxy_mounts = {
        "http://": httpx.HTTPTransport(proxy=httpx.Proxy(ford_proxy)),
        "https://": httpx.HTTPTransport(proxy=httpx.Proxy(ford_proxy)),
    }
    # NOTE(review): verify=False asks httpx to skip TLS certificate
    # verification. Since both schemes are served by explicitly mounted
    # transports, this flag may not reach them — confirm the intended TLS
    # behaviour against the httpx documentation.
    return httpx.Client(
        timeout=timeout_config,
        mounts=proxy_mounts,
        verify=False,
    )


def load_classifier_credentials():
    """Request an access token and return what is needed to call the classifier.

    Performs an OAuth-style ``client_credentials`` POST against
    ``TOKEN_ENDPOINT`` using ``CLIENT_ID``, ``CLIENT_SECRET`` and ``SCOPE``,
    all read from environment variables. The request goes through the client
    built by ``create_client()`` and uses a 160 second timeout.

    Returns:
        dict: ``{"url": <API_ENDPOINT value>, "token": <access_token>}``.

    Raises:
        ValueError: if any of the required environment variables is unset or
            empty.
        httpx.HTTPStatusError: if the token endpoint answers with a 4xx/5xx
            status.
        KeyError: if the response body has no ``access_token`` field.
    """
    required = (
        "TOKEN_ENDPOINT",
        "CLIENT_ID",
        "CLIENT_SECRET",
        "SCOPE",
        "API_ENDPOINT",
    )
    settings = {name: os.getenv(name) for name in required}
    missing = [name for name in required if not settings[name]]
    if missing:
        # str(os.getenv(...)) used to send the literal text "None" for any
        # variable that was not set; fail before making the request instead.
        raise ValueError(
            "Missing required environment variable(s): " + ", ".join(missing)
        )

    with create_client() as client:
        response = client.post(
            settings["TOKEN_ENDPOINT"],
            data={
                "client_id": settings["CLIENT_ID"],
                "client_secret": settings["CLIENT_SECRET"],
                "scope": settings["SCOPE"],
                "grant_type": "client_credentials",
            },
            timeout=160,
        )

    # Surface a rejected token request as an HTTP error rather than as a
    # KeyError / JSON decoding error on the error body.
    response.raise_for_status()
    return {
        "url": settings["API_ENDPOINT"],
        "token": response.json()["access_token"],
    }


def load_categories():
    """Read the set of binning categories from ``binnings.txt``.

    Each line of the file is split on commas and the second field is taken
    as the category name (with a trailing newline removed). Lines that have
    no second field — blank lines, or lines without a comma — and lines whose
    second field is empty are skipped instead of raising ``IndexError`` or
    adding an empty category.

    Returns:
        frozenset[str]: the distinct category names found in the file.

    Raises:
        OSError: if the file cannot be opened.
    """
    categories = set()
    with open(
        "../../../../data/external/binnings.txt", encoding="utf-8", mode="r"
    ) as file:
        # Iterate the file lazily rather than loading it all with readlines().
        for line in file:
            fields = line.split(",")
            if len(fields) < 2:
                # Blank or malformed line: no category column to read.
                continue
            binning = fields[1].strip("\n")
            if binning:
                categories.add(binning)

    return frozenset(categories)

In [3]:
def classify_case(data, credentials, categories=None) -> str:
    """Ask the classifier endpoint to assign one binning category to a case.

    Args:
        data: text of the case; it is interpolated at the start of the prompt.
        credentials: mapping with ``"url"`` (endpoint to POST to) and
            ``"token"`` (sent as a Bearer token).
        categories: optional iterable of category names to offer the model.
            When omitted, the categories are loaded with ``load_categories()``
            on every call; pass a preloaded collection to avoid re-reading
            the file for each case.

    Returns:
        str: the first line of the ``"content"`` field of the JSON response
        when the endpoint answers with status 200, otherwise the sentinel
        ``"NOT CLASSIFIED"``.

    Raises:
        KeyError: if a 200 response has no ``"content"`` field.
    """
    if categories is None:
        categories = load_categories()
    # Sort the names so the prompt text is the same in every kernel session;
    # iterating a set of strings directly can yield a different order from
    # one interpreter run to the next.
    category_list = sorted(categories)

    text = (
        f"{data}. For this sentences that, check if it is related to onl"
        + f"y one of the following categories: {category_list}"
        + ". Your answer must be only one of these categories. Note: 'OW"
        + "D' means 'opened while driving' and 'F&F' means 'fit and fini"
        + "sh', for problems related to flushness and margin. Note 2: Fo"
        + "r model Escape (2020 forward), there is a common problem rela"
        + "ted to door check arm when the complaint is related to the do"
        + "or making popping sounds, opening and closing problens, hinge"
        + "s and welds. If you cannot assist, answer NA. You should be o"
        + "bjective and cold. Never change the answer format mentioned a"
        + "nd Never create a new categorie."
    )
    content = {
        "model": "gpt-4",
        "context": (
            "You are a helpful text reader and analyzer. You need to give me 2 answers."
        ),  # sets the overall behavior of the assistant.
        "messages": [{"role": "user", "content": text}],
        "parameters": {
            "temperature": 0.05,  # Determines the randomness of the model's response.
        },
    }
    # NOTE(review): this request uses the module-level httpx.post rather than
    # the proxied client from create_client() — confirm that is intentional.
    response = httpx.post(
        credentials["url"],
        headers={"Authorization": f"Bearer {credentials['token']}"},
        json=content,
        timeout=360,
    )
    if response.status_code == 200:
        message = response.json()["content"]
        # Only the first line of a multi-line answer is kept.
        if "\n" in message:
            return message.split("\n")[0]

        return message
    # Any non-200 status is reported with a sentinel instead of an exception.
    return "NOT CLASSIFIED"

In [4]:
# Fetch the endpoint URL and bearer token once and reuse them for every row.
credentials = load_classifier_credentials()

# Build "<Issue Title>,<Description>" per row. read_csv leaves empty cells as
# NaN (a float), and concatenating a float with a str raises TypeError, so
# missing values are blanked and everything is coerced to text first.
case_text = (
    data["Issue Title"].fillna("").astype(str)
    + ","
    + data["Description"].fillna("").astype(str)
)
data["Binning"] = case_text.apply(
    lambda text: classify_case(text, credentials)
)

# Notes kept from the original cell (presumably category observations or
# remappings — TODO confirm with the author):
# REAR WINDOW | FELL OFF
# TAIL LIGHTS | CONDENSATION ->  TAIL LIGHTS | WATER LEAK
data.to_csv("GRID_PROCESSED_22-02-2024.csv")

# Preview the assigned categories; as the last expression of the cell it is
# rendered as the cell output.
data["Binning"]