In [None]:
import os
import openai
from dotenv import load_dotenv
from openai import OpenAI
import pandas as pd
from tqdm import tqdm  # For progress bar

# Load environment variables from the .env file (if present) into os.environ
load_dotenv()

# Read the OpenAI API key from the environment instead of hardcoding it in the
# notebook, so the secret never ends up in version control or shared outputs.
# The "" fallback keeps the variable a str when the key is not configured.
openai_api_key = os.getenv("OPENAI_API_KEY", "")

# Report whether a key was found
if openai_api_key:
    print("OpenAI API Key loaded successfully!")
else:
    print("OpenAI API Key not found. Please check your .env file.")

# Closed set of intent labels offered to the classifier.
# Order is preserved because the labels are joined, in this order, into the prompt.
intent_labels = [
    "SetReminder",
    "SetAlarm",
    "CreateCalendarEvent",
    "SendMessage",
    "SendEmail",
    "MakeCall",
    "OpenApp",
    "SearchWeb",
    "SetTimer",
    "CheckWeather",
    "TurnOnDevice",
    "TurnOffDevice",
    "AdjustBrightness",
    "AdjustTemperature",
    "LockDoor",
    "UnlockDoor",
    "StartVacuum",
    "StopVacuum",
    "CheckSecurityCamera",
    "SetScene",
    "PlayMusic",
    "PauseMusic",
    "SkipTrack",
    "PlayPodcast",
    "PlayVideo",
    "AdjustVolume",
    "SetPlaybackSpeed",
    "SearchMovie",
    "ShowTVGuide",
    "GetDirections",
    "CheckTraffic",
    "FindNearbyPlace",
    "EstimateArrivalTime",
    "StartNavigation",
    "StopNavigation",
    "SendTextMessage",
    "MakePhoneCall",
    "StartVideoCall",
    "CheckVoicemail",
    "ReadMessage",
    "ReplyToMessage",
    "SendGroupMessage",
    "AnswerGeneralQuestion",
    "DefineWord",
    "ConvertUnits",
    "GetSportsScores",
    "CheckStockPrice",
    "GetFact",
    "TranslateText",
    "MathCalculation",
    "FindPersonInfo",
    "GetNewsUpdate",
]

# Shared OpenAI client, authenticated with the key configured above
client = openai.OpenAI(
    api_key=openai_api_key,
)

def predict_intent(query):
    """
    Predicts the intent of a given query using OpenAI's gpt-4o-mini chat model.

    The prompt lists every label in the module-level `intent_labels` and asks
    the model to answer with a single label. The reply is returned as-is
    (whitespace-stripped); it is NOT validated against `intent_labels`, so
    callers should be prepared for an occasional off-list answer.

    Parameters:
        query (str): The user query.

    Returns:
        str: Predicted intent label.

    Raises:
        ValueError: If the model response contains no text content.
    """
    prompt = f"""
    You are an AI assistant trained to classify user queries into predefined intent categories.
    Given the following user query, determine the most appropriate intent from the list below.

    Intent Labels:
    {', '.join(intent_labels)}

    User Query: "{query}"

    Provide only the intent label as output.
    """

    # Use the new OpenAI client syntax
    chat_completion = client.chat.completions.create(
        messages=[
            {"role": "system", "content": "You classify user queries into intents."},
            {"role": "user", "content": prompt}
        ],
        model="gpt-4o-mini",
        # Classification should be repeatable: disable sampling randomness.
        temperature=0,
    )

    # The message content can be None (e.g. a refusal); fail with a clear
    # error instead of an AttributeError on `.strip()`.
    response_content = chat_completion.choices[0].message.content
    if response_content is None:
        raise ValueError("Model returned no text content for the query.")
    return response_content.strip()



def process_file(file_path, output_path, checkpoint_path="a.txt"):
    """
    Processes a TSV file to compute intent for the 'Rewritten Question' column.

    Each row's query is classified with `predict_intent`; the result is stored
    in a new 'intent' column and the frame is written to `output_path`. A
    failed prediction is logged and recorded as the literal string "Error" so
    a single bad row does not abort the run.

    While the loop runs, every result is also appended to a checkpoint file as
    "<row position><TAB><intent>" and flushed immediately, so partial results
    survive an interrupted run.

    Parameters:
        file_path (str): Path to the input TSV file.
        output_path (str): Path to save the output TSV file.
        checkpoint_path (str | None): Path of the progress checkpoint file
            (overwritten at the start of each run). Pass None or "" to
            disable checkpointing. Defaults to "a.txt".

    Raises:
        KeyError: If the input file has no 'Rewritten Question' column.
        OSError: If the checkpoint file cannot be opened or written.
    """
    # Read the TSV file
    df = pd.read_csv(file_path, sep="\t")
    if "Rewritten Question" not in df.columns:
        raise KeyError(f"Column 'Rewritten Question' not found in {file_path}")

    print(f"Processing file: {file_path}")
    intents = []

    # Open the checkpoint once, up front, so a bad path fails before any API
    # call is made and the file is not truncated on every row.
    checkpoint = open(checkpoint_path, "w", encoding="utf-8") if checkpoint_path else None
    try:
        for i, query in enumerate(tqdm(df["Rewritten Question"], desc="Process")):
            # Only the prediction is guarded: exactly one entry is appended
            # per row, keeping `intents` aligned with the DataFrame.
            try:
                intent = predict_intent(query)
            except Exception as e:
                print(f"Error processing query: {query}, Error: {e}")
                intent = "Error"  # Log error if prediction fails
            intents.append(intent)

            if checkpoint is not None:
                checkpoint.write(f"{i}\t{intent}\n")
                checkpoint.flush()
    finally:
        if checkpoint is not None:
            checkpoint.close()

    # Add intents to DataFrame
    df["intent"] = intents

    # Save the updated DataFrame to a new TSV file
    df.to_csv(output_path, sep="\t", index=False)
    print(f"Processing completed and saved to: {output_path}")


OpenAI API Key loaded successfully!


In [6]:
# Input file path
train_file = "train.tsv"  # Path to the train file

# Output file path
train_output_file = "train_with_intents.tsv"

# Classify every query in the train file. This makes one API call per row,
# so it is slow (the recorded run took about 5.5 hours for 31,218 rows).
process_file(train_file, train_output_file)

# Only the train file is processed in this cell.
print("Intent classification completed for the train file.")

Processing file: train.tsv


Process: 100%|██████████| 31218/31218 [5:34:28<00:00,  1.56it/s]      


Processing completed and saved to: train_with_intents.tsv
Intent classification completed for both train and test files.
