<a href="https://colab.research.google.com/github/chidinma-godwin/course-recommendation-nlp/blob/main/generate_query_data.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

This notebook demonstrates the process of generating query data using the CLINC150 dataset and manually generated examples, which are further augmented through paraphrasing techniques. This approach helps create a robust and diverse dataset for the conversational course recommendation system.

In [19]:
!pip install -q datasets

In [20]:
!pip install -q spacy

In [21]:
# Mount Google Drive at /content/drive (Colab only); the course list is read from
# this mount and the generated queries are written back to it.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import pandas as pd
import random
from sklearn.utils import shuffle
import spacy
from datasets import load_dataset
import torch
from transformers import PegasusTokenizer, PegasusForConditionalGeneration

In [None]:
# Run the paraphrasing model on the GPU when one is visible, otherwise on the CPU
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

In [None]:
# Load the small English spaCy pipeline and rebuild the tokenizer's infix rules,
# leaving out every default pattern whose source contains '-', '_' or '/'.
# NOTE(review): this is a plain substring test on the regex source, so a '-' that
# only denotes a range inside a character class also excludes the pattern — confirm
# that dropping those rules is intended.
nlp = spacy.load('en_core_web_sm')

infixes = [
    pattern
    for pattern in nlp.Defaults.infixes
    if not any(char in pattern for char in ('-', '_', '/'))
]
infixes_regex = spacy.util.compile_infix_regex(infixes)
nlp.tokenizer.infix_finditer = infixes_regex.finditer

In [None]:
# Paraphrasing checkpoint: Pegasus weights plus the tokenizer published with them
model_name = 'tuner007/pegasus_paraphrase'
tokenizer = PegasusTokenizer.from_pretrained(model_name)
model = PegasusForConditionalGeneration.from_pretrained(model_name)
# Place the weights on the device selected earlier
model = model.to(device)


def paraphrase_intents(intents, max_length=100, num_beams=10):
    """Paraphrase each text in ``intents`` with the module-level Pegasus model.

    Args:
        intents: Texts to paraphrase; they are processed one at a time.
        max_length: Passed to the tokenizer (with truncation enabled) and to
            ``model.generate`` as ``max_length``.
        num_beams: Beam width passed to ``model.generate``.

    Returns:
        A list with one paraphrase per input text, in the same order.
    """
    def _rewrite(sentence):
        # Encode a single sentence and move the tensors to the model's device
        encoded = tokenizer(
            [sentence],
            padding='longest',
            return_tensors='pt',
            max_length=max_length,
            truncation=True,
        ).to(device)
        generated = model.generate(**encoded, max_length=max_length, num_beams=num_beams)
        # Only the first decoded sequence is kept
        return tokenizer.batch_decode(generated, skip_special_tokens=True)[0]

    return [_rewrite(sentence) for sentence in intents]

Some weights of PegasusForConditionalGeneration were not initialized from the model checkpoint at tuner007/pegasus_paraphrase and are newly initialized: ['model.decoder.embed_positions.weight', 'model.encoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


tokenizer_config.json:   0%|          | 0.00/86.0 [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/1.91M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/65.0 [00:00<?, ?B/s]

In [None]:
# Read the course catalogue from the project folder on the mounted Drive
base_dir = '/content/drive/MyDrive/project'
df = pd.read_csv(base_dir + '/courseslist.csv')

In [None]:
# Fetch the 'plus' configuration of the CLINC out-of-scope intent dataset
dataset = load_dataset(path='clinc_oos', name='plus')

Downloading data:   0%|          | 0.00/312k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/77.8k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/136k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/15250 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/3100 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/5500 [00:00<?, ? examples/s]

In [None]:
# Stack the train/validation/test splits of CLINC150, then keep the three intent
# ids that are relabelled below plus a fixed-seed sample of 450 rows with id 42.
combined_df = pd.concat([pd.DataFrame(dataset[split]) for split in ('train', 'validation', 'test')])
df_selected = combined_df[combined_df['intent'].isin([82, 114, 124])]
df_oos = combined_df.loc[combined_df['intent'] == 42].sample(n=450, random_state=42)

clinc_df = pd.concat([df_selected, df_oos]).reset_index(drop=True).astype(str)

# Relabel: ids 82, 114 and 124 all become 'courtesy'; id 42 becomes 'oos'
clinc_df['intent'] = clinc_df['intent'].replace(
    {"82": 'courtesy', "114": 'courtesy', "124": 'courtesy', "42": 'oos'}
)
# CLINC150 rows carry no slot values, so each gets its own empty dict
clinc_df['entities'] = [{} for _ in range(len(clinc_df))]
clinc_df

Unnamed: 0,text,intent,entities
0,"you have been a big help, thank you so much",courtesy,{}
1,"i appreciate your help, thank you",courtesy,{}
2,"i really appreciate your help, thank you",courtesy,{}
3,i am very grateful,courtesy,{}
4,i appreciate the assistance,courtesy,{}
...,...,...,...
895,what happens if you're skydiving and your para...,oos,{}
896,what was the original name of jesus in egyptia...,oos,{}
897,find the best prices for toothpaste at any sto...,oos,{}
898,can you check to see which parking garages are...,oos,{}


In [None]:
# Templates for the 'recommend' intent. Each "{}" in "text" is filled, in order,
# with a value drawn for the entity named at the same position in "slots", so the
# number of placeholders must equal the number of slots.
recommend_intents = [
    {"text": "Can you recommend some good {} courses on {} taught by {} with a duration of {} and a rating of {}?", "slots": ["level", "topic", "institution", "duration", "rating"]},
    {"text": "I'm looking for a {} course on {} offered by {} that is certified and costs {}.", "slots": ["level", "topic", "provider", "pricing"]},
    {"text": "What are the best {} courses available for {} with a start date of {}?", "slots": ["level", "topic", "start_date"]},
    {"text": "Could you suggest a {} {} course with {} duration and a rating of {}?", "slots": ["level", "topic", "duration", "rating"]},
    {"text": "I need a course on {} that lasts {} and costs {}.", "slots": ["topic", "duration", "pricing"]},
    {"text": "Please recommend a {} course with a {} rating that is offered by {} and has a start date of {}.", "slots": ["topic", "rating", "provider", "start_date"]},
    {"text": "Any recommendations for {} courses that are certified and have a duration of {}?", "slots": ["topic", "duration"]},
    {"text": "Suggest some {} courses with good reviews by {} students, offered by {} with a duration of {} and a rating of {}.", "slots": ["topic", "num_rating", "provider", "duration", "rating"]},
    {"text": "Which courses would you recommend for learning {} that are certified, last {} and are priced at {}?", "slots": ["topic", "duration", "pricing"]},
    {"text": "Do you have any course suggestions for {} offered by {} that are rated {}?", "slots": ["topic", "institution", "rating"]},
    {"text": "Can you recommend an {} course on {} from {} with a duration of {} and a rating of {}?", "slots": ["level", "topic", "institution", "duration", "rating"]},
    {"text": "I'm looking for courses reviewed by {} people on {} that are certified and cost {}.", "slots": ["num_rating", "topic", "pricing"]},
    {"text": "What {} {} courses can you recommend that are offered by {} with a start date of {}?", "slots": ["level", "topic", "provider", "start_date"]},
    {"text": "Could you recommend a {} {} course that has a duration of {} and costs {}?", "slots": ["level", "topic", "duration", "pricing"]},
    {"text": "Are there any {} {} courses rated by {} students that you can recommend?", "slots": ["institution", "topic", "num_rating"]},
    {"text": "Please suggest a {} course from {} with a duration of {}, priced at {}", "slots": ["topic", "institution", "duration", "pricing"]},
    {"text": "I would like a {} level course on {} that offers a certification, lasts {} and has a rating of {}.", "slots": ["level", "topic", "duration", "rating"]},
    {"text": "Can you recommend a {} course with a start date of {}, priced at {} and offered by {} with {} ratings?", "slots": ["topic", "start_date", "pricing", "provider", "num_rating"]},
    {"text": "I'm interested in a {} course that is certified, lasts {}", "slots": ["topic", "duration"]},
    {"text": "What {} courses with certificate can you suggest that are offered by {} and have a rating of {}?", "slots": ["topic", "provider", "rating"]},
    {"text": "Is there a {} course on {} offered by {} with a duration of {} and a rating of {}?", "slots": ["level", "topic", "provider", "duration", "rating"]},
    {"text": "Could you recommend a {} course with {} start date and costs {}?", "slots": ["topic", "start_date", "pricing"]},
    {"text": "What are the best-rated {} courses available from {} that last {} and cost {}?", "slots": ["topic", "provider", "duration", "pricing"]},
    {"text": "Suggest a {} course with a duration of {} that is certified and costs {}. It should be rated by {} people", "slots": ["topic", "duration", "pricing", "num_rating"]},
    {"text": "Are there any {} courses with a certification and a start date of {} that you can recommend?", "slots": ["topic", "start_date"]},
    {"text": "Can you recommend a {} course on {} with a rating of {}", "slots": ["level", "topic", "rating"]},
    {"text": "I'm looking for a {} course with {} reviews that offers a certification and lasts {}.", "slots": ["topic", "num_rating", "duration"]},
    {"text": "Do you have any {} courses with a rating of {} offered by {} that are priced at {}?", "slots": ["topic", "rating", "provider", "pricing"]},
    {"text": "Can you recommend a certified {} course on {} with a link to more details?", "slots": ["level", "topic"]},
    {"text": "What is the best {} course available with a rating of {} at {}?", "slots": ["topic", "rating", "institution"]},
    {"text": "I'm looking for a course on {} with a certification, offered by {} and has a duration of {}.", "slots": ["topic", "provider", "duration"]},
    {"text": "Suggest a {} level course on {} with a start date of {} and a rating of {}.", "slots": ["level", "topic", "start_date", "rating"]},
    {"text": "Can you recommend a {} course that is certified, lasts {} and is priced at {}?", "slots": ["topic", "duration", "pricing"]},
    {"text": "What {} courses with {} reviews are available from {} with a duration of {}?", "slots": ["topic", "num_rating", "provider", "duration"]},
    {"text": "Can you recommend a {} course on {} offered by {} that starts {} and has a rating of {}?", "slots": ["level", "topic", "provider", "start_date", "rating"]},
    {"text": "I need a {} course that lasts {} and costs {}. Any suggestions?", "slots": ["topic", "duration", "pricing"]},
    {"text": "Can you suggest some {} courses with a duration of {}", "slots": ["topic", "duration"]},
    {"text": "I'm looking for a {} course on {} with a rating of {} from {}.", "slots": ["level", "topic", "rating", "provider"]},
    {"text": "What are some good {} courses with {} reviews and a duration of {} offered by {}?", "slots": ["topic", "num_rating", "duration", "institution"]},
    {"text": "Can you recommend a {} level {} course with a rating of {}", "slots": ["level", "topic", "rating"]},
    {"text": "I'm interested in a {} course that lasts {}. What are the options?", "slots": ["topic", "duration"]},
    {"text": "What {} courses are available with a rating of {} and a duration of {} from {}?", "slots": ["topic", "rating", "duration", "provider"]},
    {"text": "Can you suggest a {} course with a duration of {} that starts {} and has a rating of {}?", "slots": ["topic", "duration", "start_date", "rating"]},
    {"text": "Are there any {} courses from {} with a rating of {}", "slots": ["topic", "institution", "rating"]},
    {"text": "What are some good {} courses that last {} and have a link for more details?", "slots": ["topic", "duration"]},
    {"text": "Can you recommend a {} course with a start date of {} and a rating of {} from {}?", "slots": ["topic", "start_date", "rating", "provider"]},
    {"text": "I'm looking for a {} course on {} from {} with a duration of {} and offered by {}.", "slots": ["level", "topic", "institution", "duration", "provider"]},
    {"text": "What {} courses from {} with {} reviews and a rating of {} can you recommend?", "slots": ["topic", "institution", "num_rating", "rating"]},
    {"text": "Can you suggest a {} course with a duration of {} and a rating of {}", "slots": ["topic", "duration", "rating"]},
    {"text": "Is there a certified {} course on {} that is offered by {} and costs {}?", "slots": ["level", "topic", "provider", "pricing"]},
    {"text": "What are the best {} courses available with a duration of {}?", "slots": ["topic", "duration"]},
    {"text": "Can you recommend a {} course with a rating of {} by {} students", "slots": ["topic", "rating", "num_rating"]},
    {"text": "I'm interested in a {} course that lasts {} and cost {}.", "slots": ["topic", "duration", "pricing"]},
    {"text": "What {} courses are available with a rating of {} and a start date of {} from {}?", "slots": ["topic", "rating", "start_date", "provider"]},
    {"text": "Can you suggest some {} {} courses with a duration of {}?", "slots": ["institution", "topic", "duration"]},
    {"text": "Are there any {} courses offered by {} with a start date of {} and a rating of {}?", "slots": ["topic", "provider", "start_date", "rating"]},
    {"text": "What {} courses with {} reviews can you recommend?", "slots": ["topic", "num_rating"]},
    {"text": "Can you recommend a {} course with a duration of {} and a link to more details?", "slots": ["topic", "duration"]},
    {"text": "I'm looking for a {} course with a start date of {} and rated by {} people.", "slots": ["topic", "start_date", "num_rating"]},
    {"text": "What {} courses with a rating of {} and a start date of {} can you recommend?", "slots": ["topic", "rating", "start_date"]},
    {"text": "Can you suggest a certified {} {} course with a rating of {} taught by {}", "slots": ["level", "topic", "rating", "institution"]},
    {"text": "I like the courses you suggested. Can you recommend another {} {} course?", "slots": ["pricing", "level"]},
    {"text": "Can you find me an {} course with a {} duration than the ones you mentioned earlier?", "slots": ["level", "duration"]},
    {"text": "Thanks for those recommendations! How about a course with {} of content?", "slots": ["duration"]},
    {"text": "Those courses sound great. Can you recommend a course with {} reviews?", "slots": ["rating"]},
    {"text": "Could you suggest a course by {} that is similar to the one you mentioned but {}?", "slots": ["institution", "pricing"]},
    {"text": "I’m interested in a certified {} course. Can you recommend one?", "slots": ["level"]},
    {"text": "I prefer courses from {}. Can you recommend a course by them?", "slots": ["institution"]},
    {"text": "Could you suggest a course that’s {} than the ones you listed?", "slots": ["duration"]},
    {"text": "I’m looking for a course with a rating of {}. Do you have any other suggestions?", "slots": ["rating"]},
    {"text": "What about a course that is available for {}? Any recommendations?", "slots": ["pricing"]}
]

# Templates for the 'details' intent. Each "{}" in "text" is filled, in order,
# with a value drawn for the entity named at the same position in "slots".
# A template without placeholders must have an empty "slots" list; otherwise a
# random value that never appears in the text would be recorded for it.
details_intents = [
    {"text": "Tell me more about the {} course. What is its duration and certification status?", "slots": ["course_name"]},
    {"text": "I want details on the {} course including its start date, level, and pricing.", "slots": ["course_name"]},
    {"text": "Can you provide more information on the {} course, such as its rating and the duration?", "slots": ["course_name"]},
    {"text": "Give me some details on the {} class, including the course link and the provider.", "slots": ["course_name"]},
    {"text": "I need to know more about the {} course. Is it self-paced, and what is the start date?", "slots": ["course_name"]},
    {"text": "What does the {} course cover? Can you tell me about its prerequisites?", "slots": ["course_name"]},
    {"text": "Describe the {} course to me, including its level, duration.", "slots": ["course_name"]},
    {"text": "What's included in the {} course? Please provide details about the provider, pricing, and course link.", "slots": ["course_name"]},
    {"text": "Could you explain the content of the {} course, including the topics covered?", "slots": ["course_name"]},
    {"text": "I'm interested in the {} course. Can you tell me more about its duration and rating?", "slots": ["course_name"]},
    {"text": "What is the level of the {} course? Does it have any special reviews?", "slots": ["course_name"]},
    {"text": "Is the {} course certified? What is the pricing and who is the provider?", "slots": ["course_name"]},
    {"text": "How long is the {} course, and does it have any special requirements?", "slots": ["course_name"]},
    {"text": "What rating does the {} course have? Also, provide information about the institution.", "slots": ["course_name"]},
    {"text": "Who is the provider of the {} course? What are the course's duration, pricing, and certification details?", "slots": ["course_name"]},
    {"text": "Can you give me details on the {} course, such as its start date, duration, and rating?", "slots": ["course_name"]},
    {"text": "Does the {} course offer a certification? What is the level and the duration of the course?", "slots": ["course_name"]},
    {"text": "Is the {} course suitable for beginners? Can you provide details about its pricing?", "slots": ["course_name"]},
    {"text": "What topics are covered in the {} course? What is its start date?", "slots": ["course_name"]},
    {"text": "Can you provide the syllabus for the {} course, including information on the pricing?", "slots": ["course_name"]},
    {"text": "What are the prerequisites for the {} course? What is the course's duration?", "slots": ["course_name"]},
    {"text": "Is the {} course self-paced? What are the start date and pricing details?", "slots": ["course_name"]},
    {"text": "Does the {} course have good reviews? What is the rating?", "slots": ["course_name"]},
    {"text": "What is the rating and duration of the {} course?", "slots": ["course_name"]},
    {"text": "What is the difficulty level of the {} course? Is it self-paced and what is the start date?", "slots": ["course_name"]},
    {"text": "Is there any certification upon completing the {} course? What is the course's duration and pricing?", "slots": ["course_name"]},
    {"text": "Can you provide more info on the {} course provider, including their courses and ratings?", "slots": ["course_name"]},
    {"text": "Is the {} course self-paced? What are the details on the institution offering it?", "slots": ["course_name"]},
    {"text": "What are its start date, duration, and pricing details of the {} course", "slots": ["course_name"]},
    {"text": "When does the {} course start? What is its duration?", "slots": ["course_name"]},
    {"text": "What is the price of the {} course? Can you also provide information about the course's certification?", "slots": ["course_name"]},
    {"text": "How many ratings does the {} course have? Who is the provider?", "slots": ["course_name"]},
    {"text": "What are the pricing details of {}", "slots": ["course_name"]},
    {"text": "Which institution is offering the {} course? Can you provide details on its duration, pricing, and rating?", "slots": ["course_name"]},
    {"text": "Is the {} course priced? What are the details about the certification?", "slots": ["course_name"]},
    {"text": "Is there a certification for completing the {} course? What is the course's duration and the institution offering it?", "slots": ["course_name"]},
    {"text": "Can you provide information on their pricing of {}?", "slots": ["course_name"]},
    {"text": "I want to enroll in the {} course. Can you provide details about its start date and enrollment process?", "slots": ["course_name"]},
    {"text": "How can I sign up for the {} class? What are the details regarding its pricing?", "slots": ["course_name"]},
    {"text": "I am interested in enrolling in the {} course. Can you tell me about the enrollment dates and the course's duration?", "slots": ["course_name"]},
    {"text": "What is the course link for the {} course? Can you also provide information on its pricing and start date?", "slots": ["course_name"]},
    {"text": "I would like to register for the {} class. What are the details on the course's certification?", "slots": ["course_name"]},
    {"text": "What is the process to enroll in the {} course? Please include details on its pricing, duration, and start date.", "slots": ["course_name"]},
    {"text": "I'd like to sign up for the {} course. Can you provide details on its start date and certification?", "slots": ["course_name"]},
    {"text": "I want to join the {} course. What are the details on its enrollment process, duration, and pricing?", "slots": ["course_name"]},
    {"text": "How do I register for the {} course? Can you provide information about the course link?", "slots": ["course_name"]},
    {"text": "I need help enrolling in the {} course. What are the details regarding its start date, duration?", "slots": ["course_name"]},
    {"text": "Can I enroll in the {} course now? What is the process and what are the details on its pricing?", "slots": ["course_name"]},
    {"text": "How can I join the {} course? Please provide details on the course's link, start date.", "slots": ["course_name"]},
    {"text": "What steps do I need to take to enroll in the {} course? Can you also provide details about its pricing?", "slots": ["course_name"]},
    {"text": "Is enrollment open for the {} course? What are the details regarding the course's duration?", "slots": ["course_name"]},
    {"text": "I want to start the {} course. How can I enroll, and what are the details on its pricing and start date?", "slots": ["course_name"]},
    {"text": "What are the enrollment dates for the {} course? Can you also provide details on its duration and pricing?", "slots": ["course_name"]},
    {"text": "Where can I sign up for the {} course? What is the enrollment fee and the start date?", "slots": ["course_name"]},
    {"text": "Is there an enrollment fee for the {} course? Please provide details on its start date.", "slots": ["course_name"]},
    {"text": "What is the deadline to enroll in the {} course? Can you provide information on its duration?", "slots": ["course_name"]},
    {"text": "I need information on how to register for the {} course. What are the details on its pricing and start date?", "slots": ["course_name"]},
    {"text": "Is the {} course enrollment open? What are the details about it", "slots": ["course_name"]},
    {"text": "Please provide the enrollment link for the {} course. What are the details on its duration and pricing?", "slots": ["course_name"]},
    {"text": "Tell me more about the {} course", "slots": ["course_name"]},
    {"text": "Can you tell me more about the course you mentioned earlier?", "slots": []},
    {"text": "What is the course duration?", "slots": []},
    {"text": "Does the course by {} offer a certificate?", "slots": ["institution"]},
    # Asks for the price but states none, so there is no pricing value to tag
    {"text": "How much does the course cost?", "slots": []},
    # The rating is a placeholder so the recorded slot value always appears in the text
    {"text": "Can you give me more details on the course that has a {} rating?", "slots": ["rating"]},
    {"text": "Is the course you suggested self-paced?", "slots": []},
    {"text": "What are the prerequisites for the course?", "slots": []},
    {"text": "Can you provide the course outline for the course by {}?", "slots": ["institution"]},
    {"text": "What topics are covered in the course you mentioned?", "slots": []},
    {"text": "Does the course include any project work?", "slots": []}
]


# Seed Python's RNG so the random prices below, and every slot value drawn from
# `entity_values` afterwards, are identical on each Restart & Run All.
random.seed(42)

topics = df['subject'].values

entities = ["rating", "topic", "institution", "level", "course_name",
    "num_rating", "provider", "duration", "pricing", "start_date"]

# Candidate values for each slot; templates are filled by drawing from these pools.
# Columns taken from the course list are filtered for missing values: a NaN drawn
# as a slot value would be formatted as "nan" and has no `.lower()` for annotation.
entity_values = {
    "rating": ["high", "low", "4.5", "5.0", "greater than 4.0", "lesser than 3.8", "above 3.5"],
    # Skip missing subjects as well as the "Uncategorized" placeholder
    "topic": df.loc[df["subject"].notna() & (df["subject"] != "Uncategorized"), "subject"].values,
    "institution": [*df.loc[df['institution'].notna(), 'institution'].values, "top university"],
    "level": ["beginner", "intermediate", "advanced"],
    "course_name": df.loc[df['course_name'].notna(), 'course_name'].values,
    "num_rating": ["1000", "2000", "more than 1500", "less than 500"],
    "provider": df.loc[df['provider'].notna(), 'provider'].values,
    "duration": ["4 weeks", "6 months 3 weeks", "2 months", "more than 3 months", "less than 4 months", "self paced", "short", "long",],
    # 20 random dollar amounts plus two qualitative price words
    "pricing": [*[f"${random.randint(50, 10000)}" for _ in range(20)], "free", "cheap"],
    "start_date": ["June 1", "July 15", "August 20", "flexible"]
}


def generate_samples(intents, num_rounds=6):
    """Fill each intent template with random slot values and paraphrase the results.

    Every round instantiates each template once, drawing a fresh value for each
    slot from the module-level ``entity_values``. The first round keeps the
    filled-in templates verbatim; each later round passes its filled-in
    templates through ``paraphrase_intents``.

    Args:
        intents: List of dicts with a ``"text"`` format string containing ``{}``
            placeholders and a ``"slots"`` list naming, in order, the entity
            used for each placeholder.
        num_rounds: How many times every template is instantiated. Defaults to
            6, i.e. one verbatim round followed by five paraphrased rounds.

    Returns:
        A ``(texts, slots)`` tuple of two parallel lists: the generated texts
        and, for each text, the dict mapping slot name to the value drawn for it.
        The dicts record the values used to fill the template; ``paraphrase_intents``
        returns model output, so a value may not appear verbatim in a paraphrased
        text.
    """
    paraphrased = []
    all_slots = []

    for round_idx in range(num_rounds):
        samples = []
        slots = []
        for intent_obj in intents:
            slot_names = intent_obj['slots']
            # Draw one value per placeholder; formatting from the list (rather than
            # from the dict's values) keeps a repeated slot name from dropping an argument
            values = [random.choice(entity_values[name]) for name in slot_names]
            samples.append(intent_obj["text"].format(*values))
            slots.append(dict(zip(slot_names, values)))

        if round_idx == 0:
            # Keep one untouched copy of every template instance
            paraphrased.extend(samples)
        else:
            paraphrased.extend(paraphrase_intents(samples))

        all_slots.extend(slots)

    return paraphrased, all_slots


# Build the synthetic queries (filled templates plus paraphrases) for both intents
recommend_samples, recommend_slots_obj = generate_samples(recommend_intents)
details_samples, details_slots_obj = generate_samples(details_intents)

# Collect the generated queries, their intent labels and their slot values
data = {
    'text': recommend_samples + details_samples,
    'intent': ['recommend'] * len(recommend_samples) + ['details'] * len(details_samples),
    'entities': recommend_slots_obj + details_slots_obj,
}
intent_df = pd.DataFrame(data)

# Append the CLINC150 rows under a fresh 0..n-1 index and lowercase every query
queries_df = pd.concat([intent_df, clinc_df], ignore_index=True)
queries_df['text'] = queries_df['text'].str.lower()

In [28]:
def find_first_common_index(text_tokens, entity_values_tokens):
    """Locate in the text the first entity token that also occurs in the text.

    The entity tokens are scanned in their own order; for the first one found
    in ``text_tokens`` the position of its first occurrence there is returned.

    Returns:
        That position, or -1 when the two token lists share no token.
    """
    positions = (
        text_tokens.index(token)
        for token in entity_values_tokens
        if token in text_tokens
    )
    return next(positions, -1)

def _exact_spans(text_tokens, value_tokens):
    """Return (start, end) for every non-overlapping exact occurrence of value_tokens."""
    width = len(value_tokens)
    spans = []
    start = 0
    while width and start + width <= len(text_tokens):
        if text_tokens[start:start + width] == value_tokens:
            spans.append((start, start + width))
            start += width
        else:
            start += 1
    return spans


def _longest_partial_span(text_tokens, value_tokens):
    """Return, as a 0- or 1-item list, the longest run of adjacent text tokens found in value_tokens."""
    vocabulary = set(value_tokens)
    best = None
    run_start = None
    # One extra iteration past the end closes a run that reaches the last token
    for idx in range(len(text_tokens) + 1):
        inside = idx < len(text_tokens) and text_tokens[idx] in vocabulary
        if inside:
            if run_start is None:
                run_start = idx
        elif run_start is not None:
            if best is None or idx - run_start > best[1] - best[0]:
                best = (run_start, idx)
            run_start = None
    return [best] if best else []


def annotate_slots(text, entities):
    """Produce one BIO slot tag per spaCy token of ``text``.

    Tokens containing ``'certif'`` start as ``'B-certified'`` and all others as
    ``'O'``. Then, for each entity, the lower-cased value is tokenized and located
    in the text as a contiguous span: every exact occurrence is tagged
    ``B-<entity>`` followed by ``I-<entity>``. When the value does not occur
    exactly (e.g. the text was paraphrased), only the longest contiguous run of
    text tokens that belong to the value is tagged. Tokens that merely share a
    word with the value elsewhere in the text are left untouched, so an ``I-``
    tag never appears without its ``B-`` tag.

    Entities are applied in dict order, and a later entity overwrites tags set
    by an earlier one on the same token.

    Args:
        text: The query text; entity values are lower-cased before matching, the
            text is matched as given.
        entities: Mapping of entity name to its string value in this query.

    Returns:
        A list of tags, one per token of ``text``.

    Raises:
        ValueError: If the entities cannot be processed (for example a value that
            is not a string); the original exception is chained as the cause.
    """
    doc = nlp(text)
    text_tokens = [token.text for token in doc]
    slots = ['B-certified' if 'certif' in token else 'O' for token in text_tokens]

    try:
        for entity, value in entities.items():
            value_tokens = [token.text for token in nlp(value.lower())]
            spans = _exact_spans(text_tokens, value_tokens) or _longest_partial_span(text_tokens, value_tokens)
            for start, end in spans:
                slots[start] = f'B-{entity}'
                for idx in range(start + 1, end):
                    slots[idx] = f'I-{entity}'
    except Exception as exc:
        # Keep the ValueError contract, but say what failed and preserve the cause
        raise ValueError(f'Could not annotate entities {entities!r} in text {text!r}') from exc
    return slots


# Tag every query with the slot values recorded for it
annotated_data = [
    annotate_slots(query_text, query_entities)
    for query_text, query_entities in zip(queries_df['text'], queries_df['entities'])
]

queries_df['slot_tags'] = annotated_data
# The raw slot values are no longer needed once the tags exist
queries_df = queries_df.drop(columns=['entities'])
queries_df = shuffle(queries_df, random_state=42).reset_index(drop=True)
queries_df.head()

Unnamed: 0,text,intent,slot_tags
0,is it possible to recommend a course with a st...,recommend,"['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', ..."
1,appreciate it,courtesy,"['O', 'O']"
2,"what is the duration, pricing, and rating of t...",details,"['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', ..."
3,peace out!,courtesy,"['O', 'O', 'O']"
4,"see you later, alligator",courtesy,"['O', 'O', 'O', 'O', 'O']"


In [None]:
# Write the shuffled, slot-annotated queries to the project folder (row index omitted).
# NOTE(review): `slot_tags` holds Python lists, which CSV stores as text — readers
# presumably need to parse that column back into lists; confirm downstream.
queries_df.to_csv(f'{base_dir}/queries_df.csv', index=False)