In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import classification_report, accuracy_score

# Load the datasets
train_data = pd.read_csv('atis_intents_train.csv')
test_data = pd.read_csv('atis_intents_test.csv')
full_data = pd.read_csv('atis_intents.csv')

# Rename columns for consistent handling
train_data.columns = ['intent', 'text']
test_data.columns = ['intent', 'text']
full_data.columns = ['intent', 'text']

# Preview the datasets
print("Training Data:")
print(train_data.head())
print("\nTesting Data:")
print(test_data.head())
print("\nFull Dataset:")
print(full_data.head())

# Preprocessing: Extract text and labels
train_texts, train_labels = train_data['text'], train_data['intent']
test_texts, test_labels = test_data['text'], test_data['intent']

# Vectorization (Converting text to numerical data)
vectorizer = CountVectorizer()
X_train = vectorizer.fit_transform(train_texts)
X_test = vectorizer.transform(test_texts)

# Model Training: Using Naive Bayes for classification
model = MultinomialNB()
model.fit(X_train, train_labels)

# Model Evaluation
predictions = model.predict(X_test)
print("\nClassification Report:")
print(classification_report(test_labels, predictions))
print("\nAccuracy:", accuracy_score(test_labels, predictions))

# Predict on full dataset if needed
full_texts = full_data['text']
full_predictions = model.predict(vectorizer.transform(full_texts))
full_data['predicted_intent'] = full_predictions

# Save predictions to a CSV file
full_data.to_csv('atis_full_predictions.csv', index=False)
print("\nPredictions saved to 'atis_full_predictions.csv'")


Training Data:
             intent                                               text
0       atis_flight   what flights are available from pittsburgh to...
1  atis_flight_time   what is the arrival time in san francisco for...
2      atis_airfare            cheapest airfare from tacoma to orlando
3      atis_airfare   round trip fares from pittsburgh to philadelp...
4       atis_flight   i need a flight tomorrow from columbus to min...

Testing Data:
         intent                                               text
0  atis_airfare   on april first i need a ticket from tacoma to...
1   atis_flight   on april first i need a flight going from pho...
2   atis_flight   i would like a flight traveling one way from ...
3   atis_flight   i would like a flight from orlando to salt la...
4   atis_flight   i need a flight from toronto to newark one wa...

Full Dataset:
             intent                                               text
0       atis_flight   what flights are available from pi