# Building the Model

## Imports

In [1]:
import pandas as pd
import sys
import os
from sklearn.model_selection import train_test_split

# Make the parent of the notebook's working directory importable so that
# project-local packages can be imported from inside this notebook.
project_root = os.path.abspath('..')
# Guard the append: re-running this cell must not pile up duplicate entries
# on sys.path.
if project_root not in sys.path:
    sys.path.append(project_root)

## Load and Prep Data

In [2]:
# Read the labeled intent dataset from the processed-data folder.
df = pd.read_csv(filepath_or_buffer="../data/processed/golden_intent_labeled.csv")

# Preview the first five rows of the text and label columns.
df.loc[:, ['cleaned_text', 'intent']].head(n=5)

Unnamed: 0,cleaned_text,intent
0,whats that egg website people talk about,Other
1,why ios,Other
2,we can assist you we recommend updating to io...,Other
3,thats better than having an unstable connecti...,Other
4,is probably one of the best airlines ive ever...,Other


## Split Data

In [3]:
# Feature series (cleaned text) and label series (intent).
X, Y = df['cleaned_text'], df['intent']

# 80/20 split with a fixed seed; stratifying on the labels keeps the class
# proportions the same in both partitions.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    Y,
    stratify=Y,
    test_size=0.2,
    random_state=42,
)

# Replace missing text with empty strings in both feature partitions.
X_train, X_test = X_train.fillna(""), X_test.fillna("")


## Model Creation and Evaluation

In [5]:
from scripts.evaluate_model import evaluate_model

# Runs executed in order before the final one: the baseline (no extra keyword
# arguments), then tweet length, then the question flag.
preliminary_runs = (
    {},
    {'extra_feature_names': ['tweet_length']},
    {'extra_feature_names': ['is_question']},
)
for run_kwargs in preliminary_runs:
    evaluate_model(df, **run_kwargs)

# The sentiment run stays a bare last expression so that its return value is
# what the cell displays.
evaluate_model(df, extra_feature_names=['sentiment_score'])


Unnamed: 0,tweet_text,true_intent,predicted_intent,confidence_score,correct,sentiment_score
4282,well the network ran fine when i kept dropbox...,Praise/Thank You,Other,0.882973,False,0.158333
7615,got my order completely wrong how do i get th...,Other,Other,0.920311,True,-0.500000
9703,looping in to assist you further pl,Other,Other,0.897715,True,0.000000
2955,our team is reviewing your email and will be ...,Praise/Thank You,Praise/Thank You,0.683889,True,0.000000
7372,does adobeillustrator have a story feature for...,Praise/Thank You,Praise/Thank You,0.813294,True,0.350000
...,...,...,...,...,...,...
3559,when is the issue with apple music going be f...,Technical Support,Technical Support,0.959119,True,0.125000
4822,thank you for my dinner from tmobiletuesday,Praise/Thank You,Praise/Thank You,0.984915,True,0.000000
1732,when i lived in york i often had this problem...,Technical Support,Technical Support,0.748925,True,-0.500000
4079,these features are accountbased and you shoul...,Account Management,Other,0.897679,False,0.450000
