In [5]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import SVC
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
import re

# Load the dataset from 'filtered.csv' (the path is relative to the working directory).
# Later code reads the 'questionText' (input text) and 'topics' (label) columns from it.
df = pd.read_csv('filtered.csv')

# Clean tweet text
def clean_tweet_text(tweet):
    """Normalise raw text into a space-joined string of lowercase lemmas.

    Steps, in order: strip URLs, drop punctuation, drop digits, lowercase,
    tokenize with NLTK's ``word_tokenize`` and lemmatize every token with
    ``WordNetLemmatizer``.

    Parameters
    ----------
    tweet : str or missing value
        Raw text. A missing value (``None``, ``NaN``, ``pd.NA`` -- e.g. an
        empty CSV cell) is treated as empty text; any other non-string value
        is converted with ``str`` before cleaning.

    Returns
    -------
    str
        The cleaned tokens joined by single spaces, or ``''`` for missing
        input.
    """
    if not isinstance(tweet, str):
        # re.sub raises TypeError on non-strings, so a single empty cell in
        # the text column would otherwise abort the whole .apply() call.
        if pd.api.types.is_scalar(tweet) and pd.isna(tweet):
            return ''
        tweet = str(tweet)
    # Remove URLs (anything starting with "http" up to the next whitespace)
    tweet = re.sub(r'http\S+', '', tweet)
    # Remove every character that is neither a word character nor whitespace.
    # Note: \w also matches "_", so underscores are kept.
    tweet = re.sub(r'[^\w\s]', '', tweet)
    # Remove runs of digits
    tweet = re.sub(r'\d+', '', tweet)
    # Lowercase, then tokenize
    tokens = word_tokenize(tweet.lower())
    # Lemmatize tokens
    lemmatizer = WordNetLemmatizer()
    tokens = [lemmatizer.lemmatize(token) for token in tokens]
    # Return cleaned text as a single space-separated string
    return ' '.join(tokens)

# Add a normalised-text column derived from the raw question text
df['clean_tweet_text'] = df['questionText'].apply(clean_tweet_text)

# Seeded 80/20 split: sample the training rows, the remaining rows form the test set
train = df.sample(random_state=1, frac=0.8)
test = df.drop(index=train.index)

# TF-IDF features (English stop words removed, at most 1000 terms).
# The vocabulary is fitted on the training text only and then reused for the test text.
vectorizer = TfidfVectorizer(max_features=1000, stop_words='english')
X_train = vectorizer.fit_transform(train['clean_tweet_text']).toarray()
X_test = vectorizer.transform(test['clean_tweet_text']).toarray()

# Label vectors come from the 'topics' column of each split
y_train, y_test = (split['topics'].values for split in (train, test))

# Fit a linear-kernel Support Vector Machine on the training data
svm = SVC(random_state=1, kernel='linear').fit(X_train, y_train)

# Define function to predict emotion from tweet text
def predict_emotion(tweet):
    """Return the label the trained classifier assigns to one piece of text.

    The text is normalised with ``clean_tweet_text``, turned into a dense
    TF-IDF row by the already-fitted module-level ``vectorizer`` and then
    classified by the module-level ``svm``.
    """
    # The vectorizer works on a collection of documents, hence the one-element list
    features = vectorizer.transform([clean_tweet_text(tweet)]).toarray()
    # predict() yields one label per input row; only a single row was passed in
    labels = svm.predict(features)
    return labels[0]


In [6]:
# Example input: a free-text description to classify
question = "I can't stop worrying about the future, my mind keeps racing with negative thoughts and I feel like I can't breathe."
# Run the clean -> vectorize -> classify path defined in predict_emotion on the example
predicted_emotion = predict_emotion(question)
# Show the predicted label
print(predicted_emotion)

Anxiety
