In [6]:
import pandas as pd

# Column names assigned to the header-less training files.
cols = ['id', 'text', 'label', 'intensity']
# Directory prefix prepended to every data file name.
path = "./"

# Reader options shared by all four files: tab-separated, no header row,
# columns named from `cols`, first column ('id') used as the index.
tsv_options = dict(header=None, sep='\t', names=cols, index_col=0)

anger_train = pd.read_csv(path + 'angertraindata.txt', **tsv_options)
fear_train = pd.read_csv(path + 'feartraindata.txt', **tsv_options)
joy_train = pd.read_csv(path + 'joytraindata.txt', **tsv_options)
sad_train = pd.read_csv(path + 'sadtraindata.txt', **tsv_options)


In [8]:
import re
from nltk.corpus import stopwords
from nltk.tokenize import WordPunctTokenizer

# English stop words, held in a set for fast membership tests.
stop_words = set(stopwords.words('english'))

# Tokenizer used by tweet_cleaner to split the lower-cased text.
tok = WordPunctTokenizer()

# Fragments removed from the raw text before tokenizing.
pat1 = r'@[A-Za-z0-9_]+'            # @mentions
pat2 = r'https?://[A-Za-z0-9./]+'   # http/https links
pat3 = r'[0-9]+'                    # runs of digits
# NOTE(review): pat2 only consumes letters, digits, '.' and '/', so a URL
# containing other characters (e.g. '-', '?', '=') is only partially removed.

# Single alternation so one re.sub pass removes all three kinds of fragment.
combined_pat = '|'.join([pat1, pat2, pat3])

def tweet_cleaner(text):
    """Return a cleaned, space-joined version of ``text``.

    Steps, in order: remove every match of ``combined_pat``, lower-case the
    remainder, tokenize it with ``tok``, drop tokens found in ``stop_words``,
    then join the surviving tokens with single spaces and strip the ends.
    """
    tokens = tok.tokenize(re.sub(combined_pat, '', text).lower())
    kept = [token for token in tokens if token not in stop_words]
    return ' '.join(kept).strip()


In [9]:
# Add a 'clean_text' column to each training frame by running tweet_cleaner
# over its 'text' column.
for emotion_frame in (anger_train, fear_train, joy_train, sad_train):
    emotion_frame['clean_text'] = emotion_frame['text'].apply(tweet_cleaner)


In [10]:
# Show the first rows of every frame under its heading, followed by a
# block of blank lines as a visual separator.
for heading, frame in (
    ("Cleaned Anger Train Data:", anger_train),
    ("Cleaned Fear Train Data:", fear_train),
    ("Cleaned Joy Train Data:", joy_train),
    ("Cleaned Sad Train Data:", sad_train),
):
    print(heading)
    print(frame.head())
    print("\n\n\n")


Cleaned Anger Train Data:
                                                    text  label  intensity  \
id                                                                           
10000  How the fu*k! Who the heck! moved my fridge!.....  anger      0.938   
10001  So my Indian Uber driver just called someone t...  anger      0.896   
10002  @DPD_UK I asked for my parcel to be delivered ...  anger      0.896   
10003  so ef whichever butt wipe pulled the fire alar...  anger      0.896   
10004  Don't join @BTCare they put the phone down on ...  anger      0.896   

                                              clean_text  
id                                                        
10000  fu * k ! heck ! moved fridge !... knock landlo...  
10001  indian uber driver called someone n word . ' m...  
10002  asked parcel delivered pick store address # fu...  
10003  ef whichever butt wipe pulled fire alarm davis...  
10004  ' join put phone , talk rude . taking money ac...  




Cleaned Fe

In [11]:
from sklearn.feature_extraction.text import TfidfVectorizer


# Upper bound on the vocabulary size of every TF-IDF vectorizer below.
TFIDF_MAX_FEATURES = 5000

# One vectorizer per emotion. fit_transform() re-learns the vocabulary and
# IDF weights on every call, so a single shared instance would end up fitted
# only on the last corpus it saw and could not transform new text for the
# other emotions consistently with their training matrices.
tfidf_vectorizer_anger = TfidfVectorizer(max_features=TFIDF_MAX_FEATURES)
tfidf_vectorizer_fear = TfidfVectorizer(max_features=TFIDF_MAX_FEATURES)
tfidf_vectorizer_joy = TfidfVectorizer(max_features=TFIDF_MAX_FEATURES)
tfidf_vectorizer_sad = TfidfVectorizer(max_features=TFIDF_MAX_FEATURES)

X_anger_train = tfidf_vectorizer_anger.fit_transform(anger_train['clean_text'])
X_fear_train = tfidf_vectorizer_fear.fit_transform(fear_train['clean_text'])
X_joy_train = tfidf_vectorizer_joy.fit_transform(joy_train['clean_text'])
X_sad_train = tfidf_vectorizer_sad.fit_transform(sad_train['clean_text'])

# Backward-compatible alias: the original shared instance was last fitted on
# the sadness corpus, so the old name keeps pointing at an equivalent object.
# Prefer the per-emotion vectorizers above when transforming new text.
tfidf_vectorizer = tfidf_vectorizer_sad


In [18]:

# Imported here so the cell runs on a fresh kernel; no visible cell imports it.
from sklearn.svm import LinearSVR

# Regression targets: the 'intensity' column of each training frame.
# Defined here because no visible cell creates them, so a clean
# Restart & Run All would otherwise fail with NameError.
y_anger_train = anger_train['intensity']
y_fear_train = fear_train['intensity']
y_joy_train = joy_train['intensity']
y_sad_train = sad_train['intensity']

# Identical hyper-parameters for all four regressors, kept in one place.
SVR_PARAMS = dict(epsilon=0.0, dual=False, loss='squared_epsilon_insensitive')

svm_model_anger = LinearSVR(**SVR_PARAMS)
svm_model_fear = LinearSVR(**SVR_PARAMS)
svm_model_joy = LinearSVR(**SVR_PARAMS)
svm_model_sad = LinearSVR(**SVR_PARAMS)

# Fit each model on its own emotion's TF-IDF matrix and intensity targets.
svm_model_anger.fit(X_anger_train, y_anger_train)
svm_model_fear.fit(X_fear_train, y_fear_train)
svm_model_joy.fit(X_joy_train, y_joy_train)
svm_model_sad.fit(X_sad_train, y_sad_train)


In [19]:
from sklearn.metrics import mean_squared_error
import numpy as np

def calculate_rmse(model, X, y):
    """Return the root-mean-squared error of ``model`` on ``(X, y)``.

    The model's ``predict(X)`` output is compared against ``y`` with
    ``mean_squared_error`` and the square root of that value is returned.
    """
    predictions = model.predict(X)
    mse = mean_squared_error(y, predictions)
    return np.sqrt(mse)

# Score every model on the same matrix/targets it was passed here, i.e. the
# *_train data, so these figures are training-set RMSE values.
rmse_anger, rmse_fear, rmse_joy, rmse_sad = (
    calculate_rmse(model, features, targets)
    for model, features, targets in (
        (svm_model_anger, X_anger_train, y_anger_train),
        (svm_model_fear, X_fear_train, y_fear_train),
        (svm_model_joy, X_joy_train, y_joy_train),
        (svm_model_sad, X_sad_train, y_sad_train),
    )
)

print(
    f"RMSE for Anger: {rmse_anger}",
    f"RMSE for Fear: {rmse_fear}",
    f"RMSE for Joy: {rmse_joy}",
    f"RMSE for Sadness: {rmse_sad}",
    sep="\n",
)


RMSE for Anger: 0.06292095133111064
RMSE for Fear: 0.0704469580665551
RMSE for Joy: 0.07229027429011231
RMSE for Sadness: 0.06536418307972738
