In [15]:
import pandas as pd

# Load the dataset into a DataFrame
df = pd.read_csv('chat_dataset.csv')

# Explore the dataset
print(df.head())
print()
print(df['sentiment'].value_counts())

                              message sentiment
0          I really enjoyed the movie  positive
1               The food was terrible  negative
2  I'm not sure how I feel about this   neutral
3           The service was excellent  positive
4              I had a bad experience  negative

neutral     259
positive    178
negative    147
Name: sentiment, dtype: int64


In [18]:
import re

# Text preprocessing function
def preprocess_text(text):
    # Remove URLs
    text = re.sub(r'http\S+|www\S+|https\S+', '', text)
    
    # Remove special characters and numbers
    text = re.sub(r'[^A-Za-z\s]', '', text)
    
    # Convert to lowercase
    text = text.lower()
    
    # Remove extra whitespaces
    text = re.sub('\s+', ' ', text).strip()
    
    return text

# Apply text preprocessing to the 'message' column
df['clean_message'] = df['message'].apply(preprocess_text)
print(df)

                                               message sentiment  \
0                           I really enjoyed the movie  positive   
1                                The food was terrible  negative   
2                   I'm not sure how I feel about this   neutral   
3                            The service was excellent  positive   
4                               I had a bad experience  negative   
..                                                 ...       ...   
579  I have to cancel my vacation plans because I c...  negative   
580  My computer crashed and I lost all my importan...  negative   
581  I got into a car accident and my car is totale...  negative   
582  I have a cold and can't stop coughing. it's re...  negative   
583  I just found out my ex is dating someone new. ...  negative   

                                         clean_message  
0                           i really enjoyed the movie  
1                                the food was terrible  
2           

In [19]:
from sklearn.feature_extraction.text import TfidfVectorizer

# Create a TF-IDF vectorizer
vectorizer = TfidfVectorizer()

# Transform the preprocessed text data into numerical features
X = vectorizer.fit_transform(df['clean_message'])

# Convert the sentiment labels into numerical values
y = df['sentiment'].map({'positive': 1, 'negative': -1, 'neutral': 0})

In [20]:
from sklearn.model_selection import train_test_split

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [21]:
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Initialize and train the SVM model
svm = SVC()
svm.fit(X_train, y_train)

# Make predictions on the test set
y_pred = svm.predict(X_test)

# Evaluate the model's performance
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='weighted')
recall = recall_score(y_test, y_pred, average='weighted')
f1 = f1_score(y_test, y_pred, average='weighted')

print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F1-Score:", f1)

Accuracy: 0.8632478632478633
Precision: 0.8790823211875844
Recall: 0.8632478632478633
F1-Score: 0.858254597955964


In [22]:
from sklearn.model_selection import GridSearchCV

# Define the parameter grid for grid search
param_grid = {'C': [1, 10, 100], 'gamma': [0.1, 0.01, 0.001]}

# Perform grid search for hyperparameter tuning
grid_search = GridSearchCV(SVC(), param_grid, cv=5)
grid_search.fit(X_train, y_train)

# Get the best hyperparameters
best_params = grid_search.best_params_
print("Best Hyperparameters:", best_params)

Best Hyperparameters: {'C': 100, 'gamma': 0.1}


In [30]:
# Predict sentiment for new chat messages
new_messages = ['I love this product!', 'This is horrible', 'It seems okay.']
preprocessed_new_messages = [preprocess_text(msg) for msg in new_messages]
X_new = vectorizer.transform(preprocessed_new_messages)
sentiment_predictions = svm.predict(X_new)

for message, sentiment in zip(new_messages, sentiment_predictions):
    print("Message:", message)
    print("Sentiment:", sentiment)
    print()

Message: I love this product!
Sentiment: 1

Message: This is horrible
Sentiment: -1

Message: It seems okay.
Sentiment: 0

