In [1]:
import pandas as pd
from nltk.sentiment.vader import SentimentIntensityAnalyzer

# Read the CSV file into a DataFrame
df = pd.read_csv("Ai Trainer.txt", header=None, encoding='utf8', sep='\t')
# Initialize an instance of the SentimentIntensityAnalyzer
analyzer = SentimentIntensityAnalyzer()

# Initialize empty lists to store the extracted values
dates = []
times = []
names = []
messages = []
sentiments = []  # New list for sentiment predictions

# Iterate through each row in the DataFrame
for text in df[0]:  # Assuming the message column is in the first column (0-indexed)
    # Split the text by " - " to extract the date and time
    parts = text.split(' - ', 1)

    # Check if the split produced two parts
    if len(parts) == 2:
        date_time = parts[0]
        date, time = date_time.split(', ', 1)
        message = parts[1]

        # Split the message by ": " to separate the name and message
        if ": " in message:
            name, message = message.split(": ", 1)
        else:
            # Set a default name when splitting is not possible
            name = "Ai Trainer"

        # Analyze sentiment of the message
        sentiment = analyzer.polarity_scores(message)['compound']

        # Append the extracted values to the lists
        dates.append(date)
        times.append(time)
        names.append(name)
        messages.append(message)
        sentiments.append(sentiment)

    else:
        # Handle the case where the split didn't produce two parts
        # You can choose to skip, print a message, or handle it as needed
        pass

# Ensure that the lengths of the lists match the length of the index
expected_length = len(df)
while len(dates) < expected_length:
    dates.append('')
    times.append('')
    names.append('Ai Trainer')
    messages.append('')
    sentiments.append(0.0)  # Default sentiment score

# Create new columns in the DataFrame for date, time, name, message, and sentiment
df['Date'] = dates
df['Time'] = times
df['Name'] = names
df['Message'] = messages
df['Sentiment'] = sentiments

# Print the DataFrame with separated columns
dataset=df[['Date', 'Time', 'Name', 'Message', 'Sentiment']]
dataset


Unnamed: 0,Date,Time,Name,Message,Sentiment
0,06/10/23,12:50 pm,Ai Trainer,Messages and calls are end-to-end encrypted. N...,-0.2960
1,09/10/23,11:34 am,Ai Trainer,Hi sir,0.0000
2,09/10/23,11:34 am,❤️Vignesh Vs 😘,Hi sir,0.0000
3,09/10/23,11:35 am,❤️Vignesh Vs 😘,<Media omitted>,0.0000
4,09/10/23,11:41 am,Ai Trainer,This message was deleted,0.0000
...,...,...,...,...,...
142,30/10/23,6:56 pm,❤️Vignesh Vs 😘,Soo free ya erutha call me sir,0.5106
143,30/10/23,11:10 pm,Ai Trainer,Tomorrow I will call you bro,0.0000
144,,,Ai Trainer,,0.0000
145,,,Ai Trainer,,0.0000


In [2]:
# Threshold for classifying sentiment
threshold = 0.0  # You can adjust this threshold as needed

# Create a new column to store sentiment labels (positive or negative)
dataset['Sentiment_Label'] = dataset['Sentiment'].apply(lambda x: 'Positive' if x > threshold else 'Negative')

# Print the DataFrame with sentiment labels
dataset=dataset[['Date', 'Time', 'Name', 'Message', 'Sentiment', 'Sentiment_Label']]
dataset


Unnamed: 0,Date,Time,Name,Message,Sentiment,Sentiment_Label
0,06/10/23,12:50 pm,Ai Trainer,Messages and calls are end-to-end encrypted. N...,-0.2960,Negative
1,09/10/23,11:34 am,Ai Trainer,Hi sir,0.0000,Negative
2,09/10/23,11:34 am,❤️Vignesh Vs 😘,Hi sir,0.0000,Negative
3,09/10/23,11:35 am,❤️Vignesh Vs 😘,<Media omitted>,0.0000,Negative
4,09/10/23,11:41 am,Ai Trainer,This message was deleted,0.0000,Negative
...,...,...,...,...,...,...
142,30/10/23,6:56 pm,❤️Vignesh Vs 😘,Soo free ya erutha call me sir,0.5106,Positive
143,30/10/23,11:10 pm,Ai Trainer,Tomorrow I will call you bro,0.0000,Negative
144,,,Ai Trainer,,0.0000,Negative
145,,,Ai Trainer,,0.0000,Negative


In [3]:
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn import metrics

# Define your feature (X) and target (y) variables
X = dataset['Message']
y = dataset['Sentiment_Label']  # Use 'Sentiment_Label' as the target variable

# Split the dataset into a training set and a test set
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create a TfidfVectorizer object
tfidf_vectorizer = TfidfVectorizer(stop_words='english')

# Fit the vectorizer to the training data
tfidf_train = tfidf_vectorizer.fit_transform(X_train)

# Transform the test data using the fitted vectorizer
tfidf_test = tfidf_vectorizer.transform(X_test)

# Initialize the Multinomial Naive Bayes classifier
clf = MultinomialNB()

# Fit the classifier with the training data
clf.fit(tfidf_train, y_train)

# Predict on the test set
pred = clf.predict(tfidf_test)

# Calculate accuracy or other metrics
score = metrics.accuracy_score(y_test, pred)
print("Accuracy: %0.3f" % score)


Accuracy: 0.933


In [4]:
from sklearn.metrics import classification_report

report=classification_report(y_test, pred)


In [5]:
print(report)

              precision    recall  f1-score   support

    Negative       0.93      1.00      0.96        27
    Positive       1.00      0.33      0.50         3

    accuracy                           0.93        30
   macro avg       0.97      0.67      0.73        30
weighted avg       0.94      0.93      0.92        30

