# SPOTIFY MUSIC SENTIMENT ANALYZER MODEL

In [1]:
import pandas as pd
import numpy as np
import re
import nltk
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer
from nltk.stem import WordNetLemmatizer
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.svm import LinearSVC
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score
from textblob import TextBlob

In [2]:
data = pd.read_csv("reviews.csv")
data

Unnamed: 0,Time_submitted,Review,Rating,Total_thumbsup,Reply
0,2022-07-09 15:00:00,"Great music service, the audio is high quality...",5,2,
1,2022-07-09 14:21:22,Please ignore previous negative rating. This a...,5,1,
2,2022-07-09 13:27:32,"This pop-up ""Get the best Spotify experience o...",4,0,
3,2022-07-09 13:26:45,Really buggy and terrible to use as of recently,1,1,
4,2022-07-09 13:20:49,Dear Spotify why do I get songs that I didn't ...,1,1,
...,...,...,...,...,...
61589,2022-01-01 03:01:29,Even though it was communicated that lyrics fe...,1,6,
61590,2022-01-01 02:13:40,"Use to be sooo good back when I had it, and wh...",1,0,
61591,2022-01-01 01:02:29,This app would be good if not for it taking ov...,2,10,
61592,2022-01-01 00:49:23,The app is good hard to navigate and won't jus...,2,1,


In [3]:
data = data[['Review']]
data

Unnamed: 0,Review
0,"Great music service, the audio is high quality..."
1,Please ignore previous negative rating. This a...
2,"This pop-up ""Get the best Spotify experience o..."
3,Really buggy and terrible to use as of recently
4,Dear Spotify why do I get songs that I didn't ...
...,...
61589,Even though it was communicated that lyrics fe...
61590,"Use to be sooo good back when I had it, and wh..."
61591,This app would be good if not for it taking ov...
61592,The app is good hard to navigate and won't jus...


## Data Preprocessing

In [4]:
from nltk.corpus import stopwords
nltk.download('stopwords')
stopwords = stopwords.words('english')
stemmer = PorterStemmer()
lemmatizer = WordNetLemmatizer()

def preprocess_text(text):
    text = text.lower()
    text = re.sub('[^a-zA-Z]', ' ', text)
    text = text.split()
    text = [word for word in text if word not in stopwords]
    text = [lemmatizer.lemmatize(word) for word in text]
    text = ' '.join(text)
    return text

data['Review'] = data['Review'].apply(preprocess_text)


def get_sentiment(Review):
    analysis = TextBlob(Review)
    if analysis.sentiment.polarity > 0:
        return 'positive'
    elif analysis.sentiment.polarity == 0:
        return 'neutral'
    else:
        return 'negative'

data['Sentiment'] = data['Review'].apply(get_sentiment)
data


[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\C\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['Review'] = data['Review'].apply(preprocess_text)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['Sentiment'] = data['Review'].apply(get_sentiment)


Unnamed: 0,Review,Sentiment
0,great music service audio high quality app eas...,positive
1,please ignore previous negative rating app sup...,positive
2,pop get best spotify experience android annoyi...,positive
3,really buggy terrible use recently,negative
4,dear spotify get song put playlist shuffle play,neutral
...,...,...
61589,even though communicated lyric feature availab...,positive
61590,use sooo good back downloaded free version pic...,positive
61591,app would good taking device start comp spotif...,negative
61592,app good hard navigate let play song click pla...,positive


## Model Training & Evaluation

In [5]:
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression

X_train, X_test, y_train, y_test = train_test_split(data['Review'], data['Sentiment'], test_size=0.2, random_state=42)

vectorizer = CountVectorizer(preprocessor=lambda x: x, tokenizer=lambda x: x)
X_train_counts = vectorizer.fit_transform(X_train)
X_test_counts = vectorizer.transform(X_test)

## MultinomialNB

In [6]:
model_nb = MultinomialNB()
model_nb.fit(X_train_counts, y_train)
y_pred_nb = model_nb.predict(X_test_counts)
acc_nb = accuracy_score(y_test, y_pred_nb)
prec_nb = precision_score(y_test, y_pred_nb, average='weighted')
recall_nb = recall_score(y_test, y_pred_nb, average='weighted')
print(f'MultinomialNB: Accuracy: {acc_nb:.4f} | Precision: {prec_nb:.4f} | Recall: {recall_nb:.4f}\n')

MultinomialNB: Accuracy: 0.6617 | Precision: 0.5700 | Recall: 0.6617



## Logistic Regression

In [7]:
model_lr = LogisticRegression()
model_lr.fit(X_train_counts, y_train)
y_pred_lr = model_lr.predict(X_test_counts)
acc_lr = accuracy_score(y_test, y_pred_lr)
prec_lr = precision_score(y_test, y_pred_lr, average='weighted')
recall_lr = recall_score(y_test, y_pred_lr, average='weighted')
print(f'LogisticRegression: Accuracy: {acc_lr:.4f} | Precision: {prec_lr:.4f} | Recall: {recall_lr:.4f}\n')


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


LogisticRegression: Accuracy: 0.6674 | Precision: 0.6163 | Recall: 0.6674



## SVC

In [12]:
from sklearn.svm import SVC
model_svc = SVC()
model_svc.fit(X_train_counts, y_train)
y_pred_svc = model_svc.predict(X_test_counts)
acc_svc = accuracy_score(y_test, y_pred_svc)
prec_svc = precision_score(y_test, y_pred_svc, average='weighted')
recall_svc = recall_score(y_test, y_pred_svc, average='weighted')
print(f'SVC: Accuracy: {acc_svc:.4f} | Precision: {prec_svc:.4f} | Recall: {recall_svc:.4f}\n')


SVC: Accuracy: 0.6660 | Precision: 0.5684 | Recall: 0.6660



  _warn_prf(average, modifier, msg_start, len(result))
