<a href="https://colab.research.google.com/github/gfeyzakorkmaz/gfeyzakorkmaz/blob/main/pythonday16.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [11]:
import pandas as pd

path = '/content/drive/MyDrive/Dataset/extended_marketing_data.xlsx'

df = pd.read_excel(path)
df.head()

Unnamed: 0,Customer ID,Age,Gender,Total Spent,Converted,Comments
0,1,23,F,150,1,Satisfied with the service
1,2,35,M,600,1,Needs improvement in delivery
2,3,45,F,800,0,Regular customer
3,4,52,M,200,0,Occasional buyer
4,5,29,F,300,1,High spender


In [12]:
!pip install vaderSentiment



In [16]:
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
analyzer = SentimentIntensityAnalyzer()

comments = df['Comments'].tolist()

for comment in comments:
  sentiment = analyzer.polarity_scores(comment)
  print(f"Original: {comment}")
  print(f"Sentiment Score: {sentiment}\n")

Original: Satisfied with the service
Sentiment Score: {'neg': 0.0, 'neu': 0.517, 'pos': 0.483, 'compound': 0.4215}

Original: Needs improvement in delivery
Sentiment Score: {'neg': 0.0, 'neu': 0.5, 'pos': 0.5, 'compound': 0.4588}

Original: Regular customer
Sentiment Score: {'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}

Original: Occasional buyer
Sentiment Score: {'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}

Original: High spender
Sentiment Score: {'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}

Original: Average experience
Sentiment Score: {'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}

Original: Prefers online shopping
Sentiment Score: {'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}

Original: New customer
Sentiment Score: {'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}

Original: Loyal to the brand
Sentiment Score: {'neg': 0.0, 'neu': 0.492, 'pos': 0.508, 'compound': 0.4767}

Original: High conversion rate
Sentiment Score: {'neg': 0.0, 'neu'

In [17]:
pip install gensim



In [19]:
import gensim
from gensim.utils import simple_preprocess
from gensim import corpora

tokenized_comments = [simple_preprocess(comment) for comment in comments]

dictionary = corpora.Dictionary(tokenized_comments)
corpus = [dictionary.doc2bow(text) for text in tokenized_comments]

lda_model = gensim.models.LdaModel(corpus, num_topics=2, id2word=dictionary, passes=15)

topics = lda_model.print_topics(num_words=4)
for topic in topics:
  print(f"Topic: {topic}\n")

Topic: (0, '0.074*"high" + 0.046*"the" + 0.044*"loyal" + 0.044*"brand"')

Topic: (1, '0.061*"with" + 0.059*"satisfied" + 0.059*"customer" + 0.035*"improvement"')



In [20]:
pip install scikit-learn



In [21]:
from sklearn.feature_extraction.text import TfidfVectorizer

vectorizer = TfidfVectorizer(stop_words='english')
X = vectorizer.fit_transform(comments)

keywords = vectorizer.get_feature_names_out()
print(f"Keywords: {keywords}\n")


Keywords: ['average' 'brand' 'buyer' 'conversion' 'customer' 'delivery' 'discounts'
 'engagement' 'enjoys' 'experience' 'frequent' 'high' 'improvement' 'low'
 'loyal' 'needs' 'new' 'occasional' 'online' 'prefers' 'product' 'quality'
 'rate' 'regular' 'returns' 'satisfied' 'service' 'shopper' 'shopping'
 'spender' 'unhappy']



In [22]:
print(f"TF-IDF Scores for first comment: {X[0].toarray()}")

TF-IDF Scores for first comment: [[0.         0.         0.         0.         0.         0.
  0.         0.         0.         0.         0.         0.
  0.         0.         0.         0.         0.         0.
  0.         0.         0.         0.         0.         0.
  0.         0.65564815 0.75506655 0.         0.         0.
  0.        ]]


In [23]:
feature_names = vectorizer.get_feature_names_out()

first_comment_tfidf = X[0].toarray().flatten()
for index, value in enumerate(first_comment_tfidf):
    if value > 0:
        print(f"Word: {feature_names[index]}, TF-IDF Score: {value}")

Word: satisfied, TF-IDF Score: 0.6556481547479347
Word: service, TF-IDF Score: 0.7550665514877668


In [25]:
import spacy
nlp = spacy.load('en_core_web_sm')

comment5 = nlp(comments[5])
comment6 = nlp(comments[6])

similarity = comment5.similarity(comment6)
print(f"Similarity between '{comments[5]}' and '{comments[6]}': {similarity}")

Similarity between 'Average experience' and 'Prefers online shopping': 0.3792169057942345


  similarity = comment5.similarity(comment6)


In [31]:
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics import accuracy_score

vectorizer = CountVectorizer()
x = vectorizer.fit_transform(comments)

labels = [1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1]

x_train, x_test, y_train, y_test = train_test_split(x, labels, test_size=0.2)

classifier = MultinomialNB()
classifier.fit(x_train, y_train)

y_pred = classifier.predict(x_test)

accuracy = accuracy_score(y_test, y_pred)
print(f"Classification Accuracy: {accuracy}")

Classification Accuracy: 0.6666666666666666
