<a href="https://colab.research.google.com/github/gfeyzakorkmaz/gfeyzakorkmaz/blob/main/pythonday46.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd

In [11]:
path = '/content/drive/MyDrive/Dataset/complex_marketing_data_enriched.xlsx'

In [12]:
df = pd.read_excel(path)
df.head()

Unnamed: 0,Customer ID,Age,Gender,Total Spent,Converted,Comments,Purchased Category,Stock,Region,Income Level,Discount Availed,Customer Tenure (Years),Campaign,Return Rate (%),Website Visit Frequency,Average Cart Value
0,1,23,F,150,1,Satisfied with the service,Clothing,4,East,Medium,46,4,Winter Sale,2.981829,3,378.0
1,2,35,M,600,1,Needs improvement in delivery,Beauty,86,West,Low,61,12,Summer Sale,1.833412,6,315.52
2,3,45,F,800,0,Regular customer,Books,24,North,Medium,50,12,Black Friday,6.547401,0,250.69
3,4,52,M,200,0,Occasional buyer,Beauty,10,East,Medium,54,7,Winter Sale,3.923717,1,222.58
4,5,29,F,300,1,High spender,Books,47,East,Medium,63,12,Winter Sale,2.306644,4,298.68


In [13]:
spending_summary = df.groupby('Purchased Category')['Total Spent'].sum().sort_values(ascending=False)
print(spending_summary)

Purchased Category
Books          2350
Home Goods     2090
Electronics    1680
Clothing       1600
Beauty         1200
Name: Total Spent, dtype: int64


In [14]:
campaign_conversion = df.groupby('Campaign')['Converted'].mean().sort_values(ascending=False)
print(campaign_conversion)

Campaign
Winter Sale     0.777778
Summer Sale     0.500000
Black Friday    0.000000
Name: Converted, dtype: float64


In [15]:
from gensim import corpora, models

comments = df['Comments'].dropna().tolist()
texts = [[word for word in comment.lower().split() if len(word) > 2] for comment in comments]

dictionary = corpora.Dictionary(texts)
corpus = [dictionary.doc2bow(text) for text in texts]

lda = models.LdaModel(corpus, num_topics=2, id2word=dictionary, passes=10)

topics = lda.print_topics(num_words=5)
print("Topics:", topics)

Topics: [(0, '0.091*"with" + 0.086*"the" + 0.071*"satisfied" + 0.051*"product" + 0.050*"enjoys"'), (1, '0.061*"customer" + 0.057*"high" + 0.056*"needs" + 0.034*"conversion" + 0.034*"rate"')]


In [16]:
region_discount = df.groupby('Region')['Discount Availed'].mean().sort_values(ascending=False)
print(region_discount)

Region
South    72.000000
West     45.166667
East     36.666667
North    29.750000
Name: Discount Availed, dtype: float64


In [17]:
high_spenders = df[df['Total Spent'] > df['Total Spent'].quantile(0.75)]
print(high_spenders[['Customer ID', 'Total Spent']])

    Customer ID  Total Spent
2             3          800
6             7          670
9            10          750
10           11          610
17           18          700


In [18]:
age_group_purchases = pd.cut(df['Age'], bins=[0, 25, 35, 50, 65], labels=['18-25', '26-35', '36-50', '51-65'])
df['Age Group'] = age_group_purchases
age_group_behavior = df.groupby('Age Group')['Converted'].mean()
print(age_group_behavior)

Age Group
18-25    0.666667
26-35    0.857143
36-50    0.444444
51-65    0.000000
Name: Converted, dtype: float64


  age_group_behavior = df.groupby('Age Group')['Converted'].mean()


In [19]:
loyalty_analysis = df.groupby('Customer Tenure (Years)')['Converted'].mean()
print(loyalty_analysis)

Customer Tenure (Years)
1     1.000000
2     0.500000
3     1.000000
4     0.750000
6     0.500000
7     0.000000
8     1.000000
10    0.000000
12    0.666667
13    0.000000
14    0.500000
Name: Converted, dtype: float64


In [21]:
category_return_rate = df.groupby('Purchased Category')['Return Rate (%)'].mean().sort_values(ascending=False)
print(category_return_rate)

Purchased Category
Home Goods     4.902000
Books          4.819366
Electronics    4.775070
Clothing       4.645520
Beauty         3.447599
Name: Return Rate (%), dtype: float64


In [22]:
website_engagement = df.groupby('Website Visit Frequency')['Converted'].mean()
print(website_engagement)

Website Visit Frequency
0    0.00
1    0.50
2    1.00
3    0.40
4    0.75
5    0.50
6    1.00
7    1.00
Name: Converted, dtype: float64


In [25]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

X = df[['Age', 'Total Spent', 'Discount Availed', 'Website Visit Frequency']]
y = df['Converted']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

model = LogisticRegression()
model.fit(X_train, y_train)

y_pred = model.predict(X_test)

accuracy =accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

Accuracy: 0.6666666666666666


In [27]:
from gensim.models.phrases import Phrases, Phraser

phrases = Phrases(texts, min_count=2, threshold=1)
biagram_model = Phraser(phrases)
biagram_texts = [biagram_model[text] for text in texts]
print(biagram_texts[:5])

[['satisfied_with', 'the', 'service'], ['needs', 'improvement', 'delivery'], ['regular', 'customer'], ['occasional', 'buyer'], ['high', 'spender']]


In [30]:
from gensim.models import Word2Vec

word2vec_model = Word2Vec(sentences=texts, vector_size=50, window=3, min_count=2, workers=4)
print(word2vec_model.wv.most_similar('service'))

[('needs', 0.18460793793201447), ('customer', 0.13941581547260284), ('enjoys', 0.1071295216679573), ('satisfied', -0.010219999589025974), ('the', -0.05608532950282097), ('high', -0.08927592635154724), ('product', -0.1016002744436264), ('with', -0.11908359825611115)]


In [31]:
from gensim.models import LsiModel

lsi = LsiModel(corpus, num_topics=2, id2word=dictionary)
print(lsi.print_topics())

[(0, '0.588*"with" + 0.512*"satisfied" + 0.360*"service" + 0.339*"the" + 0.226*"customer" + 0.217*"product" + 0.152*"quality" + 0.077*"unhappy" + 0.077*"returns" + 0.073*"enjoys"'), (1, '-0.704*"the" + -0.296*"enjoys" + 0.282*"customer" + 0.227*"with" + -0.219*"brand" + -0.219*"loyal" + -0.219*"design" + -0.219*"likes" + -0.176*"product" + 0.157*"satisfied"')]


In [32]:
for doc in corpus:
  topics = lda.get_document_topics(doc)
  print(topics)

[(0, 0.89776736), (1, 0.10223258)]
[(0, 0.12917697), (1, 0.870823)]
[(0, 0.18050705), (1, 0.81949294)]
[(0, 0.8244682), (1, 0.17553179)]
[(0, 0.17178777), (1, 0.82821226)]
[(0, 0.82463753), (1, 0.17536247)]
[(0, 0.12986425), (1, 0.8701358)]
[(0, 0.18052125), (1, 0.81947875)]
[(0, 0.37322), (1, 0.62678)]
[(0, 0.12900832), (1, 0.8709917)]
[(0, 0.17335443), (1, 0.8266456)]
[(0, 0.8974423), (1, 0.10255774)]
[(0, 0.8243349), (1, 0.17566514)]
[(0, 0.8703279), (1, 0.12967208)]
[(0, 0.82676333), (1, 0.17323673)]
[(0, 0.8714958), (1, 0.12850422)]
[(0, 0.1293672), (1, 0.8706328)]
[(0, 0.86390996), (1, 0.13609004)]
[(0, 0.17334345), (1, 0.8266566)]
[(0, 0.8693699), (1, 0.1306301)]


In [36]:
!pip install pyLDAvis

Collecting pyLDAvis
  Downloading pyLDAvis-3.4.1-py3-none-any.whl.metadata (4.2 kB)
Collecting funcy (from pyLDAvis)
  Using cached funcy-2.0-py2.py3-none-any.whl.metadata (5.9 kB)
Downloading pyLDAvis-3.4.1-py3-none-any.whl (2.6 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.6/2.6 MB[0m [31m31.3 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading funcy-2.0-py2.py3-none-any.whl (30 kB)
Installing collected packages: funcy, pyLDAvis
Successfully installed funcy-2.0 pyLDAvis-3.4.1


In [37]:
import pyLDAvis.gensim_models as gensimvis
import pyLDAvis

lda_display = gensimvis.prepare(lda, corpus, dictionary)
pyLDAvis.display(lda_display)
