<a href="https://colab.research.google.com/github/gfeyzakorkmaz/gfeyzakorkmaz/blob/main/pythonday53.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [7]:
import pandas as pd
import numpy as np
from sklearn.cluster import KMeans
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import classification_report
from textblob import TextBlob
import seaborn as sns
import matplotlib.pyplot as plt

In [2]:
path = '/content/drive/MyDrive/Dataset/complex_marketing_data_enriched.xlsx'

In [5]:
df = pd.read_excel(path)

In [25]:
def customer_segmentation(df):
  segmentation_features = df[['Age', 'Total Spent', 'Customer Tenure (Years)']]
  scaler = StandardScaler()
  scaled_features = scaler.fit_transform(segmentation_features)
  kmeans = KMeans(n_clusters=4, random_state=42)
  df['Segment'] = kmeans.fit_predict(scaled_features)
  segments_summary = df.groupby('Segment').agg({
      'Age': 'mean',
      'Total Spent': 'mean',
      'Customer Tenure (Years)': 'mean'
  })
  return df, segments_summary

segmented_df, segments_summary = customer_segmentation(df)
segments_summary

Unnamed: 0_level_0,Age,Total Spent,Customer Tenure (Years)
Segment,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,26.2,256.0,3.4
1,37.333333,631.666667,4.5
2,35.6,500.0,12.6
3,50.0,337.5,9.75


In [26]:
def predictive_analysis(df):
  le = LabelEncoder()
  df['Gender'] = le.fit_transform(df['Gender'])
  X = df[['Age', 'Total Spent', 'Discount Availed', 'Website Visit Frequency']]
  y = df['Converted']
  model = RandomForestClassifier(random_state=42)
  model.fit(X, y)
  predictions = model.predict(X)
  report = classification_report(y, predictions)
  return predictions, report

conversion_predictions, conversion_report = predictive_analysis(df)
conversion_report

print("Predictive Analysis Report:")
print(conversion_report)

Predictive Analysis Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00         8
           1       1.00      1.00      1.00        12

    accuracy                           1.00        20
   macro avg       1.00      1.00      1.00        20
weighted avg       1.00      1.00      1.00        20



In [15]:
def sentiment_analysis(df):
  df['Sentiment'] = df['Comments'].apply(lambda x: TextBlob(str(x)).sentiment.polarity)
  sentiment_summary = df[['Comments', 'Sentiment']].head()
  return df, sentiment_summary

sentiment_df, sentiment_summary = sentiment_analysis(df)
sentiment_summary

Unnamed: 0,Comments,Sentiment
0,Satisfied with the service,0.5
1,Needs improvement in delivery,0.0
2,Regular customer,0.0
3,Occasional buyer,0.0
4,High spender,0.16


In [17]:
def detect_outliers(df):
  Q1 = df['Total Spent'].quantile(0.25)
  Q3 = df['Total Spent'].quantile(0.75)
  IQR = Q3 - Q1
  outliers = df[(df['Total Spent'] < (Q1 - 0.2 * IQR)) | (df['Total Spent'] > (Q3 + 0.1 * IQR))]
  return outliers

spending_outliers = detect_outliers(df)
spending_outliers

Unnamed: 0,Customer ID,Age,Gender,Total Spent,Converted,Comments,Purchased Category,Stock,Region,Income Level,Discount Availed,Customer Tenure (Years),Campaign,Return Rate (%),Website Visit Frequency,Average Cart Value,Segment,Sentiment
0,1,23,0,150,1,Satisfied with the service,Clothing,4,East,Medium,46,4,Winter Sale,2.981829,3,378.0,0,0.5
2,3,45,0,800,0,Regular customer,Books,24,North,Medium,50,12,Black Friday,6.547401,0,250.69,2,0.0
3,4,52,1,200,0,Occasional buyer,Beauty,10,East,Medium,54,7,Winter Sale,3.923717,1,222.58,3,0.0
6,7,38,0,670,1,Prefers online shopping,Home Goods,69,North,Low,50,8,Winter Sale,2.738895,1,290.23,1,0.0
7,8,27,1,120,1,New customer,Home Goods,0,North,Low,6,3,Winter Sale,5.268858,7,181.4,0,0.136364
9,10,33,1,750,1,High conversion rate,Clothing,45,South,Medium,72,1,,6.775497,4,237.93,1,0.16
17,18,35,1,700,1,Satisfied with customer service,Clothing,87,West,High,89,6,,4.179233,5,440.44,1,0.5


In [19]:
def region_analysis(df):
  region_spending = df.groupby('Region')['Total Spent'].mean()
  return region_spending

region_spending = region_analysis(df)
region_spending

Unnamed: 0_level_0,Total Spent
Region,Unnamed: 1_level_1
East,352.222222
North,485.0
South,750.0
West,510.0


In [20]:
def discount_effectiveness(df):
  discount_conversion_summary = df.groupby('Converted')['Discount Availed'].mean()
  return discount_conversion_summary

discount_summary = discount_effectiveness(df)
discount_summary



Unnamed: 0_level_0,Discount Availed
Converted,Unnamed: 1_level_1
0,36.375
1,41.75


In [27]:
def correlation_analysis(df):
  correlations = df.corr(numeric_only=True)
  return correlations

correlation_matrix = correlation_analysis(df)
correlation_matrix

print("Correlation Matrix:")
print(correlation_matrix)

Correlation Matrix:
                          Customer ID       Age    Gender  Total Spent  \
Customer ID              1.000000e+00 -0.024717 -0.078433     0.024769   
Age                     -2.471694e-02  1.000000 -0.256714     0.244955   
Gender                  -7.843305e-02 -0.256714  1.000000    -0.119961   
Total Spent              2.476877e-02  0.244955 -0.119961     1.000000   
Converted               -1.100441e-16 -0.488302 -0.082061     0.108285   
Stock                    1.951259e-01  0.049862  0.022307     0.523338   
Discount Availed        -1.310777e-01  0.004612  0.163009     0.283919   
Customer Tenure (Years) -2.561158e-01  0.409548 -0.087869     0.097870   
Return Rate (%)          2.863157e-01  0.211488  0.071679     0.073645   
Website Visit Frequency  1.019526e-01 -0.160536  0.286984    -0.234471   
Average Cart Value       9.889699e-02 -0.139471 -0.172178     0.256117   
Segment                 -1.380609e-01  0.805108 -0.150609     0.084082   
Sentiment         

In [28]:
def churn_prediction(df):
  X = df[['Customer Tenure (Years)', 'Return Rate (%)']]
  y = (df['Converted'] == 0).astype(int)
  model = RandomForestClassifier(random_state=42)
  model.fit(X, y)
  churn_predictions = model.predict(X)
  report = classification_report(y, churn_predictions)
  return churn_predictions, report

churn_predictions, churn_report = churn_prediction(df)
churn_report

print("Churn Prediction Report:")
print(churn_report)

Churn Prediction Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        12
           1       1.00      1.00      1.00         8

    accuracy                           1.00        20
   macro avg       1.00      1.00      1.00        20
weighted avg       1.00      1.00      1.00        20



In [33]:
def recommendation_engine(df):
  recommendations = df.groupby('Purchased Category')['Total Spent'].mean().sort_values(ascending=False)
  return recommendations

product_recommendations = recommendation_engine(df)
product_recommendations

display(pd.DataFrame(product_recommendations))

Unnamed: 0_level_0,Total Spent
Purchased Category,Unnamed: 1_level_1
Clothing,533.333333
Books,470.0
Electronics,420.0
Home Goods,418.0
Beauty,400.0
