<a href="https://colab.research.google.com/github/gfeyzakorkmaz/Tries/blob/main/Pandas1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd

In [2]:
# Location of the source workbook on the mounted Google Drive.
path = (
    '/content/drive/MyDrive/Dataset/'
    'synthetic_tableau_data.xlsx'
)

In [3]:
# Read the workbook at `path` into the frame every later cell works on,
# then preview its first five rows.
df = pd.read_excel(io=path)
df.head(5)

Unnamed: 0,Customer ID,Age,Gender,Total Spent,Converted,Comments,Purchased Category,Stock,Region,Income Level,Discount Availed,Customer Tenure (Years),Campaign,Return Rate (%),Website Visit Frequency,Average Cart Value
0,1,56,F,397,0,Satisfied,Electronics,176,West,Medium,64,5,Winter Sale,5.349041,0,58.81
1,2,69,M,676,0,Needs improvement,Clothing,84,North,Medium,13,9,Black Friday,3.485091,1,439.97
2,3,46,M,153,0,Frequent buyer,Books,91,East,High,19,10,Black Friday,1.320668,7,432.2
3,4,32,M,679,0,Frequent buyer,Beauty,134,South,Medium,56,13,Black Friday,2.658952,5,213.71
4,5,60,M,566,0,Satisfied,Books,91,South,Medium,23,15,Summer Sale,4.384398,8,357.15


In [4]:
# Visit-weighted conversion rate per (Region, Campaign):
#     sum(Converted * Website Visit Frequency) / sum(Website Visit Frequency)
#
# Built from two grouped sums instead of `groupby(...).apply(lambda ...)`.
# `apply` hands the grouping columns to every sub-frame, which pandas has
# deprecated (it emits a warning), and it calls a Python lambda once per group.
group_keys = ['Region', 'Campaign']
visit_sums = (
    df.assign(**{'Weighted Conversions': df['Converted'] * df['Website Visit Frequency']})
    .groupby(group_keys)[['Weighted Conversions', 'Website Visit Frequency']]
    .sum()
)

# A group whose visits sum to 0 yields NaN (0 / 0) rather than raising.
weighted_conversion = (
    visit_sums['Weighted Conversions'] / visit_sums['Website Visit Frequency']
).reset_index(name='Weighted Conversion Rate')
weighted_conversion

  weighted_conversion = df.groupby(['Region', 'Campaign']).apply(


Unnamed: 0,Region,Campaign,Weighted Conversion Rate
0,Central,Black Friday,0.303571
1,Central,Holiday Offers,0.346405
2,Central,Summer Sale,0.32093
3,Central,Winter Sale,0.285068
4,East,Black Friday,0.180556
5,East,Holiday Offers,0.371681
6,East,Summer Sale,0.457692
7,East,Winter Sale,0.375
8,North,Black Friday,0.486339
9,North,Holiday Offers,0.209424


In [5]:
# Bucket customers by total spend. pd.cut bins are right-closed / left-open,
# so without include_lowest=True a spend of exactly 0 would fall outside
# (0, 250] and become NaN, silently dropping that customer from the table.
# Values above the top edge (1000) still map to NaN.
df['Segment'] = pd.cut(
    df['Total Spent'],
    bins=[0, 250, 500, 1000],
    labels=['Low Spender', 'Mid Spender', 'High Spender'],
    include_lowest=True,
)

# Bucket customers by tenure; include_lowest=True keeps a tenure of 0 years
# in 'New Customer' for the same reason as above. Tenure above 20 maps to NaN.
df['Loyalty Level'] = pd.cut(
    df['Customer Tenure (Years)'],
    bins=[0, 5, 10, 20],
    labels=['New Customer', 'Loyal Customer', 'Very Loyal Customer'],
    include_lowest=True,
)

# Count customers in every Segment x Loyalty Level cell. Both keys are
# categorical, so `observed` is passed explicitly: observed=False keeps
# empty category combinations in the table and avoids the FutureWarning
# pandas raises when the argument is left to its (changing) default.
segmentation = (
    df.groupby(['Segment', 'Loyalty Level'], observed=False)
    .size()
    .unstack(fill_value=0)
)
segmentation

  segmentation = df.groupby(['Segment', 'Loyalty Level']).size().unstack(fill_value=0)


Loyalty Level,New Customer,Loyal Customer,Very Loyal Customer
Segment,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Low Spender,50,52,97
Mid Spender,63,66,122
High Spender,139,149,262


In [9]:
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression

# Fit a degree-2 polynomial regression of Total Spent on Discount Availed.
X = df[['Discount Availed']].values
y = df['Total Spent'].values

# Expand the single feature into the degree-2 polynomial design matrix.
poly = PolynomialFeatures(degree=2)
X_poly = poly.fit_transform(X)

model = LinearRegression().fit(X_poly, y)

# Predict on the design matrix that was already built for fitting. The
# previous version re-ran poly.transform on a DataFrame slice of the same
# column: redundant work, and a feature-name mismatch because the transformer
# was fitted on a bare NumPy array. The predictions are unchanged.
df['Predicted Spend'] = model.predict(X_poly)
df[['Customer ID', 'Discount Availed', 'Total Spent', 'Predicted Spend']]



Unnamed: 0,Customer ID,Discount Availed,Total Spent,Predicted Spend
0,1,64,397,530.504365
1,2,13,676,549.396607
2,3,19,153,546.531479
3,4,56,679,532.649247
4,5,23,566,544.716580
...,...,...,...,...
995,996,79,250,527.303698
996,997,64,709,530.504365
997,998,66,173,530.015738
998,999,25,50,543.837687


In [10]:
# Total spend for every Purchased Category (rows) x Campaign (columns) pair.
spend_by_category_campaign = pd.crosstab(
    df['Purchased Category'],
    df['Campaign'],
    values=df['Total Spent'],
    aggfunc='sum',
)

# Pairs that never occur come back as NaN; report them as zero spend.
basket_analysis = spend_by_category_campaign.fillna(0)
basket_analysis

Campaign,Black Friday,Holiday Offers,Summer Sale,Winter Sale
Purchased Category,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Beauty,24336,26021,28134,27710
Books,30082,34144,25486,29708
Clothing,20923,22002,25147,18781
Electronics,24575,33473,28720,30676
Home,27445,25112,27415,27082


In [11]:
# Flag customers whose total spend lies more than three standard deviations
# away from the mean, on either side.
total_spent = df['Total Spent']
spend_mean = total_spent.mean()
spend_std = total_spent.std()

upper_limit = spend_mean + 3 * spend_std
lower_limit = spend_mean - 3 * spend_std

# Strict comparisons: values exactly on a limit are not anomalies.
df['Anomaly'] = total_spent.gt(upper_limit) | total_spent.lt(lower_limit)
df[['Customer ID', 'Total Spent', 'Anomaly']]


Unnamed: 0,Customer ID,Total Spent,Anomaly
0,1,397,False
1,2,676,False
2,3,153,False
3,4,679,False
4,5,566,False
...,...,...,...
995,996,250,False
996,997,709,False
997,998,173,False
998,999,50,False


In [12]:
# Mean total spend per age band (rows) and region (columns).
# NOTE(review): pd.cut bins are right-closed / left-open, so Age == 18 falls
# outside (18, 25] and is excluded from this table — confirm that is intended.
age_groups = pd.cut(df['Age'], bins=[18, 25, 45, 60, 75, 100])

# The index is categorical, so `observed` is passed explicitly. observed=False
# retains the current behaviour and avoids the FutureWarning pandas emits
# when the argument is left to its (changing) default.
age_spend_region = df.pivot_table(
    values='Total Spent',
    index=age_groups,
    columns='Region',
    aggfunc='mean',
    observed=False,
).fillna(0)
age_spend_region

  age_spend_region = df.pivot_table(


Region,Central,East,North,South,West
Age,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
"(18, 25]",478.142857,456.1,558.307692,557.25,503.56
"(25, 45]",581.228916,520.180556,590.59375,494.444444,509.611111
"(45, 60]",539.711538,545.142857,518.444444,577.179104,516.478261
"(60, 75]",581.540541,562.605263,517.146341,598.157895,487.172414


In [13]:
# Share of converted customers (Converted == 1) at each tenure value.
# The mean of the boolean mask equals (rows with Converted == 1) / (rows in
# group). Grouping the mask directly avoids `groupby(...).apply(lambda ...)`,
# which operates on the grouping column (deprecated in pandas, emits a
# warning) and runs a Python lambda once per group.
retention_rate = (
    df['Converted'].eq(1)
    .groupby(df['Customer Tenure (Years)'])
    .mean()
    .reset_index(name='Retention Rate')
)
retention_rate

  retention_rate = df.groupby('Customer Tenure (Years)').apply(


Unnamed: 0,Customer Tenure (Years),Retention Rate
0,1,0.255319
1,2,0.186047
2,3,0.232143
3,4,0.320755
4,5,0.264151
5,6,0.428571
6,7,0.315789
7,8,0.386364
8,9,0.315789
9,10,0.316667


In [14]:
# Mean total spend per campaign (rows) and age band (columns).
age_bands = pd.cut(df['Age'], bins=[18, 30, 45, 60, 75, 100])

# The age-band key is categorical, so `observed` is passed explicitly.
# observed=False keeps empty bands (e.g. one with no customers) as NaN
# columns, exactly as before, and avoids the FutureWarning pandas emits
# when the argument is left to its (changing) default.
season_spend = (
    df.groupby(['Campaign', age_bands], observed=False)['Total Spent']
    .mean()
    .unstack()
)
season_spend

  season_spend = df.groupby(['Campaign', pd.cut(df['Age'], bins=[18, 30, 45, 60, 75, 100])])['Total Spent'].mean().unstack()


Age,"(18, 30]","(30, 45]","(45, 60]","(60, 75]","(75, 100]"
Campaign,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Black Friday,534.244898,566.176471,548.885246,599.159091,
Holiday Offers,484.933333,555.47619,534.0,494.816327,
Summer Sale,512.678571,476.666667,551.366197,613.585366,
Winter Sale,534.040816,579.671875,529.974359,514.102041,


In [15]:
# Customer lifetime value: spend x tenure, scaled by the share of purchases
# that were not returned (1 - return rate, with the rate given in percent).
kept_share = 1 - df['Return Rate (%)'] / 100
spend_over_tenure = df['Total Spent'] * df['Customer Tenure (Years)']
df['CLV'] = spend_over_tenure * kept_share

clv_columns = ['Customer ID', 'Total Spent', 'Customer Tenure (Years)', 'Return Rate (%)', 'CLV']
df[clv_columns]

Unnamed: 0,Customer ID,Total Spent,Customer Tenure (Years),Return Rate (%),CLV
0,1,397,5,5.349041,1878.821539
1,2,676,9,3.485091,5871.967033
2,3,153,10,1.320668,1509.793778
3,4,679,13,2.658952,8592.294270
4,5,566,15,4.384398,8117.764572
...,...,...,...,...,...
995,996,250,8,8.280595,1834.388091
996,997,709,14,4.115638,9517.481800
997,998,173,10,5.188083,1640.246162
998,999,50,3,0.336503,149.495245


In [17]:
import numpy as np

# Label a customer 'High' churn risk when their return rate is strictly above
# the 75th percentile AND they did not convert; everyone else is 'Low'.
return_rate = df['Return Rate (%)']
high_return = return_rate > return_rate.quantile(0.75)
not_converted = df['Converted'] == 0

df['Churn Probability'] = np.where(high_return & not_converted, 'High', 'Low')
df[['Customer ID', 'Return Rate (%)', 'Converted', 'Churn Probability']]


Unnamed: 0,Customer ID,Return Rate (%),Converted,Churn Probability
0,1,5.349041,0,Low
1,2,3.485091,0,Low
2,3,1.320668,0,Low
3,4,2.658952,0,Low
4,5,4.384398,0,Low
...,...,...,...,...
995,996,8.280595,0,High
996,997,4.115638,0,Low
997,998,5.188083,0,Low
998,999,0.336503,0,Low
