In [98]:
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

In [99]:
# Load the raw campaign export; the file is tab-delimited despite its .csv extension.
raw_csv_path = "../data/raw/marketing_campaign.csv"
df = pd.read_csv(raw_csv_path, sep="\t")

In [100]:
# List the column names of the freshly loaded frame, before any features are derived.
df.columns

Index(['ID', 'Year_Birth', 'Education', 'Marital_Status', 'Income', 'Kidhome',
       'Teenhome', 'Dt_Customer', 'Recency', 'MntWines', 'MntFruits',
       'MntMeatProducts', 'MntFishProducts', 'MntSweetProducts',
       'MntGoldProds', 'NumDealsPurchases', 'NumWebPurchases',
       'NumCatalogPurchases', 'NumStorePurchases', 'NumWebVisitsMonth',
       'AcceptedCmp3', 'AcceptedCmp4', 'AcceptedCmp5', 'AcceptedCmp1',
       'AcceptedCmp2', 'Complain', 'Z_CostContact', 'Z_Revenue', 'Response'],
      dtype='object')

In [101]:
# Age is computed against a fixed reference year (a constant, not the current date).
REFERENCE_YEAR = 2025
df['Age'] = REFERENCE_YEAR - df['Year_Birth']


In [102]:
# Spot-check the derived Age column next to its source column.
df.loc[:, ['Year_Birth', 'Age']].head()

Unnamed: 0,Year_Birth,Age
0,1957,68
1,1954,71
2,1965,60
3,1984,41
4,1981,44


In [103]:
# Household size = kids at home + teenagers at home + a fixed constant of 2.
# NOTE(review): the 2 presumably stands for two adults and is applied to every
# row regardless of Marital_Status — confirm this is intended for single-adult homes.
BASE_HOUSEHOLD_MEMBERS = 2
df['FamilySize'] = df['Kidhome'].add(df['Teenhome']).add(BASE_HOUSEHOLD_MEMBERS)

In [104]:
# Spot-check FamilySize against the two child-count columns it is built from.
df.loc[:, ['Kidhome', 'Teenhome', 'FamilySize']].head()


Unnamed: 0,Kidhome,Teenhome,FamilySize
0,0,0,2
1,1,1,4
2,0,0,2
3,1,0,3
4,1,0,3


In [105]:
# Per-category amount-spent columns, one entry per product category.
spending_cols = [
    'MntWines',
    'MntFruits',
    'MntMeatProducts',
    'MntFishProducts',
    'MntSweetProducts',
    'MntGoldProds',
]

In [106]:
# Row-wise total across all product-category spend columns.
df['TotalSpending'] = df[spending_cols].sum(axis='columns')

In [107]:
# Spot-check the total next to the individual category amounts.
df[[*spending_cols, 'TotalSpending']].head()


Unnamed: 0,MntWines,MntFruits,MntMeatProducts,MntFishProducts,MntSweetProducts,MntGoldProds,TotalSpending
0,635,88,546,172,88,88,1617
1,11,1,6,2,1,6,27
2,426,49,127,111,21,42,776
3,11,4,20,10,3,5,53
4,173,43,118,46,27,15,422


In [108]:
# Campaign acceptance flag columns
campaign_cols = [
    'AcceptedCmp1', 'AcceptedCmp2', 'AcceptedCmp3',
    'AcceptedCmp4', 'AcceptedCmp5', 'Response',
]

# Total number of accepted campaigns per customer
accepted_total = df[campaign_cols].sum(axis='columns')
df['TotalAcceptedCmp'] = accepted_total

# Success rate: accepted campaigns divided by the number of campaign columns (between 0 and 1)
df['CampaignSuccessRate'] = accepted_total.div(len(campaign_cols))

# Check the result
df[[*campaign_cols, 'TotalAcceptedCmp', 'CampaignSuccessRate']].head()


Unnamed: 0,AcceptedCmp1,AcceptedCmp2,AcceptedCmp3,AcceptedCmp4,AcceptedCmp5,Response,TotalAcceptedCmp,CampaignSuccessRate
0,0,0,0,0,0,1,1,0.166667
1,0,0,0,0,0,0,0,0.0
2,0,0,0,0,0,0,0,0.0
3,0,0,0,0,0,0,0,0.0
4,0,0,0,0,0,0,0,0.0


In [109]:
# Is there a child at home? 1 when Kidhome + Teenhome is greater than 0, otherwise 0.
children_at_home = df['Kidhome'] + df['Teenhome']
df['IsParent'] = children_at_home.gt(0).astype(int)

# Check the result
df.loc[:, ['Kidhome', 'Teenhome', 'IsParent']].head()


Unnamed: 0,Kidhome,Teenhome,IsParent
0,0,0,0
1,1,1,1
2,0,0,0
3,1,0,1
4,1,0,1


In [110]:
# Parse the dates, reading them in day-month-year order
customer_dates = pd.to_datetime(df['Dt_Customer'], dayfirst=True)
df['Dt_Customer'] = customer_dates

# Tenure in days, counted back from the latest Dt_Customer value in the data
# (so the most recent customer has a tenure of 0)
latest_date = customer_dates.max()
df['CustomerTenure'] = (latest_date - customer_dates).dt.days

# Check the result
df.loc[:, ['Dt_Customer', 'CustomerTenure']].head()


Unnamed: 0,Dt_Customer,CustomerTenure
0,2012-09-04,663
1,2014-03-08,113
2,2013-08-21,312
3,2014-02-10,139
4,2014-01-19,161


In [111]:
# List the column names again to confirm the derived feature columns are present.
df.columns


Index(['ID', 'Year_Birth', 'Education', 'Marital_Status', 'Income', 'Kidhome',
       'Teenhome', 'Dt_Customer', 'Recency', 'MntWines', 'MntFruits',
       'MntMeatProducts', 'MntFishProducts', 'MntSweetProducts',
       'MntGoldProds', 'NumDealsPurchases', 'NumWebPurchases',
       'NumCatalogPurchases', 'NumStorePurchases', 'NumWebVisitsMonth',
       'AcceptedCmp3', 'AcceptedCmp4', 'AcceptedCmp5', 'AcceptedCmp1',
       'AcceptedCmp2', 'Complain', 'Z_CostContact', 'Z_Revenue', 'Response',
       'Age', 'FamilySize', 'TotalSpending', 'TotalAcceptedCmp',
       'CampaignSuccessRate', 'IsParent', 'CustomerTenure'],
      dtype='object')

In [112]:
# Remove the identifier, the raw birth-year and date columns (Age and
# CustomerTenure were derived from them), and the two Z_* columns.
cols_to_drop = ['ID', 'Year_Birth', 'Dt_Customer', 'Z_CostContact', 'Z_Revenue']
df = df.drop(cols_to_drop, axis='columns')

# Show what remains
df.columns


Index(['Education', 'Marital_Status', 'Income', 'Kidhome', 'Teenhome',
       'Recency', 'MntWines', 'MntFruits', 'MntMeatProducts',
       'MntFishProducts', 'MntSweetProducts', 'MntGoldProds',
       'NumDealsPurchases', 'NumWebPurchases', 'NumCatalogPurchases',
       'NumStorePurchases', 'NumWebVisitsMonth', 'AcceptedCmp3',
       'AcceptedCmp4', 'AcceptedCmp5', 'AcceptedCmp1', 'AcceptedCmp2',
       'Complain', 'Response', 'Age', 'FamilySize', 'TotalSpending',
       'TotalAcceptedCmp', 'CampaignSuccessRate', 'IsParent',
       'CustomerTenure'],
      dtype='object')

In [113]:
# Number of missing values in each column
missing_counts = df.isna().sum()

# Show only the columns that actually contain missing values
missing_counts.loc[missing_counts.gt(0)]


Income    24
dtype: int64

In [114]:
# Numeric columns
numeric_cols = df.select_dtypes(include=['number']).columns

# Numeric columns that contain at least one missing value
num_na_cols = [name for name in numeric_cols if df[name].isna().any()]

# Fill each of those columns with its own median
for col in num_na_cols:
    df[col] = df[col].fillna(df[col].median())


In [115]:
# Confirm the imputed columns no longer contain missing values.
df[num_na_cols].isna().sum()


Income    0
dtype: int64

In [116]:
# Categorical (non-numeric) columns
cat_cols = df.select_dtypes(exclude=['number']).columns

# Categorical columns that contain missing values
cat_na_cols = [name for name in cat_cols if df[name].isna().any()]
cat_na_cols


[]

In [117]:
# Total number of missing cells across the whole frame.
df.isna().sum().sum()


np.int64(0)

In [118]:
# Build the churn label: 1 when Recency is above the threshold, otherwise 0
CHURN_RECENCY_THRESHOLD = 40
df['Churn'] = df['Recency'].gt(CHURN_RECENCY_THRESHOLD).astype(int)

# Look at the first 10 rows
df.loc[:, ['Recency', 'Churn']].head(10)


Unnamed: 0,Recency,Churn
0,58,1
1,38,0
2,26,0
3,26,0
4,94,1
5,16,0
6,34,0
7,32,0
8,19,0
9,68,1


In [119]:
# Drop Recency now that Churn has been derived from it
# (presumably so the label's source column is not used as a feature — confirm).
df = df.drop(columns=["Recency"])

In [120]:
# Spending per day of tenure; adding 1 keeps the denominator non-zero when tenure is 0 days.
tenure_plus_one = df['CustomerTenure'] + 1
df['CLV'] = df['TotalSpending'].div(tenure_plus_one)

In [121]:
# Total spending relative to income; adding 1 keeps the denominator non-zero when Income is 0.
income_plus_one = df['Income'] + 1
df['SpendingToIncome'] = df['TotalSpending'].div(income_plus_one)

In [122]:
# Flag customers whose total spending is at or above the 75th percentile (1 = yes, 0 = no).
threshold = df['TotalSpending'].quantile(q=0.75)
df['HighValue'] = df['TotalSpending'].ge(threshold).astype(int)


In [123]:
# Combined activity score: monthly web visits plus store, web and catalog purchases.
# NOTE(review): this includes NumWebVisitsMonth (visits, not purchases) and leaves
# out NumDealsPurchases — confirm that matches the intended meaning of the feature.
df['PurchaseActivity'] = (
    df['NumWebVisitsMonth']
    .add(df['NumStorePurchases'])
    .add(df['NumWebPurchases'])
    .add(df['NumCatalogPurchases'])
)


In [124]:
# Write the engineered feature table to disk (without the index column).
# The output directory is created first so the export does not raise an OSError
# on a checkout where ../data/processed does not exist yet.
processed_path = Path("../data/processed/marketing_campaign_processed.csv")
processed_path.parent.mkdir(parents=True, exist_ok=True)
df.to_csv(processed_path, index=False)