# E-Ticaret Müşteri Davranış Analizi ve Satış Tahmini

## Proje Hakkında
Bu notebook, e-ticaret verilerini kullanarak müşteri davranış analizi, segmentasyon, satış tahmini ve müşteri sınıflandırması gerçekleştirmektedir.

### İçerik:
1. **Veri Yükleme ve Hazırlık**
2. **Keşifsel Veri Analizi (EDA)**
3. **Veri Ön İşleme**
4. **Müşteri Segmentasyonu (Kümeleme)**
5. **Satış Tahmini (Regresyon)**
6. **Müşteri Davranış Sınıflandırması**
7. **Sonuçlar ve Öneriler**

---

## 1. Kütüphaneler ve Veri Yükleme

Bu bölümde projemiz için gerekli Python kütüphanelerini yüklüyor ve veri setlerini okuyoruz.

In [None]:
# Gerekli kÃ¼tÃ¼phanelerin yÃ¼klenmesi
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score, silhouette_samples
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, RandomForestClassifier
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score, classification_report, confusion_matrix
import warnings
warnings.filterwarnings('ignore')

# Matplotlib / seaborn defaults.
# The style sheet is applied FIRST: a style sheet may define its own
# figure size and font settings, so rcParams assigned before
# plt.style.use() can be silently overwritten by it.
try:
    plt.style.use('seaborn-v0_8')
except OSError:
    # Older Matplotlib releases ship this style sheet under its previous name.
    plt.style.use('seaborn')
plt.rcParams['figure.figsize'] = (12, 8)
plt.rcParams['font.size'] = 10
sns.set_palette("husl")

print(" TÃ¼m kÃ¼tÃ¼phaneler baÅŸarÄ±yla yÃ¼klendi!")

In [None]:
# Load the datasets; fall back to generated sample data when files are missing.
import os  # local to this cell: only needed to create the data directory


def create_sample_data(seed=42):
    """Build reproducible synthetic e-commerce sample data.

    Args:
        seed: Seed passed to ``np.random.seed``. The default (42) reproduces
            the data this notebook has always generated.

    Returns:
        Tuple ``(customers, products, transactions, interactions)`` of
        DataFrames with 1000, 500, 5000 and 10000 rows respectively.
    """
    # NOTE: this seeds NumPy's *global* generator, and the order of the
    # np.random calls below determines the generated values. Keep the call
    # order stable so a given seed keeps producing the same data.
    np.random.seed(seed)

    # Customers
    customers_data = {
        'customer_id': range(1, 1001),
        'age': np.random.randint(18, 80, 1000),
        'gender': np.random.choice(['M', 'F'], 1000),
        'city': np.random.choice(['Ä°stanbul', 'Ankara', 'Ä°zmir', 'Bursa'], 1000),
        'registration_date': pd.date_range('2020-01-01', periods=1000, freq='D'),
        'total_spent': np.random.uniform(100, 5000, 1000),
        'purchase_frequency': np.random.randint(1, 50, 1000)
    }

    # Products
    products_data = {
        'product_id': range(1, 501),
        'product_name': [f'ÃœrÃ¼n_{i}' for i in range(1, 501)],
        'category': np.random.choice(['Elektronik', 'Giyim', 'Ev & YaÅŸam', 'Kitap', 'Spor'], 500),
        'price': np.random.uniform(10, 1000, 500),
        'brand': np.random.choice(['Marka_A', 'Marka_B', 'Marka_C', 'Marka_D'], 500)
    }

    # Transactions. The step is given as a Timedelta instead of the '6H'
    # string alias: the upper-case hour alias is deprecated in recent pandas,
    # while a Timedelta yields the same timestamps on every version.
    transactions_data = {
        'transaction_id': range(1, 5001),
        'customer_id': np.random.randint(1, 1001, 5000),
        'product_id': np.random.randint(1, 501, 5000),
        'quantity': np.random.randint(1, 10, 5000),
        'price': np.random.uniform(10, 1000, 5000),
        'discount': np.random.uniform(0, 0.3, 5000),
        'transaction_date': pd.date_range('2023-01-01', periods=5000, freq=pd.Timedelta(hours=6)),
        'payment_method': np.random.choice(['Kredi KartÄ±', 'Debit Kart', 'Havale/EFT', 'KapÄ±da Ã–deme'], 5000)
    }

    # Interactions (one every 3 hours)
    interactions_data = {
        'interaction_id': range(1, 10001),
        'customer_id': np.random.randint(1, 1001, 10000),
        'product_id': np.random.randint(1, 501, 10000),
        'interaction_type': np.random.choice(['view', 'click', 'add_to_cart', 'wishlist'], 10000),
        'interaction_date': pd.date_range('2023-01-01', periods=10000, freq=pd.Timedelta(hours=3))
    }

    return (
        pd.DataFrame(customers_data),
        pd.DataFrame(products_data),
        pd.DataFrame(transactions_data),
        pd.DataFrame(interactions_data)
    )


try:
    # E-commerce datasets
    customers_df = pd.read_csv('data/customers.csv')
    products_df = pd.read_csv('data/products.csv')
    transactions_df = pd.read_csv('data/transactions.csv')
    interactions_df = pd.read_csv('data/interactions.csv')

    print(" TÃ¼m veri setleri baÅŸarÄ±yla yÃ¼klendi!")

    # Short summary of every loaded dataset
    datasets = {
        'MÃ¼ÅŸteriler': customers_df,
        'ÃœrÃ¼nler': products_df,
        'Ä°ÅŸlemler': transactions_df,
        'EtkileÅŸimler': interactions_df
    }

    for name, df in datasets.items():
        print(f"\n {name} Dataset:")
        print(f"  - Boyut: {df.shape}")
        print(f"  - SÃ¼tunlar: {list(df.columns)}")

except FileNotFoundError as e:
    print(f" Veri dosyasÄ± bulunamadÄ±: {e}")
    print("Ã–rnek veri oluÅŸturuluyor...")

    # Any missing file regenerates (and overwrites) all four datasets.
    customers_df, products_df, transactions_df, interactions_df = create_sample_data()
    print(" Ã–rnek veriler oluÅŸturuldu!")

    # Persist the generated frames. The directory is created first: this
    # branch runs precisely when the files (and possibly the whole folder)
    # do not exist, in which case to_csv alone would fail.
    os.makedirs('data', exist_ok=True)
    customers_df.to_csv('data/customers.csv', index=False)
    products_df.to_csv('data/products.csv', index=False)
    transactions_df.to_csv('data/transactions.csv', index=False)
    interactions_df.to_csv('data/interactions.csv', index=False)

## 2. Keşifsel Veri Analizi (EDA)

Bu bölümde veri setlerimizi detaylı olarak inceleyerek temel istatistikleri, dağılımları ve ilişkileri analiz edeceğiz.

In [None]:
# General overview of every dataset: shape, memory footprint, missing cells, columns
print("=== VERÄ° SETÄ° GENEL BÄ°LGÄ°LERÄ° ===\n")

datasets_info = dict(zip(
    ['MÃ¼ÅŸteriler', 'ÃœrÃ¼nler', 'Ä°ÅŸlemler', 'EtkileÅŸimler'],
    [customers_df, products_df, transactions_df, interactions_df],
))

for name, df in datasets_info.items():
    memory_mb = df.memory_usage(deep=True).sum() / 1024**2
    missing_cells = df.isnull().sum().sum()
    column_names = list(df.columns)

    print(f" {name} Dataset:")
    print(f"  Boyut: {df.shape}")
    print(f"  Bellek KullanÄ±mÄ±: {memory_mb:.2f} MB")
    print(f"  Eksik DeÄŸerler: {missing_cells}")
    print(f"  SÃ¼tunlar: {column_names}")
    print("-" * 50)

In [None]:
# Basic statistics for the customer data
print("=== MÃœÅžTERÄ° VERÄ°SÄ° TEMEL Ä°STATÄ°STÄ°KLERÄ° ===")
print("\n SayÄ±sal DeÄŸiÅŸkenler:")
print(customers_df.describe())

print("\n Kategorik DeÄŸiÅŸkenler:")
# One frequency table per categorical column, each under its own heading
for heading, column in (("\nCinsiyet DaÄŸÄ±lÄ±mÄ±:", 'gender'),
                        ("\nÅžehir DaÄŸÄ±lÄ±mÄ±:", 'city')):
    print(heading)
    print(customers_df[column].value_counts())

In [None]:
# Customer demographics: a 2x2 grid of distribution plots
fig, axes = plt.subplots(2, 2, figsize=(15, 12))
fig.suptitle('MÃ¼ÅŸteri Demografik Analizi', fontsize=16, fontweight='bold')
(ax_age, ax_gender), (ax_city, ax_spent) = axes

# Age distribution
ax_age.hist(customers_df['age'], bins=20, alpha=0.7, color='skyblue', edgecolor='black')
ax_age.set(title='YaÅŸ DaÄŸÄ±lÄ±mÄ±', xlabel='YaÅŸ', ylabel='Frekans')

# Gender split
gender_counts = customers_df['gender'].value_counts()
ax_gender.pie(gender_counts.values, labels=gender_counts.index, autopct='%1.1f%%', startangle=90)
ax_gender.set_title('Cinsiyet DaÄŸÄ±lÄ±mÄ±')

# Number of customers per city
city_counts = customers_df['city'].value_counts()
ax_city.bar(city_counts.index, city_counts.values, color='lightcoral')
ax_city.set(title='Åžehir DaÄŸÄ±lÄ±mÄ±', xlabel='Åžehir', ylabel='MÃ¼ÅŸteri SayÄ±sÄ±')
ax_city.tick_params(axis='x', rotation=45)

# Total spend distribution
ax_spent.hist(customers_df['total_spent'], bins=20, alpha=0.7, color='lightgreen', edgecolor='black')
ax_spent.set(title='Toplam Harcama DaÄŸÄ±lÄ±mÄ±', xlabel='Toplam Harcama (â‚º)', ylabel='Frekans')

plt.tight_layout()
plt.show()

# Correlation heatmap over the numeric customer columns
numeric_cols = customers_df.select_dtypes(include=[np.number]).columns
correlation_matrix = customers_df[numeric_cols].corr()

plt.figure(figsize=(10, 8))
sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', center=0, square=True)
plt.title('MÃ¼ÅŸteri Verisi Korelasyon Matrisi')
plt.tight_layout()
plt.show()

In [None]:
# Product analysis: category counts and price statistics
category_counts = products_df['category'].value_counts()

print("=== ÃœRÃœN VERÄ°SÄ° ANALÄ°ZÄ° ===")
print("\n Kategori DaÄŸÄ±lÄ±mÄ±:")
print(category_counts)

print("\n Fiyat Ä°statistikleri:")
print(products_df['price'].describe())

# Side-by-side: category share (pie) and price spread per category (box plot)
fig, axes = plt.subplots(1, 2, figsize=(15, 6))
ax_share, ax_price = axes

ax_share.pie(category_counts.values, labels=category_counts.index, autopct='%1.1f%%')
ax_share.set_title('ÃœrÃ¼n Kategorisi DaÄŸÄ±lÄ±mÄ±')

products_df.boxplot(column='price', by='category', ax=ax_price)
ax_price.set(title='Kategoriye GÃ¶re Fiyat DaÄŸÄ±lÄ±mÄ±', xlabel='Kategori', ylabel='Fiyat (â‚º)')
ax_price.tick_params(axis='x', rotation=45)

plt.tight_layout()
plt.show()

In [None]:
# Transaction data analysis
print("=== Ä°ÅžLEM VERÄ°SÄ° ANALÄ°ZÄ° ===")

# Parse the dates and derive calendar features (added as new columns)
transactions_df['transaction_date'] = pd.to_datetime(transactions_df['transaction_date'])
transactions_df['month'] = transactions_df['transaction_date'].dt.month
transactions_df['day_of_week'] = transactions_df['transaction_date'].dt.dayofweek
transactions_df['hour'] = transactions_df['transaction_date'].dt.hour

print("\n Ã–deme YÃ¶ntemi DaÄŸÄ±lÄ±mÄ±:")
print(transactions_df['payment_method'].value_counts())

print("\n Miktar Ä°statistikleri:")
print(transactions_df['quantity'].describe())

# Transaction trends
fig, axes = plt.subplots(2, 2, figsize=(15, 12))
fig.suptitle('Ä°ÅŸlem Verisi Analizi', fontsize=16)

# Payment method share
payment_counts = transactions_df['payment_method'].value_counts()
axes[0,0].pie(payment_counts.values, labels=payment_counts.index, autopct='%1.1f%%')
axes[0,0].set_title('Ã–deme YÃ¶ntemi DaÄŸÄ±lÄ±mÄ±')

# Sales per calendar month (month number only, so years are pooled)
monthly_sales = transactions_df.groupby('month')['price'].sum()
axes[0,1].plot(monthly_sales.index, monthly_sales.values, marker='o')
axes[0,1].set_title('AylÄ±k SatÄ±ÅŸ Trendi')
axes[0,1].set_xlabel('Ay')
axes[0,1].set_ylabel('Toplam SatÄ±ÅŸ (â‚º)')

# Sales per weekday. Reindex to all seven weekdays (0=Monday .. 6=Sunday):
# without it, data lacking any weekday yields fewer than 7 bars' worth of
# values and the bar() call below fails with a shape mismatch.
daily_sales = (
    transactions_df.groupby('day_of_week')['price'].sum()
    .reindex(range(7), fill_value=0)
)
day_names = ['Pazartesi', 'SalÄ±', 'Ã‡arÅŸamba', 'PerÅŸembe', 'Cuma', 'Cumartesi', 'Pazar']
axes[1,0].bar(range(7), daily_sales.values)
axes[1,0].set_xticks(range(7))
axes[1,0].set_xticklabels([d[:3] for d in day_names])
axes[1,0].set_title('HaftanÄ±n GÃ¼nlerine GÃ¶re SatÄ±ÅŸ')
axes[1,0].set_ylabel('Toplam SatÄ±ÅŸ (â‚º)')

# Sales per hour of day
hourly_sales = transactions_df.groupby('hour')['price'].sum()
axes[1,1].plot(hourly_sales.index, hourly_sales.values, marker='o')
axes[1,1].set_title('Saatlik SatÄ±ÅŸ DaÄŸÄ±lÄ±mÄ±')
axes[1,1].set_xlabel('Saat')
axes[1,1].set_ylabel('Toplam SatÄ±ÅŸ (â‚º)')

plt.tight_layout()
plt.show()

In [None]:
# Interaction data analysis
print("=== ETKÄ°LEÅžÄ°M VERÄ°SÄ° ANALÄ°ZÄ° ===")

# Parse the interaction timestamps and keep the hour of day as a new column
interactions_df['interaction_date'] = pd.to_datetime(interactions_df['interaction_date'])
interactions_df['interaction_hour'] = interactions_df['interaction_date'].dt.hour

interaction_counts = interactions_df['interaction_type'].value_counts()
print("\n EtkileÅŸim TÃ¼rÃ¼ DaÄŸÄ±lÄ±mÄ±:")
print(interaction_counts)

# Number of interactions recorded in each hour of the day
hourly_interactions = interactions_df.groupby('interaction_hour').size()

fig, axes = plt.subplots(1, 2, figsize=(15, 6))
ax_types, ax_hours = axes

# Share of each interaction type
ax_types.pie(interaction_counts.values, labels=interaction_counts.index, autopct='%1.1f%%')
ax_types.set_title('EtkileÅŸim TÃ¼rÃ¼ DaÄŸÄ±lÄ±mÄ±')

# Hourly interaction volume
ax_hours.plot(hourly_interactions.index, hourly_interactions.values, marker='o', color='orange')
ax_hours.set(title='Saatlik EtkileÅŸim DaÄŸÄ±lÄ±mÄ±', xlabel='Saat', ylabel='EtkileÅŸim SayÄ±sÄ±')

plt.tight_layout()
plt.show()

## 3. Veri Ön İşleme

Bu bölümde verilerimizi modelleme için hazırlayacağız: eksik değerleri doldurma, aykırı değerleri tespit etme, özellik mühendisliği ve normalizasyon işlemleri.

In [None]:
# Missing-value report for every dataset
print("=== EKSÄ°K DEÄžER ANALÄ°ZÄ° ===\n")

for name, df in datasets_info.items():
    missing_values = df.isnull().sum()
    missing_percent = (missing_values / len(df)) * 100
    # Only the columns that actually contain missing cells (column order kept)
    columns_with_gaps = missing_values[missing_values > 0]

    if columns_with_gaps.empty:
        print(f" {name} - Eksik deÄŸer yok")
    else:
        print(f" {name} - Eksik DeÄŸerler:")
        for col, n_missing in columns_with_gaps.items():
            print(f"  {col}: {n_missing} ({missing_percent[col]:.1f}%)")
    print("-" * 30)

In [None]:
# Eksik deÄŸerleri doldurma (eÄŸer varsa)
def handle_missing_values(df):
    """Return a copy of ``df`` with missing values imputed column by column.

    Numeric columns are filled with the column median; object-dtype columns
    are filled with their most frequent value. A column with no non-missing
    value at all is left unchanged, because no median/mode exists for it.

    Args:
        df: Input DataFrame. It is not modified.

    Returns:
        A new DataFrame with the same columns as ``df``.
    """
    df_filled = df.copy()

    # The filled column is assigned back rather than calling
    # ``fillna(inplace=True)`` on a column selection: that chained in-place
    # form is deprecated and does not update the frame when pandas
    # Copy-on-Write is active.
    numeric_cols = df_filled.select_dtypes(include=[np.number]).columns
    for col in numeric_cols:
        if df_filled[col].isnull().any():
            df_filled[col] = df_filled[col].fillna(df_filled[col].median())

    categorical_cols = df_filled.select_dtypes(include=['object']).columns
    for col in categorical_cols:
        if df_filled[col].isnull().any():
            modes = df_filled[col].mode()
            # mode() is empty when every value is missing; indexing [0] on it
            # would raise, so such a column is skipped.
            if not modes.empty:
                df_filled[col] = df_filled[col].fillna(modes.iloc[0])

    return df_filled

# Impute missing values in all four datasets; each name is rebound to the
# imputed copy returned by handle_missing_values.
customers_df = handle_missing_values(customers_df)
products_df = handle_missing_values(products_df)
transactions_df = handle_missing_values(transactions_df)
interactions_df = handle_missing_values(interactions_df)

print(" Eksik deÄŸerler dolduruldu!")

In [None]:
# AykÄ±rÄ± deÄŸer tespiti ve temizleme
def detect_outliers(df, column, method='iqr'):
    """Return the rows of ``df`` whose ``column`` value is an outlier.

    Args:
        df: DataFrame to inspect.
        column: Name of the numeric column to test.
        method: ``'iqr'`` flags values outside
            ``[Q1 - 1.5 * IQR, Q3 + 1.5 * IQR]``; ``'zscore'`` flags values
            lying more than 3 standard deviations from the column mean.

    Returns:
        The subset of ``df`` rows flagged as outliers (possibly empty).

    Raises:
        ValueError: If ``method`` is neither ``'iqr'`` nor ``'zscore'``.
    """
    values = df[column]

    if method == 'iqr':
        q1 = values.quantile(0.25)
        q3 = values.quantile(0.75)
        iqr = q3 - q1
        lower_bound = q1 - 1.5 * iqr
        upper_bound = q3 + 1.5 * iqr
        return df[(values < lower_bound) | (values > upper_bound)]

    if method == 'zscore':
        z_scores = np.abs((values - values.mean()) / values.std())
        return df[z_scores > 3]

    # Fail loudly on an unsupported method instead of falling through to an
    # unbound result variable.
    raise ValueError(f"Unknown outlier detection method: {method!r} (expected 'iqr' or 'zscore')")

# Outlier analysis
print("=== AYKIRI DEÄžER ANALÄ°ZÄ° ===\n")

# Numeric columns to check for outliers
numeric_columns = ['age', 'total_spent', 'purchase_frequency', 'price', 'quantity', 'discount']

# Frames are searched in this order; the first one containing the column wins
candidate_frames = (customers_df, transactions_df, products_df)

for col in numeric_columns:
    source = next((frame for frame in candidate_frames if col in frame.columns), None)
    if source is None:
        continue
    df = source

    outliers = detect_outliers(df, col)
    print(f" {col}: {len(outliers)} aykÄ±rÄ± deÄŸer tespit edildi")

    if len(outliers) == 0:
        continue

    # Visualise columns that do contain outliers: box plot next to histogram
    plt.figure(figsize=(10, 4))

    plt.subplot(1, 2, 1)
    plt.boxplot(df[col])
    plt.title(f'{col} - Box Plot')
    plt.ylabel(col)

    plt.subplot(1, 2, 2)
    plt.hist(df[col], bins=30, alpha=0.7)
    plt.title(f'{col} - Histogram')
    plt.xlabel(col)
    plt.ylabel('Frekans')

    plt.tight_layout()
    plt.show()

In [None]:
# Feature engineering
print("=== Ã–ZELLÄ°K MÃœHENDÄ°SLÄ°ÄžÄ° ===\n")


# 1. RFM analysis (Recency, Frequency, Monetary)
def calculate_rfm(df_transactions, df_customers):
    """Compute per-customer RFM features and attach customer attributes.

    Args:
        df_transactions: Transactions with ``customer_id``,
            ``transaction_id``, ``price`` and ``transaction_date`` columns.
            ``transaction_date`` may hold datetimes or date strings (as
            produced by ``read_csv``); the frame itself is not modified.
        df_customers: Customers with ``customer_id``, ``age``, ``gender``
            and ``city`` columns.

    Returns:
        DataFrame with one row per customer that has transactions and the
        columns ``customer_id``, ``last_purchase_date``, ``frequency``
        (transaction count), ``monetary`` (sum of ``price``), ``recency``
        (whole days between the customer's last purchase and the latest
        transaction in the data), ``age``, ``gender``, ``city``.
        Customers absent from ``df_customers`` keep missing attributes
        (left join).
    """
    # Parse the dates on a derived frame so the function does not depend on
    # another cell having converted the column first.
    transactions = df_transactions.assign(
        transaction_date=pd.to_datetime(df_transactions['transaction_date'])
    )

    # Reference point for recency: the most recent transaction overall
    max_date = transactions['transaction_date'].max()

    rfm = (
        transactions.groupby('customer_id')
        .agg(
            last_purchase_date=('transaction_date', 'max'),
            frequency=('transaction_id', 'count'),
            # NOTE: sums the raw ``price`` column; quantity and discount are
            # not applied here.
            monetary=('price', 'sum'),
        )
        .reset_index()
    )

    # Recency in whole days
    rfm['recency'] = (max_date - rfm['last_purchase_date']).dt.days

    # Attach the customer attributes
    rfm = rfm.merge(df_customers[['customer_id', 'age', 'gender', 'city']], on='customer_id', how='left')

    return rfm

# Compute the RFM table and print its shape and summary statistics
rfm_data = calculate_rfm(transactions_df, customers_df)
print("RFM Analizi tamamlandÄ±!")
print(f"RFM verisi boyutu: {rfm_data.shape}")
print("\nRFM Ä°statistikleri:")
print(rfm_data[['recency', 'frequency', 'monetary']].describe())

In [None]:
# 2. MÃ¼ÅŸteri YaÅŸam Boyu DeÄŸeri (CLV) Hesaplama
def calculate_clv(rfm_df, discount_rate=0.01, period_months=12):
    """Add a simple Customer Lifetime Value estimate to an RFM table.

    ``clv = avg_order_value * avg_purchase_frequency / discount_rate`` where
    ``avg_order_value = monetary / frequency`` and
    ``avg_purchase_frequency = frequency / period_months``.

    Args:
        rfm_df: DataFrame with ``frequency`` and ``monetary`` columns. It is
            modified in place: the columns ``clv``, ``avg_order_value`` and
            ``avg_purchase_frequency`` are added (or overwritten).
        discount_rate: Positive discount rate used as the divisor.
        period_months: Number of months the transactions are assumed to
            span. Defaults to 12.

    Returns:
        The same ``rfm_df`` object, with the three columns added.

    Raises:
        ValueError: If ``discount_rate`` or ``period_months`` is not positive.
    """
    if discount_rate <= 0:
        raise ValueError(f"discount_rate must be positive, got {discount_rate!r}")
    if period_months <= 0:
        raise ValueError(f"period_months must be positive, got {period_months!r}")

    # Average number of purchases per month over the observation window
    avg_purchase_frequency = rfm_df['frequency'] / period_months

    # Average order value
    avg_order_value = rfm_df['monetary'] / rfm_df['frequency']

    # Lifetime value (simple formula)
    clv = avg_order_value * avg_purchase_frequency * (1 / discount_rate)

    rfm_df['clv'] = clv
    rfm_df['avg_order_value'] = avg_order_value
    rfm_df['avg_purchase_frequency'] = avg_purchase_frequency

    return rfm_df

# Compute CLV (adds the clv / avg_order_value / avg_purchase_frequency columns)
rfm_data = calculate_clv(rfm_data)

print(" CLV Hesaplama tamamlandÄ±!")
print("\nCLV Ä°statistikleri:")
print(rfm_data[['clv', 'avg_order_value', 'avg_purchase_frequency']].describe())

In [None]:
# 3. MÃ¼ÅŸteri segmentasyonu iÃ§in kategorik deÄŸiÅŸkenleri encode etme
def encode_categorical_features(df):
    """Return a copy of ``df`` with numeric encodings of gender and city.

    ``gender_encoded`` maps ``'M'`` to 1 and ``'F'`` to 0 (any other value
    becomes missing). ``city_encoded`` holds the integer labels produced by
    a freshly fitted ``LabelEncoder``.

    Returns:
        Tuple ``(encoded_frame, fitted_city_encoder)``.
    """
    city_label_encoder = LabelEncoder()
    gender_codes = {'M': 1, 'F': 0}

    # assign() builds a new frame, leaving the caller's DataFrame untouched
    encoded_frame = df.assign(
        gender_encoded=df['gender'].map(gender_codes),
        city_encoded=city_label_encoder.fit_transform(df['city']),
    )

    return encoded_frame, city_label_encoder

# Encode the categorical columns and print the city -> integer label mapping
rfm_encoded, city_encoder = encode_categorical_features(rfm_data)

print(" Kategorik deÄŸiÅŸkenler encode edildi!")
print(f"\nÅžehir encoding haritasÄ±:")
# The position of a class in classes_ is the integer label assigned to it
for i, city in enumerate(city_encoder.classes_):
    print(f"  {city}: {i}")

In [None]:
# 4. Veri normalizasyonu/Ã¶lÃ§ekleme
def scale_features(df, feature_columns):
    """Standardise the given columns and append them as ``<name>_scaled``.

    Args:
        df: Source DataFrame (left unmodified).
        feature_columns: Names of the columns to standardise.

    Returns:
        Tuple ``(frame_with_scaled_columns, fitted_StandardScaler)``.
    """
    scaler = StandardScaler()
    standardized = scaler.fit_transform(df[feature_columns])

    # One new column per feature, in the order the features were given
    scaled_columns = {
        f'{col}_scaled': standardized[:, position]
        for position, col in enumerate(feature_columns)
    }

    return df.assign(**scaled_columns), scaler

# Features to standardise
features_to_scale = ['age', 'recency', 'frequency', 'monetary', 'clv', 'avg_order_value']

# Scale them; the result gains one '<name>_scaled' column per feature
rfm_scaled, feature_scaler = scale_features(rfm_encoded, features_to_scale)

print(" Veri normalizasyonu tamamlandÄ±!")
print("\nÃ–lÃ§eklenmiÅŸ Ã¶zelliklerin istatistikleri:")
print(rfm_scaled[[f'{col}_scaled' for col in features_to_scale]].describe())

In [None]:
# Final sanity check of the prepared data
print("=== HAZIRLANAN VERÄ° KONTROLÃœ ===\n")

print(f"Final RFM verisi boyutu: {rfm_scaled.shape}")
print(f"SÃ¼tunlar: {list(rfm_scaled.columns)}")
print(f"Eksik deÄŸerler: {rfm_scaled.isnull().sum().sum()}")

# NOTE(review): the label below says "last few rows", but head() prints the
# FIRST rows — confirm which one is intended.
print("\nSon birkaÃ§ satÄ±r:")
print(rfm_scaled.head())

# Persist the processed dataset
rfm_scaled.to_csv('data/processed_rfm_data.csv', index=False)
print("\n Ä°ÅŸlenmiÅŸ veri kaydedildi: data/processed_rfm_data.csv")

## 4. Müşteri Segmentasyonu (Kümeleme)

Bu bölümde K-Means algoritması kullanarak müşterileri segmentlere ayıracağız ve optimal cluster sayısını belirleyeceğiz.

In [None]:
# Determine the number of clusters
print("=== KÃœMELEME ANALÄ°ZÄ° ===\n")

# Features used for clustering
clustering_features = ['recency_scaled', 'frequency_scaled', 'monetary_scaled', 'clv_scaled', 'age_scaled']
X_clustering = rfm_scaled[clustering_features]

print(f"KÃ¼meleme iÃ§in kullanÄ±lacak Ã¶zellikler: {clustering_features}")
print(f"Veri boyutu: {X_clustering.shape}")

# Fit K-Means for every candidate k, recording inertia (WCSS) and silhouette
K_range = range(2, 11)
inertias = []
silhouette_scores = []

for k in K_range:
    kmeans = KMeans(n_clusters=k, random_state=42, n_init=10)
    kmeans.fit(X_clustering)
    inertias.append(kmeans.inertia_)
    silhouette_scores.append(silhouette_score(X_clustering, kmeans.labels_))

# Elbow curve and silhouette curve side by side
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 6))

ax1.plot(K_range, inertias, marker='o')
ax1.set(title='Elbow Method (Within-Cluster Sum of Squares)',
        xlabel='Cluster SayÄ±sÄ± (k)', ylabel='WCSS')
ax1.grid(True)

ax2.plot(K_range, silhouette_scores, marker='s', color='orange')
ax2.set(title='Silhouette Score',
        xlabel='Cluster SayÄ±sÄ± (k)', ylabel='Silhouette Score')
ax2.grid(True)

plt.tight_layout()
plt.show()

# Pick the k with the highest silhouette score
best_k = K_range[np.argmax(silhouette_scores)]
print(f"\n En iyi cluster sayÄ±sÄ± (Silhouette Score'a gÃ¶re): {best_k}")
print(f"En yÃ¼ksek Silhouette Score: {max(silhouette_scores):.3f}")

# Tabular view of the search results
results_df = pd.DataFrame({
    'K': K_range,
    'WCSS': inertias,
    'Silhouette_Score': silhouette_scores
})
print("\nKÃ¼meleme SonuÃ§larÄ±:")
print(results_df)

In [None]:
# Final clustering with the selected k
print(f"=== K={best_k} Ä°LE KÃœMELEME ===\n")

# Fit the final K-Means model and label every customer
final_kmeans = KMeans(n_clusters=best_k, random_state=42, n_init=10)
cluster_labels = final_kmeans.fit_predict(X_clustering)
rfm_scaled['cluster'] = cluster_labels

# Cluster sizes, ordered by cluster id
cluster_counts = pd.Series(cluster_labels).value_counts().sort_index()
total_customers = len(cluster_labels)

print(f"KÃ¼me daÄŸÄ±lÄ±mÄ±:")
for i, count in cluster_counts.items():
    share = count / total_customers * 100
    print(f"  Cluster {i}: {count} mÃ¼ÅŸteri (%{share:.1f})")

# Cluster centres, expressed in the scaled feature space
cluster_centers = final_kmeans.cluster_centers_
centers_df = pd.DataFrame(cluster_centers, columns=clustering_features).assign(
    cluster=range(best_k)
)

print("\nCluster Merkezleri (Ã¶lÃ§eklenmiÅŸ deÄŸerler):")
print(centers_df)

In [None]:
# Segment analysis and interpretation
print("=== SEGMENT ANALÄ°ZÄ° VE YORUMLAMA ===\n")

# Mean profile of every cluster on the unscaled features
profile_columns = ['age', 'recency', 'frequency', 'monetary', 'clv', 'avg_order_value']
cluster_analysis = rfm_scaled.groupby('cluster').agg(
    {column: 'mean' for column in profile_columns}
).round(2)

print("Cluster Ã–zellikleri:")
print(cluster_analysis)

# Thresholds are taken across the cluster means; they do not change inside
# the loop, so they are computed once up front.
recency_median = cluster_analysis['recency'].median()
frequency_median = cluster_analysis['frequency'].median()
monetary_q75 = cluster_analysis['monetary'].quantile(0.75)
recency_q75 = cluster_analysis['recency'].quantile(0.75)
frequency_q25 = cluster_analysis['frequency'].quantile(0.25)

# Name each segment; the first matching rule wins
segment_names = {}
for cluster_id in range(best_k):
    cluster_data = cluster_analysis.loc[cluster_id]

    if cluster_data['recency'] < recency_median and cluster_data['frequency'] > frequency_median:
        name = "Loyal Customers"
    elif cluster_data['monetary'] > monetary_q75:
        name = "High Value Customers"
    elif cluster_data['recency'] > recency_q75:
        name = "At Risk Customers"
    elif cluster_data['frequency'] < frequency_q25:
        name = "New Customers"
    else:
        name = f"Segment {cluster_id}"

    segment_names[cluster_id] = name

    segment_size = cluster_counts[cluster_id]
    segment_share = segment_size / len(cluster_labels) * 100

    print(f"\nCluster {cluster_id}: {name}")
    print(f"   MÃ¼ÅŸteri SayÄ±sÄ±: {segment_size} (%{segment_share:.1f})")
    print(f"   Ortalama YaÅŸ: {cluster_data['age']:.1f}")
    print(f"   Ortalama Recency: {cluster_data['recency']:.1f} gÃ¼n")
    print(f"   Ortalama Frequency: {cluster_data['frequency']:.1f}")
    print(f"   Ortalama Monetary: {cluster_data['monetary']:.2f} â‚º")
    print(f"   Ortalama CLV: {cluster_data['clv']:.2f} â‚º")

# Attach the segment names to the customer rows
rfm_scaled['segment_name'] = rfm_scaled['cluster'].map(segment_names)

print("\n" + "="*50)
print("SEGMENT Ã–ZETÄ°:")
segment_summary = rfm_scaled['segment_name'].value_counts()
print(segment_summary)

In [None]:
# Segment visualisation
fig, axes = plt.subplots(2, 2, figsize=(16, 12))
fig.suptitle('MÃ¼ÅŸteri Segmentleri Analizi', fontsize=16, fontweight='bold')

# One colour per cluster. The hand-picked list only has five entries while
# best_k can be as large as 10, so fall back to a generated palette when
# there are more clusters than named colours (indexing would fail otherwise).
base_colors = ['red', 'blue', 'green', 'orange', 'purple']
if best_k <= len(base_colors):
    colors = base_colors[:best_k]
else:
    colors = sns.color_palette('husl', best_k)

# 1. Segment share (pie chart)
segment_counts = rfm_scaled['segment_name'].value_counts()
axes[0,0].pie(segment_counts.values, labels=segment_counts.index, autopct='%1.1f%%')
axes[0,0].set_title('MÃ¼ÅŸteri Segment DaÄŸÄ±lÄ±mÄ±')

# 2. Recency vs Frequency scatter
# `color=` (not `c=`) is used so an RGB tuple is read as one colour rather
# than as a sequence of per-point values.
for i in range(best_k):
    cluster_data = rfm_scaled[rfm_scaled['cluster'] == i]
    axes[0,1].scatter(cluster_data['recency'], cluster_data['frequency'],
                      color=colors[i], label=segment_names[i], alpha=0.6)
axes[0,1].set_xlabel('Recency (GÃ¼n)')
axes[0,1].set_ylabel('Frequency')
axes[0,1].set_title('Recency vs Frequency')
axes[0,1].legend()

# 3. Monetary vs CLV scatter
for i in range(best_k):
    cluster_data = rfm_scaled[rfm_scaled['cluster'] == i]
    axes[1,0].scatter(cluster_data['monetary'], cluster_data['clv'],
                      color=colors[i], label=segment_names[i], alpha=0.6)
axes[1,0].set_xlabel('Monetary (â‚º)')
axes[1,0].set_ylabel('CLV (â‚º)')
axes[1,0].set_title('Monetary vs CLV')
axes[1,0].legend()

# 4. Mean CLV per segment
avg_clv_by_segment = rfm_scaled.groupby('segment_name')['clv'].mean().sort_values(ascending=False)
axes[1,1].bar(range(len(avg_clv_by_segment)), avg_clv_by_segment.values)
axes[1,1].set_xticks(range(len(avg_clv_by_segment)))
axes[1,1].set_xticklabels(avg_clv_by_segment.index, rotation=45, ha='right')
axes[1,1].set_title('Segment BaÅŸÄ±na Ortalama CLV')
axes[1,1].set_ylabel('Ortalama CLV (â‚º)')

plt.tight_layout()
plt.show()

# Silhouette analysis: one horizontal band of sorted per-sample scores per cluster
sample_silhouette_values = silhouette_samples(X_clustering, cluster_labels)

fig, ax = plt.subplots(figsize=(10, 6))
y_lower = 10
for i in range(best_k):
    cluster_silhouette_values = sample_silhouette_values[cluster_labels == i]
    cluster_silhouette_values.sort()

    size_cluster_i = cluster_silhouette_values.shape[0]
    y_upper = y_lower + size_cluster_i

    color = colors[i]
    ax.fill_betweenx(np.arange(y_lower, y_upper),
                     0, cluster_silhouette_values,
                     facecolor=color, edgecolor=color, alpha=0.7)

    # Label the band with its cluster id, then leave a 10-unit gap
    ax.text(-0.05, y_lower + 0.5 * size_cluster_i, str(i))
    y_lower = y_upper + 10

ax.set_xlabel('Silhouette coefficient values')
ax.set_ylabel('Cluster label')
ax.set_title('Silhouette Analysis')

# Dashed line: mean silhouette value over all samples
ax.axvline(x=np.mean(sample_silhouette_values), color="red", linestyle="--")
plt.show()

## 5. Satış Tahmini (Regresyon)

Bu bölümde müşteri segmentasyonu ve diğer özellikleri kullanarak gelecekteki satışları tahmin edeceğiz.

In [None]:
# Data preparation for sales forecasting
print("=== SATIÅž TAHMÄ°NÄ° VERÄ° HAZIRLAMA ===\n")

# Aggregate to one row per CALENDAR DAY. Grouping on the raw
# transaction_date would create one group per distinct timestamp, so the
# "daily" table and its 7/30-row lags would not correspond to days.
# resample('D') also emits days without transactions (sums and count are 0),
# which keeps a lag of N rows equal to a lag of N days.
daily_sales = (
    transactions_df
    .assign(transaction_date=pd.to_datetime(transactions_df['transaction_date']))
    .set_index('transaction_date')
    .resample('D')
    .agg({'price': 'sum', 'quantity': 'sum', 'transaction_id': 'count'})
    [['price', 'quantity', 'transaction_id']]
    .reset_index()
)

daily_sales.columns = ['date', 'total_sales', 'total_quantity', 'transaction_count']

# Calendar features
daily_sales['day_of_week'] = daily_sales['date'].dt.dayofweek
daily_sales['month'] = daily_sales['date'].dt.month
daily_sales['day_of_month'] = daily_sales['date'].dt.day
daily_sales['quarter'] = daily_sales['date'].dt.quarter
daily_sales['is_weekend'] = (daily_sales['day_of_week'] >= 5).astype(int)

# Lag features (sales 1, 7 and 30 days earlier)
daily_sales['sales_lag1'] = daily_sales['total_sales'].shift(1)
daily_sales['sales_lag7'] = daily_sales['total_sales'].shift(7)
daily_sales['sales_lag30'] = daily_sales['total_sales'].shift(30)

# Moving averages over the PREVIOUS 7 / 30 days. The shift(1) keeps the
# current day's total_sales (the prediction target) out of its own feature.
previous_sales = daily_sales['total_sales'].shift(1)
daily_sales['sales_ma7'] = previous_sales.rolling(window=7).mean()
daily_sales['sales_ma30'] = previous_sales.rolling(window=30).mean()

# Drop the leading rows whose lag / rolling features are undefined
daily_sales = daily_sales.dropna()

print(f" GÃ¼nlÃ¼k satÄ±ÅŸ verisi boyutu: {daily_sales.shape}")
print(f"Tarih aralÄ±ÄŸÄ±: {daily_sales['date'].min()} - {daily_sales['date'].max()}")
print("\nGÃ¼nlÃ¼k satÄ±ÅŸ istatistikleri:")
print(daily_sales['total_sales'].describe())

In [None]:
# Feature selection and preparation
# Target variable: total_sales
target = 'total_sales'

# Feature set
# NOTE(review): total_quantity and transaction_count are same-day values of
# the day being predicted — confirm they are known at prediction time.
feature_columns = [
    'total_quantity', 'transaction_count', 'day_of_week', 'month', 
    'day_of_month', 'quarter', 'is_weekend', 'sales_lag1', 'sales_lag7',
    'sales_lag30', 'sales_ma7', 'sales_ma30'
]

X = daily_sales[feature_columns]
y = daily_sales[target]

print(f"Ã–zellik sayÄ±sÄ±: {len(feature_columns)}")
print(f"Ã–rnek sayÄ±sÄ±: {len(X)}")
print(f"Hedef deÄŸiÅŸken: {target}")

# Train/test split. shuffle=False keeps row order, so the test set is the
# final 20% of the rows.
test_size = 0.2
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=test_size, shuffle=False, random_state=42
)

print(f"\nEÄŸitim seti boyutu: {X_train.shape}")
print(f"Test seti boyutu: {X_test.shape}")

# Feature scaling: the scaler is fitted on the training rows only and then
# applied to the test rows.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

print(" Ã–zellik Ã¶lÃ§ekleme tamamlandÄ±!")

In [None]:
# Model training and evaluation
from sklearn.model_selection import TimeSeriesSplit  # local: only this cell needs it

print("=== REGRESYON MODELLERÄ° ===\n")

models = {
    'Linear Regression': LinearRegression(),
    'Random Forest': RandomForestRegressor(n_estimators=100, random_state=42),
    'Gradient Boosting': GradientBoostingRegressor(n_estimators=100, random_state=42)
}

# The rows are in chronological order (the train/test split is unshuffled),
# so cross-validation must also respect time: TimeSeriesSplit always
# validates on rows that come AFTER the rows it trained on, whereas plain
# k-fold would train on future rows to predict earlier ones.
time_series_cv = TimeSeriesSplit(n_splits=5)

# Collected metrics, one dict per model
results = []

for name, model in models.items():
    print(f"ðŸ”§ {name} eÄŸitiliyor...")

    # Fit on the training rows
    model.fit(X_train_scaled, y_train)

    # Predictions
    y_train_pred = model.predict(X_train_scaled)
    y_test_pred = model.predict(X_test_scaled)

    # Metrics
    train_mae = mean_absolute_error(y_train, y_train_pred)
    test_mae = mean_absolute_error(y_test, y_test_pred)
    train_rmse = np.sqrt(mean_squared_error(y_train, y_train_pred))
    test_rmse = np.sqrt(mean_squared_error(y_test, y_test_pred))
    train_r2 = r2_score(y_train, y_train_pred)
    test_r2 = r2_score(y_test, y_test_pred)

    # Time-ordered cross-validation on the training rows
    # (the scorer returns negated MAE, hence the sign flip)
    cv_scores = cross_val_score(model, X_train_scaled, y_train, cv=time_series_cv,
                                scoring='neg_mean_absolute_error')
    cv_mae = -cv_scores.mean()

    results.append({
        'Model': name,
        'Train MAE': train_mae,
        'Test MAE': test_mae,
        'Train RMSE': train_rmse,
        'Test RMSE': test_rmse,
        'Train RÂ²': train_r2,
        'Test RÂ²': test_r2,
        'CV MAE': cv_mae
    })

    print(f"   Test MAE: {test_mae:.2f}")
    print(f"   Test RMSE: {test_rmse:.2f}")
    print(f"   Test RÂ²: {test_r2:.3f}")
    print(f"   CV MAE: {cv_mae:.2f}")
    print("-" * 40)

# Turn the results into a DataFrame for comparison
results_df = pd.DataFrame(results)
print("\n MODEL KARÅžILAÅžTIRMA:")
print(results_df.round(3))

In [None]:
# Select the best model and analyse it in detail
# NOTE(review): the winner is chosen by its TEST MAE, so the test set is
# also used for model selection — consider selecting on 'CV MAE' instead.
best_model_name = results_df.loc[results_df['Test MAE'].idxmin(), 'Model']
print(f" En iyi model: {best_model_name}")

# Refit the best model on the training rows
best_model = models[best_model_name]
best_model.fit(X_train_scaled, y_train)

# Feature importance (if the model exposes it)
if hasattr(best_model, 'feature_importances_'):
    feature_importance = pd.DataFrame({
        'feature': feature_columns,
        'importance': best_model.feature_importances_
    }).sort_values('importance', ascending=False)
    
    print(f"\n {best_model_name} - Feature Importance:")
    print(feature_importance)
    
    # Feature importance bar chart
    plt.figure(figsize=(10, 8))
    plt.barh(feature_importance['feature'], feature_importance['importance'])
    plt.title(f'{best_model_name} - Feature Importance')
    plt.xlabel('Importance')
    plt.tight_layout()
    plt.show()

# Otherwise fall back to model coefficients, sorted by absolute value
elif hasattr(best_model, 'coef_'):
    feature_coef = pd.DataFrame({
        'feature': feature_columns,
        'coefficient': best_model.coef_
    }).sort_values('coefficient', key=abs, ascending=False)
    
    print(f"\n {best_model_name} - Feature Coefficients:")
    print(feature_coef)

# Predicted vs actual values
y_test_pred = best_model.predict(X_test_scaled)

plt.figure(figsize=(12, 5))

# Left panel: predictions against actuals; the dashed red diagonal marks a
# perfect prediction
plt.subplot(1, 2, 1)
plt.scatter(y_test, y_test_pred, alpha=0.6)
plt.plot([y_test.min(), y_test.max()], [y_test.min(), y_test.max()], 'r--', lw=2)
plt.xlabel('GerÃ§ek DeÄŸerler')
plt.ylabel('Tahmin Edilen DeÄŸerler')
plt.title(f'{best_model_name} - Tahmin vs GerÃ§ek')

# Right panel: residuals (actual - predicted) against the predictions
plt.subplot(1, 2, 2)
residuals = y_test - y_test_pred
plt.scatter(y_test_pred, residuals, alpha=0.6)
plt.axhline(y=0, color='r', linestyle='--')
plt.xlabel('Tahmin Edilen DeÄŸerler')
plt.ylabel('ArtÄ±klar (Residuals)')
plt.title('ArtÄ±k Analizi')

plt.tight_layout()
plt.show()

## 6. Müşteri Davranış Sınıflandırması

Bu bölümde müşterileri loyal/non-loyal olarak sınıflandıracağız ve hangi faktörlerin müşteri sadakatini etkilediğini analiz edeceğiz.

In [None]:
# Data preparation for the loyal / non-loyal customer classification
print("=== MÃœÅžTERÄ° DAVRANIÅž SINIFLANDIRMASI ===\n")

# Loyalty definition: a customer is loyal when both frequency and monetary
# are at or above their 70th percentile (i.e. in the top 30%).
frequency_threshold = rfm_scaled['frequency'].quantile(0.7)
monetary_threshold = rfm_scaled['monetary'].quantile(0.7)

print(f" Loyal MÃ¼ÅŸteri EÅŸikleri:")
print(f"  Frequency eÅŸiÄŸi: {frequency_threshold:.2f}")
print(f"  Monetary eÅŸiÄŸi: {monetary_threshold:.2f}")

# Build the binary target column (1 = loyal, 0 = non-loyal)
rfm_scaled['is_loyal'] = ((rfm_scaled['frequency'] >= frequency_threshold) &
                          (rfm_scaled['monetary'] >= monetary_threshold)).astype(int)

# .get(..., 0) avoids a KeyError when one of the two classes is empty
loyal_counts = rfm_scaled['is_loyal'].value_counts()
n_loyal = loyal_counts.get(1, 0)
n_non_loyal = loyal_counts.get(0, 0)
print(f"\n MÃ¼ÅŸteri DaÄŸÄ±lÄ±mÄ±:")
print(f"  Loyal mÃ¼ÅŸteriler: {n_loyal} (%{n_loyal/len(rfm_scaled)*100:.1f})")
print(f"  Non-loyal mÃ¼ÅŸteriler: {n_non_loyal} (%{n_non_loyal/len(rfm_scaled)*100:.1f})")

# Features for the classifier.
# 'frequency_scaled' and 'monetary_scaled' are deliberately excluded: the
# target above is a threshold on frequency and monetary, so (presumably being
# rescaled copies of those columns) they would let the model reconstruct the
# label directly. That is target leakage and yields trivially perfect scores.
# NOTE(review): 'clv_scaled' and 'avg_order_value_scaled' may also be derived
# from frequency/monetary -- confirm against the feature-engineering cell.
classification_features = [
    'age_scaled', 'recency_scaled',
    'clv_scaled', 'avg_order_value_scaled',
    'gender_encoded', 'city_encoded'
]

X_class = rfm_scaled[classification_features]
y_class = rfm_scaled['is_loyal']

print(f"\n SÄ±nÄ±flandÄ±rma veri boyutu: {X_class.shape}")
print(f"Ã–zellikler: {classification_features}")

In [None]:
# Stratified 80/20 hold-out split: stratifying on the target keeps the loyal
# ratio the same in both partitions.
split_parts = train_test_split(
    X_class,
    y_class,
    test_size=0.2,
    random_state=42,
    stratify=y_class,
)
X_train_class, X_test_class, y_train_class, y_test_class = split_parts

# Report partition shapes and class balance
print(f"EÄŸitim seti boyutu: {X_train_class.shape}")
print(f"Test seti boyutu: {X_test_class.shape}")
print(f"EÄŸitim seti loyal oranÄ±: {y_train_class.mean():.3f}")
print(f"Test seti loyal oranÄ±: {y_test_class.mean():.3f}")

# Standardise features: statistics are learned on the training partition only
# and then applied unchanged to both partitions.
class_scaler = StandardScaler().fit(X_train_class)
X_train_class_scaled = class_scaler.transform(X_train_class)
X_test_class_scaled = class_scaler.transform(X_test_class)

print(" SÄ±nÄ±flandÄ±rma veri hazÄ±rlÄ±ÄŸÄ± tamamlandÄ±!")

In [None]:
# Train each candidate classifier and collect its train/test metrics
print("=== SINIFLANDIRMA MODELLERÄ° ===\n")

classifiers = {
    'Logistic Regression': LogisticRegression(random_state=42),
    'Random Forest': RandomForestClassifier(n_estimators=100, random_state=42)
}

classification_results = []

for name in classifiers:
    clf = classifiers[name]
    print(f" {name} eÄŸitiliyor...")

    # Fit on the scaled training partition
    clf.fit(X_train_class_scaled, y_train_class)

    # Predict on both partitions
    y_train_pred = clf.predict(X_train_class_scaled)
    y_test_pred = clf.predict(X_test_class_scaled)

    # Dict-form reports; key '1' holds the metrics of the loyal class
    test_report = classification_report(y_test_class, y_test_pred, output_dict=True)
    train_report = classification_report(y_train_class, y_train_pred, output_dict=True)

    test_accuracy = test_report['accuracy']
    loyal_scores = test_report['1']
    test_precision = loyal_scores['precision']
    test_recall = loyal_scores['recall']
    test_f1 = loyal_scores['f1-score']

    classification_results.append({
        'Model': name,
        'Train Accuracy': train_report['accuracy'],
        'Test Accuracy': test_accuracy,
        'Test Precision': test_precision,
        'Test Recall': test_recall,
        'Test F1-Score': test_f1
    })

    print(f"   Test Accuracy: {test_accuracy:.3f}")
    print(f"   Test Precision: {test_precision:.3f}")
    print(f"   Test Recall: {test_recall:.3f}")
    print(f"   Test F1-Score: {test_f1:.3f}")
    print("-" * 40)

# One row per model, printed rounded to three decimals
class_results_df = pd.DataFrame(classification_results)
print("\n SINIFLANDIRMA MODEL KARÅžILAÅžTIRMA:")
print(class_results_df.round(3))

In [None]:
# Pick the classifier with the highest test F1-score for the loyal class
best_classifier_name = class_results_df.loc[class_results_df['Test F1-Score'].idxmax(), 'Model']
print(f" En iyi sÄ±nÄ±flandÄ±rma modeli: {best_classifier_name}")

# Refit the selected estimator on the scaled training data
best_classifier = classifiers[best_classifier_name]
best_classifier.fit(X_train_class_scaled, y_train_class)

# Confusion matrix; labels are fixed so rows/columns always line up with the
# 'Non-Loyal' / 'Loyal' tick labels below.
y_test_pred_class = best_classifier.predict(X_test_class_scaled)
cm = confusion_matrix(y_test_class, y_test_pred_class, labels=[0, 1])

plt.figure(figsize=(12, 5))

# Left panel: confusion matrix heatmap
plt.subplot(1, 2, 1)
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
            xticklabels=['Non-Loyal', 'Loyal'],
            yticklabels=['Non-Loyal', 'Loyal'])
plt.title(f'{best_classifier_name} - Confusion Matrix')
plt.ylabel('GerÃ§ek DeÄŸer')
plt.xlabel('Tahmin Edilen')

# Right panel: classification report as a heatmap
plt.subplot(1, 2, 2)
report = classification_report(y_test_class, y_test_pred_class, output_dict=True)
report_df = pd.DataFrame(report).transpose()
report_df = report_df.round(3)

# Drop only the 'support' column (raw counts do not belong on a 0-1 colour
# scale). The previous positional slice iloc[:-1, :-1] also removed the last
# row, 'weighted avg', which was unintended.
sns.heatmap(report_df.drop(columns='support'), annot=True, cmap='RdYlGn')
plt.title('Classification Report')
plt.tight_layout()
plt.show()

# Reuse the report computed above instead of recomputing it
print("\n Classification Report:")
print(pd.DataFrame(report).round(3))

In [None]:
# Feature importance analysis for the selected classifier
print("=== FEATURE IMPORTANCE ANALÄ°ZÄ° ===\n")

if hasattr(best_classifier, 'feature_importances_'):
    # Estimator exposes importances directly (e.g. the Random Forest)
    importance_values = best_classifier.feature_importances_
elif hasattr(best_classifier, 'coef_'):
    # Otherwise use the absolute value of the first coefficient row
    # (e.g. the Logistic Regression)
    importance_values = np.abs(best_classifier.coef_[0])
else:
    # Fail loudly rather than silently reusing a stale feature_importance
    # table left over from an earlier cell.
    raise AttributeError(
        f"{best_classifier_name} exposes neither feature_importances_ nor coef_"
    )

feature_importance = pd.DataFrame({
    'feature': classification_features,
    'importance': importance_values
}).sort_values('importance', ascending=False)

print(f" {best_classifier_name} - Feature Importance:")
print(feature_importance)

# Feature importance visualisation (most important feature on top)
plt.figure(figsize=(10, 8))
plt.barh(feature_importance['feature'], feature_importance['importance'])
plt.title(f'{best_classifier_name} - Feature Importance')
plt.xlabel('Importance')
plt.gca().invert_yaxis()
plt.tight_layout()
plt.show()

# Commentary on the most important features.
# The rank comes from enumerate(): after sort_values the DataFrame index still
# holds each feature's original position, so index + 1 is not the rank.
print("\nðŸ’¡ EN Ã–NEMLÄ° Ã–ZELLÄ°KLER:")
top_features = feature_importance.head(5)
for rank, row in enumerate(top_features.itertuples(index=False), start=1):
    feature_name = row.feature.replace('_scaled', '').replace('_encoded', '')
    print(f"  {rank}. {feature_name}: {row.importance:.3f}")

## 7. Sonuçlar ve Raporlama

Bu bölümde proje sonuçlarını özetleyip iş önerileri ve gelecek çalışmalar için öneriler sunacağız.

In [None]:
# Executive summary of the project results
banner = "=" * 80
print(banner)
print("                    PROJE SONUÃ‡LARI Ã–ZETÄ°")
print(banner)

# 1. Dataset sizes and average customer value
print("\n 1. VERÄ° ANALÄ°ZÄ° SONUÃ‡LARI:")
print(f"   â€¢ Toplam mÃ¼ÅŸteri sayÄ±sÄ±: {len(customers_df):,}")
print(f"   â€¢ Toplam Ã¼rÃ¼n sayÄ±sÄ±: {len(products_df):,}")
print(f"   â€¢ Toplam iÅŸlem sayÄ±sÄ±: {len(transactions_df):,}")
print(f"   â€¢ Toplam etkileÅŸim sayÄ±sÄ±: {len(interactions_df):,}")

avg_customer_value = customers_df['total_spent'].mean()
print(f"   â€¢ Ortalama mÃ¼ÅŸteri deÄŸeri: {avg_customer_value:.2f} â‚º")

# 2. Segmentation: cluster count and share of each segment
print("\n 2. MÃœÅžTERÄ° SEGMENTASYONU:")
print(f"   â€¢ Optimum cluster sayÄ±sÄ±: {best_k}")
print(f"   â€¢ Segment daÄŸÄ±lÄ±mÄ±:")
for segment in segment_summary.keys():
    count = segment_summary[segment]
    share = count/len(rfm_scaled)*100
    print(f"     - {segment}: {count} mÃ¼ÅŸteri (%{share:.1f})")

# 3. Regression: row of the model with the lowest test MAE
print("\n 3. SATIÅž TAHMÄ°NÄ°:")
best_regression_idx = results_df['Test MAE'].idxmin()
best_regression = results_df.loc[best_regression_idx]
print(f"   â€¢ En iyi model: {best_regression['Model']}")
print(f"   â€¢ Test MAE: {best_regression['Test MAE']:.2f}")
print(f"   â€¢ Test RÂ²: {best_regression['Test RÂ²']:.3f}")
print(f"   â€¢ Test RMSE: {best_regression['Test RMSE']:.2f}")

# 4. Classification: row of the model with the highest test F1-score
print("\n 4. MÃœÅžTERÄ° SINIFLANDIRMASI:")
best_classification_idx = class_results_df['Test F1-Score'].idxmax()
best_classification = class_results_df.loc[best_classification_idx]
print(f"   â€¢ En iyi model: {best_classification['Model']}")
print(f"   â€¢ Test Accuracy: {best_classification['Test Accuracy']:.3f}")
print(f"   â€¢ Test F1-Score: {best_classification['Test F1-Score']:.3f}")
print(f"   â€¢ Loyal mÃ¼ÅŸteri oranÄ±: %{y_class.mean()*100:.1f}")

# 5. Highlights: first entry of avg_clv_by_segment
# (presumably sorted by CLV, descending -- verify where it is built)
print("\n 5. Ã–NE Ã‡IKAN BULGULAR:")
print(f"   â€¢ En deÄŸerli segment: {avg_clv_by_segment.index[0]}")
print(f"   â€¢ En yÃ¼ksek CLV: {avg_clv_by_segment.iloc[0]:.2f} â‚º")

# Top-ranked feature with the preprocessing suffixes stripped from its name
top_factor = feature_importance.iloc[0]['feature']
for suffix in ('_scaled', '_encoded'):
    top_factor = top_factor.replace(suffix, '')
print(f"   â€¢ MÃ¼ÅŸteri sadakatinde en Ã¶nemli faktÃ¶r: {top_factor}")

print("\n" + banner)

In [None]:
# Business recommendations and strategies
print("=" * 80)
print("                    Ä°Åž Ã–NERÄ°LERÄ° VE STRATEJÄ°LER")
print("=" * 80)

# Was "\ 1.": an invalid escape that printed a literal backslash instead of
# the blank line used by every other section heading.
print("\n 1. MÃœÅžTERÄ° SEGMENTASYONU STRATEJÄ°LERÄ°:")
# The '%_loyal' placeholder was never substituted; fill in the loyal share.
# NOTE(review): uses the is_loyal rate (y_class); switch to the segment share
# if the clustering segment was meant instead.
print(f"\n    Loyal Customers (%{y_class.mean()*100:.1f}):")
print("   â€¢ Ã–zel mÃ¼ÅŸteri programlarÄ± oluÅŸturun")
print("   â€¢ Early access fÄ±rsatlarÄ± sunun")
print("   â€¢ KiÅŸiselleÅŸtirilmiÅŸ Ã¶neriler geliÅŸtirin")
print("   â€¢ Sadakat Ã¶dÃ¼lleri programlarÄ± uygulayÄ±n")

print("\n    High Value Customers:")
print("   â€¢ Premium hizmet paketleri oluÅŸturun")
print("   â€¢ VIP mÃ¼ÅŸteri temsilcisi atayÄ±n")
print("   â€¢ Exclusive Ã¼rÃ¼n lansmanlarÄ± yapÄ±n")
print("   â€¢ Ä°ndirim ve kampanya Ã¶nceliÄŸi verin")

print("\n     At Risk Customers:")
print("   â€¢ Win-back kampanyalarÄ± dÃ¼zenleyin")
print("   â€¢ Ã–zel indirimler teklif edin")
print("   â€¢ Anketlerle geri bildirim alÄ±n")
print("   â€¢ MÃ¼ÅŸteri hizmetleri ile iletiÅŸime geÃ§in")

print("\n 2. SATIÅž TAHMÄ°NÄ° VE Ä°Åž PLANLAMA:")
print("   â€¢ GÃ¼nlÃ¼k satÄ±ÅŸ tahminlerini kullanarak envanter yÃ¶netimini optimize edin")
print("   â€¢ YÃ¼ksek satÄ±ÅŸ gÃ¼nleri iÃ§in hazÄ±rlÄ±k yapÄ±n")
print("   â€¢ Sezonluk trendleri gÃ¶z Ã¶nÃ¼nde bulundurun")
print("   â€¢ Pazarlama bÃ¼tÃ§esini tahminlere gÃ¶re planlayÄ±n")

# top_factor is the top-ranked feature name computed in the summary cell
print("\n 3. MÃœÅžTERÄ° DAVRANIÅž ANALÄ°ZÄ°:")
print(f"   â€¢ En Ã¶nemli faktÃ¶r ({top_factor}) Ã¼zerinde odaklanÄ±n")
print("   â€¢ MÃ¼ÅŸteri yolculuÄŸunu optimize edin")
print("   â€¢ KiÅŸiselleÅŸtirilmiÅŸ deneyimler sunun")
print("   â€¢ EtkileÅŸim noktalarÄ±nÄ± gÃ¼Ã§lendirin")

print("\n  4. GENEL Ä°Åž GELÄ°ÅžTÄ°RME Ã–NERÄ°LERÄ°:")
print("   â€¢ Real-time analitik dashboard oluÅŸturun")
print("   â€¢ MÃ¼ÅŸteri yaÅŸam dÃ¶ngÃ¼sÃ¼ takibini otomatikleÅŸtirin")
print("   â€¢ AI destekli Ã¶neri sistemleri geliÅŸtirin")
print("   â€¢ Multi-channel entegrasyon saÄŸlayÄ±n")
print("   â€¢ A/B testleri ile stratejileri optimize edin")

print("\n" + "=" * 80)

In [None]:
# Future work and improvements
print("=" * 80)
print("                 GELECEK Ã‡ALIÅžMALAR VE Ä°YÄ°LEÅžTÄ°RMELER")
print("=" * 80)

print("\n 1. VERÄ° BÄ°LÄ°MÄ° GELÄ°ÅžTÄ°RMELERÄ°:")
print("    Veri GeniÅŸletme:")
print("   â€¢ Daha fazla mÃ¼ÅŸteri demografik verisi toplayÄ±n")
print("   â€¢ Sosyal medya etkileÅŸim verilerini dahil edin")
print("   â€¢ ÃœrÃ¼n yorum ve puanlama verilerini kullanÄ±n")
print("   â€¢ Sesli mÃ¼ÅŸteri hizmetleri kayÄ±tlarÄ±nÄ± analiz edin")

print("\n    GeliÅŸmiÅŸ Modeller:")
print("   â€¢ Deep Learning modelleri deneyin (LSTM, GRU)")
print("   â€¢ Ensemble yÃ¶ntemlerini test edin")
print("   â€¢ Time series forecasting iÃ§in Prophet kullanÄ±n")
print("   â€¢ NLP analizi ile Ã¼rÃ¼n aÃ§Ä±klamalarÄ±nÄ± deÄŸerlendirin")

# Was "\ 2.": an invalid escape that printed a literal backslash instead of
# the blank line used by the other section headings.
print("\n 2. TEKNÄ°K GELÄ°ÅžTÄ°RMELER:")
print("    Sistem Optimizasyonu:")
print("   â€¢ Real-time scoring sistemi kurun")
print("   â€¢ MLOps pipeline'Ä±nÄ± otomatikleÅŸtirin")
print("   â€¢ API servisi geliÅŸtirin")
print("   â€¢ Cloud tabanlÄ± Ã§Ã¶zÃ¼mler implementasyonu")

print("\n    GÃ¶rselleÅŸtirme ve Raporlama:")
print("   â€¢ Interactive dashboard geliÅŸtirin")
print("   â€¢ Automated reporting sistemi")
print("   â€¢ Mobile-friendly uygulamalar")
print("   â€¢ Executive summary raporlarÄ±")

print("\n 3. Ä°Åž UYGULAMALARI:")
print("    Operasyonel Entegrasyon:")
print("   â€¢ CRM sistemine entegre edin")
print("   â€¢ Marketing automation ile baÄŸlantÄ± kurun")
print("   â€¢ Inventory management sistemine dahil edin")
print("   â€¢ Customer support'ta kullanÄ±n")

print("\n    SÃ¼rekli Ä°yileÅŸtirme:")
print("   â€¢ Model performansÄ±nÄ± dÃ¼zenli izleyin")
print("   â€¢ A/B testing ile validasyon yapÄ±n")
print("   â€¢ Feedback loop kurun")
print("   â€¢ Business impact'ini Ã¶lÃ§Ã¼n")

print("\n  4. ARAÅžTIRMA KONULARI:")
print("   â€¢ Causal inference analizi")
print("   â€¢ Customer churn prediction")
print("   â€¢ Market basket analysis")
print("   â€¢ Recommendation systems optimization")
print("   â€¢ Dynamic pricing strategies")
print("   â€¢ Cross-channel customer journey mapping")

# Closing note
print("\n" + "=" * 80)
print("Bu projede elde edilen sonuÃ§lar ve Ã¶neriler,")
print(" iÅŸletmenizin mÃ¼ÅŸteri deneyimini iyileÅŸtirmek ve")
print(" bÃ¼yÃ¼me stratejilerinizi desteklemek iÃ§in kullanÄ±labilir.")
print("=" * 80)

In [None]:
# Persist the final artefacts (tables and metrics) as CSV files under data/
from pathlib import Path

print("=== SONUÃ‡LARI KAYDETME ===\n")

# Tracks whether every export succeeded, so the closing banner stays truthful
results_saved = False
try:
    # Make sure the output folder exists before the first to_csv call
    Path('data').mkdir(parents=True, exist_ok=True)

    # RFM table including the segment / loyalty columns added along the way
    rfm_scaled.to_csv('data/final_rfm_with_segments.csv', index=False)
    print("RFM verisi segmentlerle birlikte kaydedildi: data/final_rfm_with_segments.csv")

    # Regression and classification model comparison tables
    results_df.to_csv('data/model_performance_results.csv', index=False)
    print("Model performans sonuÃ§larÄ± kaydedildi: data/model_performance_results.csv")

    class_results_df.to_csv('data/classification_results.csv', index=False)
    print(" SÄ±nÄ±flandÄ±rma sonuÃ§larÄ± kaydedildi: data/classification_results.csv")

    # Feature importance table
    feature_importance.to_csv('data/feature_importance.csv', index=False)
    print("Feature importance kaydedildi: data/feature_importance.csv")

    # Cluster analysis (written with its index)
    cluster_analysis.to_csv('data/cluster_analysis.csv')
    print(" Cluster analizi kaydedildi: data/cluster_analysis.csv")

    # Daily sales data
    daily_sales.to_csv('data/daily_sales_data.csv', index=False)
    print(" GÃ¼nlÃ¼k satÄ±ÅŸ verileri kaydedildi: data/daily_sales_data.csv")

    print(" TÃ¼m sonuÃ§lar 'data/' klasÃ¶rÃ¼ne kaydedildi.")
    results_saved = True

except Exception as e:
    # Notebook-level boundary: report the failure instead of aborting the run
    print(f" Kaydetme sÄ±rasÄ±nda hata: {e}")

print("\n" + "="*50)
print("JUPYTER NOTEBOOK SONUNDA!")
if results_saved:
    # Only claim the results were saved when every export above succeeded
    print("TÃ¼m analizler tamamlandÄ± ve sonuÃ§lar kaydedildi.")
print(" Elde edilen insights'larÄ± iÅŸ stratejilerinizde kullanabilirsiniz.")
print("="*50)