# Cihaz Kümeleme için Veri Ön İşleme

Bu notebook, `mobil_restaurant.csv` dosyasındaki veriyi cihaz bazlı kümeleme için hazırlar.

Adımlar:
1. Google Drive'ı bağlama.
2. Gerekli kütüphaneleri yükleme.
3. Veriyi Google Drive'dan yükleme (timestamp dahil).
4. Özellik dönüşümlerini uygulama (Aşama 2) (timestamp dahil).
5. Dönüştürülmüş veriyi kaydetme (`transformed_visits.csv`).
6. Cihaz bazlı özellik mühendisliği (Aşama 3) (zaman dilimi özelliklerini ekleyerek).
7. Cihaz bazlı özellikleri kaydetme (`device_features.csv`).
8. Özellik filtreleme (düşük varyans, yüksek korelasyon) ve özellik ölçeklendirme.
9. Optimal küme sayısını belirleme ve K-Means kümeleme uygulama.
10. Kümelenmiş veriyi kaydetme (`device_clusters.csv`), küme analizi ve küme personaları için görsel analizler (grafikleri Drive'a kaydetme dahil).
11. Küme bazlı müşteri çeşidi analizi.

## 1. Google Drive'ı Bağlama

In [None]:
# Mount Google Drive under /content/drive (Colab-only module); the input CSV path
# and the figure output directory used later both live under this mount point.
from google.colab import drive
drive.mount('/content/drive')

## 2. Gerekli Kütüphaneleri Yükleme

In [None]:
import pandas as pd
import numpy as np
import re
import os
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import VarianceThreshold
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score

## 3. Veriyi Google Drive'dan Yükleme

Lütfen `file_path_on_drive` değişkenini CSV dosyanızın Google Drive'daki doğru yoluyla güncelleyin.

In [None]:
file_path_on_drive = '/content/drive/MyDrive/mobil_restaurant.csv'  # UPDATE THIS PATH!

# Only the columns needed by the later stages are read from the CSV.
columns_to_load = [
    'device_aid',
    'SatisHacmi',
    'BiletEtkinlik',
    'OrtalamaHarcamaTutari',
    'Mapin Segment',
    'timestamp',  # visit timestamp, converted to datetime in the transformation step
]

print(f"Loading {file_path_on_drive}...")
# `df` stays None whenever loading fails, which later cells use as their skip signal.
df = None
try:
    try:
        # First attempt relies on pandas' default encoding.
        df = pd.read_csv(file_path_on_drive, sep=';', usecols=columns_to_load, low_memory=False)
    except UnicodeDecodeError:
        print("UTF-8 decoding failed, trying latin1...")
        df = pd.read_csv(file_path_on_drive, sep=';', usecols=columns_to_load, encoding='latin1', low_memory=False)
except FileNotFoundError:
    print(f"Error: The file {file_path_on_drive} was not found. Please check the path.")
except Exception as e:
    print(f"Error loading CSV: {e}")
else:
    print("CSV loaded successfully.")
    print("Initial data sample:")
    print(df.head())

## 4. Özellik Dönüşümlerini Uygulama (Aşama 2)

In [None]:
if df is None:
    print("DataFrame 'df' not loaded. Cannot proceed with transformations.")
else:
    # 1. SatisHacmi_Numeric: ordinal code per label; labels missing from the mapping become -1.
    satis_hacmi_mapping = dict(S1=0, S2=1, S3=2)
    df['SatisHacmi_Numeric'] = (
        df['SatisHacmi']
        .map(satis_hacmi_mapping)
        .fillna(-1)
    )
    print("SatisHacmi_Numeric created.")

    # 2. BiletEtkinlik_Numeric: 1 for 'Var', 0 for 'Yok', -1 for anything else.
    bilet_etkinlik_mapping = dict(Var=1, Yok=0)
    df['BiletEtkinlik_Numeric'] = (
        df['BiletEtkinlik']
        .map(bilet_etkinlik_mapping)
        .fillna(-1)
    )
    print("BiletEtkinlik_Numeric created.")

    # 3. OrtalamaHarcamaTutari_Numeric: spending brackets coded 1-4; missing or unrecognised values become 0.
    harcama_tutari_mapping = dict(
        zip(('0-499 TL', '500-999 TL', '1000-1.999 TL', '2.000+TL'), (1, 2, 3, 4))
    )
    # Normalise to stripped strings first so surrounding whitespace does not defeat the lookup.
    df['OrtalamaHarcamaTutari_Cleaned'] = (
        df['OrtalamaHarcamaTutari']
        .fillna('Boş')
        .astype(str)
        .str.strip()
    )
    df['OrtalamaHarcamaTutari_Numeric'] = (
        df['OrtalamaHarcamaTutari_Cleaned']
        .map(harcama_tutari_mapping)
        .fillna(0)
    )
    print("OrtalamaHarcamaTutari_Numeric created.")

    # 4. Mapin Segment parsing
    def parse_mapin_segment(segment_str):
        """Split a 'Mapin Segment' code into (venue_type, population_score, quality_score).

        The parser accepts codes shaped like ``<D|R|H><digits>-<A|B|C>`` (e.g. ``'R3-A'``).
        Surrounding whitespace is ignored.

        Parameters
        ----------
        segment_str : object
            Raw cell value; anything that is not a string of at least two
            characters (after stripping) yields three ``pd.NA`` values.

        Returns
        -------
        tuple
            ``(venue_type, population_score, quality_score_numeric)`` where
            * ``venue_type`` is the leading letter if it is 'D', 'R' or 'H', else ``pd.NA``;
            * ``population_score`` is the integer right after that leading letter
              if it lies in 1..5, else ``pd.NA``;
            * ``quality_score_numeric`` maps the text after the last '-' via
              A->2, B->1, C->0, else ``pd.NA``.
        """
        # isinstance alone filters None / NaN / pd.NA and avoids calling pd.isna on non-scalars.
        if not isinstance(segment_str, str):
            return pd.NA, pd.NA, pd.NA
        segment = segment_str.strip()
        if len(segment) < 2:
            return pd.NA, pd.NA, pd.NA

        venue_type = segment[0] if segment[0] in ('D', 'R', 'H') else pd.NA

        # Anchor at the start so the digits belong to the venue letter, and read the
        # whole number so that e.g. 'R12' is rejected instead of being read as 1.
        population_score = pd.NA
        pop_match = re.match(r'[DRH](\d+)', segment)
        if pop_match:
            candidate = int(pop_match.group(1))
            if 1 <= candidate <= 5:
                population_score = candidate

        # Only a single-letter suffix present in the mapping counts as a quality grade.
        quality_mapping = {'A': 2, 'B': 1, 'C': 0}
        suffix = segment.rsplit('-', 1)[-1] if '-' in segment else ''
        quality_score_numeric = quality_mapping.get(suffix, pd.NA)

        return venue_type, population_score, quality_score_numeric
    
    # Parse each segment string once; every element of the result is a 3-tuple.
    parsed_segments = df['Mapin Segment'].apply(parse_mapin_segment)

    def _tuple_item(position):
        """Build a picker that returns element `position` of a tuple, or pd.NA for non-tuples."""
        return lambda parsed: parsed[position] if isinstance(parsed, tuple) else pd.NA

    df['VenueType_Parsed'] = parsed_segments.apply(_tuple_item(0))
    df['PopulationInverseScore_Parsed'] = parsed_segments.apply(_tuple_item(1))
    df['QualityScore_Numeric'] = parsed_segments.apply(_tuple_item(2))
    print("Mapin Segment parsed.")

    # Convert the raw timestamp, read as epoch seconds, to datetime; values that cannot be converted become NaT.
    df['timestamp_processed'] = pd.to_datetime(df['timestamp'], errors='coerce', unit='s')
    print("Timestamp converted to datetime.")

    output_columns = [
        'device_aid',
        'SatisHacmi_Numeric',
        'BiletEtkinlik_Numeric',
        'OrtalamaHarcamaTutari_Numeric',
        'VenueType_Parsed',
        'PopulationInverseScore_Parsed',
        'QualityScore_Numeric',
        'timestamp_processed',  # converted timestamp
    ]
    # Keep only the engineered columns and discard rows whose timestamp failed to convert.
    df_transformed = df[output_columns].dropna(subset=['timestamp_processed']).copy()

    print("\nTransformed data sample:")
    print(df_transformed.head().to_string())
    print("\nTransformed data info:")
    df_transformed.info()

## 5. Dönüştürülmüş Veriyi Kaydetme

In [None]:
# Write the visit-level transformed table to disk; the next stage reloads it from this file.
if 'df_transformed' not in locals() or df_transformed is None:
    print("df_transformed DataFrame not found or is None. Cannot save.")
else:
    output_csv_path = 'transformed_visits.csv'
    print(f"Saving transformed data to {output_csv_path}...")
    try:
        df_transformed.to_csv(output_csv_path, sep=';', index=False)
    except Exception as e:
        print(f"Error saving CSV: {e}")
    else:
        print(f"Transformed data saved successfully to {output_csv_path}.")

## 6. Cihaz Bazlı Özellik Mühendisliği (Aşama 3)

In [None]:
transformed_visits_path = 'transformed_visits.csv'
print(f"Loading {transformed_visits_path} for device-level aggregation...")
try:
    # Timestamps come back from CSV as text, so they are re-parsed; unparseable values become NaT.
    df_visits = pd.read_csv(transformed_visits_path, sep=';').assign(
        timestamp_processed=lambda frame: pd.to_datetime(frame['timestamp_processed'], errors='coerce')
    )
    df_visits.dropna(subset=['timestamp_processed'], inplace=True)
except FileNotFoundError:
    print(f"Error: {transformed_visits_path} not found.")
    df_visits = None  # signals the aggregation cell to skip
except Exception as e:
    print(f"Error loading {transformed_visits_path}: {e}")
    df_visits = None
else:
    print("Transformed visits data loaded successfully.")

In [None]:
if df_visits is not None:
    print("Aggregating features per device_aid...")

    # Categories that must always produce a rate column, even when absent from the data,
    # so the device feature schema does not depend on which values happen to occur.
    expected_time_slots = ['Morning', 'Afternoon', 'Evening', 'Night']
    expected_venue_types = ['D', 'R', 'H']

    def get_time_slot(hour):
        """Map an hour of day (0-23) to a coarse time-slot label."""
        if 6 <= hour <= 11:
            return 'Morning'      # 06:00 - 11:59
        if 12 <= hour <= 17:
            return 'Afternoon'    # 12:00 - 17:59
        if 18 <= hour <= 23:
            return 'Evening'      # 18:00 - 23:59
        return 'Night'            # 00:00 - 05:59

    def _dummies_with_expected(values, prefix, expected_categories):
        """One-hot encode `values`, adding an all-zero column for every expected category that is missing.

        Columns are returned in sorted order, which matches the order pd.get_dummies
        produces on its own when every category is present.
        """
        dummies = pd.get_dummies(values, prefix=prefix)
        for category in expected_categories:
            column_name = f'{prefix}_{category}'
            if column_name not in dummies.columns:
                dummies[column_name] = 0
        return dummies[sorted(dummies.columns)]

    # Time-slot features
    # NOTE(review): hours are taken from timestamp_processed as stored; if those values were
    # derived from epoch seconds they are presumably UTC rather than local time - confirm.
    df_visits['visit_hour'] = df_visits['timestamp_processed'].dt.hour
    df_visits['TimeSlot'] = df_visits['visit_hour'].apply(get_time_slot)

    time_slot_dummies = _dummies_with_expected(df_visits['TimeSlot'], 'TimeSlot', expected_time_slots)
    df_visits_with_time_dummies = pd.concat([df_visits[['device_aid']], time_slot_dummies], axis=1)
    df_time_slot_counts = df_visits_with_time_dummies.groupby('device_aid').sum()

    # Main aggregations
    # NOTE(review): SatisHacmi_Numeric and BiletEtkinlik_Numeric carry -1 for unmapped labels
    # (set in the transformation step); those sentinels are averaged here, so e.g.
    # 'rate_BiletEtkinlik_Var' can be negative - confirm this is intended.
    aggregation_functions = {
        'SatisHacmi_Numeric': 'mean',
        'BiletEtkinlik_Numeric': 'mean',
        'OrtalamaHarcamaTutari_Numeric': 'mean',
        'PopulationInverseScore_Parsed': 'mean',
        'QualityScore_Numeric': 'mean',
        'device_aid': 'count'
    }
    df_device_features = df_visits.groupby('device_aid').agg(aggregation_functions)
    df_device_features.rename(columns={'device_aid': 'total_visits'}, inplace=True)

    # Venue-type visit counts per device
    venue_type_dummies = _dummies_with_expected(df_visits['VenueType_Parsed'], 'VenueType', expected_venue_types)
    df_visits_with_venue_dummies = pd.concat([df_visits['device_aid'], venue_type_dummies], axis=1)
    df_venue_type_counts = df_visits_with_venue_dummies.groupby('device_aid').sum()
    df_device_features = df_device_features.join(df_venue_type_counts)

    # Time-slot visit counts per device
    df_device_features = df_device_features.join(df_time_slot_counts)

    # Turn the per-category counts into shares of the device's total visits (vectorized).
    rate_cols_to_create = list(venue_type_dummies.columns) + list(time_slot_dummies.columns)
    total_visits = df_device_features['total_visits']
    for col_name in rate_cols_to_create:
        rate = df_device_features[col_name].div(total_visits)
        # Guard against a zero denominator, mirroring the original per-row check.
        df_device_features[col_name + '_rate'] = rate.where(total_visits > 0, 0)
    df_device_features = df_device_features.drop(columns=rate_cols_to_create)

    df_device_features.rename(columns={
        'SatisHacmi_Numeric': 'avg_SatisHacmi',
        'BiletEtkinlik_Numeric': 'rate_BiletEtkinlik_Var',
        'OrtalamaHarcamaTutari_Numeric': 'avg_OrtalamaHarcamaTutari',
        'PopulationInverseScore_Parsed': 'avg_PopulationInverseScore',
        'QualityScore_Numeric': 'avg_QualityScore'
    }, inplace=True)

    # Means over all-missing groups are NaN; they are replaced by 0 before saving.
    df_device_features = df_device_features.fillna(0)

    print("\nDevice features aggregated (including time slots).")
    print(df_device_features.head().to_string())
    print("\nInfo for aggregated device features:")
    df_device_features.info()
else:
    print("df_visits DataFrame not found. Cannot aggregate.")

## 7. Cihaz Bazlı Özellikleri Kaydetme

In [None]:
# Write the per-device feature table; device_aid is the index and is written as the first column.
if 'df_device_features' not in locals() or df_device_features is None:
    print("df_device_features DataFrame not found. Cannot save.")
else:
    device_features_path = 'device_features.csv'
    print(f"Saving device features to {device_features_path}...")
    try:
        df_device_features.to_csv(device_features_path, sep=';')
    except Exception as e:
        print(f"Error saving device features CSV: {e}")
    else:
        print(f"Device features saved successfully to {device_features_path}.")

## 8. Özellik Filtreleme ve Ölçeklendirme

In [None]:
device_features_path = 'device_features.csv'
print(f"Loading {device_features_path} for filtering and scaling...")
try:
    df_agg_features = pd.read_csv(device_features_path, index_col='device_aid', sep=';')
except FileNotFoundError:
    print(f"Error: {device_features_path} not found.")
    df_agg_features = None
except Exception as e:
    print(f"Error loading {device_features_path}: {e}")
    df_agg_features = None
else:
    print("Aggregated device features loaded successfully.")

# Work on a copy so the frame as loaded stays available while filters are applied.
if df_agg_features is None:
    print("Skipping feature filtering/scaling as df_agg_features was not loaded.")
    df_features_to_filter = None
else:
    print("\nData types before filtering/scaling:")
    print(df_agg_features.dtypes)
    df_features_to_filter = df_agg_features.copy()

### 8.1. Düşük Varyanslı Özelliklerin Çıkarılması

In [None]:
# Drop features whose variance does not exceed the threshold; on failure keep all features.
if df_features_to_filter is None:
    print("Skipping variance thresholding.")
else:
    print("\nOriginal number of features:", df_features_to_filter.shape[1])
    variance_threshold_value = 0.01
    selector = VarianceThreshold(threshold=variance_threshold_value)
    try:
        # fit() returns the selector itself, so the support mask can be read directly.
        retained_features_mask = selector.fit(df_features_to_filter).get_support()
        removed_low_variance_features = df_features_to_filter.columns[~retained_features_mask]
        df_agg_features = df_features_to_filter.loc[:, retained_features_mask].copy()
        if removed_low_variance_features.empty:
            print(f"No features removed by variance threshold (< {variance_threshold_value}).")
        else:
            print(f"Removed {len(removed_low_variance_features)} low-variance features (threshold < {variance_threshold_value}):")
            for feature in removed_low_variance_features:
                feature_variance = df_features_to_filter[feature].var()
                print(f"- {feature} (Variance: {feature_variance:.4f})")
        print(f"Number of features after variance filtering: {df_agg_features.shape[1]}")
    except ValueError as e:
        print(f"Error during variance thresholding: {e}. Skipping.")
        df_agg_features = df_features_to_filter.copy()

### 8.2. Yüksek Korelasyonlu Özelliklerin Çıkarılması

In [None]:
# For each pair with |correlation| above the threshold, drop the feature that appears
# earlier in column order and keep the later one.
if 'df_agg_features' not in locals() or df_agg_features is None:
    print("Skipping correlation filtering.")
elif df_agg_features.shape[1] <= 1:
    print("Skipping correlation filtering (1 or no features left).")
else:
    print("\nStarting high-correlation feature removal...")
    corr_matrix = df_agg_features.corr().abs()
    # Keep only the strictly-upper triangle so each pair is examined once.
    upper_mask = np.triu(np.ones(corr_matrix.shape), k=1).astype(bool)
    upper_triangle = corr_matrix.where(upper_mask)
    correlation_threshold = 0.90
    features_to_drop_corr = set()
    removed_correlation_info = []
    for column in upper_triangle.columns:
        if column in features_to_drop_corr:
            continue
        exceeds_threshold = upper_triangle[column] > correlation_threshold
        highly_correlated_with_column = upper_triangle.index[exceeds_threshold]
        for feature in highly_correlated_with_column:
            if feature in features_to_drop_corr:
                continue
            features_to_drop_corr.add(feature)
            removed_correlation_info.append(f"- '{feature}' (>{correlation_threshold*100:.0f}% vs '{column}': {corr_matrix.loc[column, feature]:.2f})")
    if features_to_drop_corr:
        df_agg_features.drop(columns=list(features_to_drop_corr), inplace=True)
        print(f"Removed {len(features_to_drop_corr)} highly-correlated features (threshold > {correlation_threshold}):")
        print("\n".join(removed_correlation_info))
    else:
        print(f"No features removed by correlation threshold (> {correlation_threshold}).")
    print(f"Number of features after correlation filtering: {df_agg_features.shape[1]}")

### 8.3. Ölçeklendirme (Filtrelenmiş Özelliklerle)

In [None]:
# Standardise the surviving features; df_scaled_features is None when there is nothing to scale.
if 'df_agg_features' not in locals() or df_agg_features is None:
    print("Skipping scaling.")
    df_scaled_features = None
elif df_agg_features.shape[1] == 0:
    print("No features remaining after filtering. Clustering cannot proceed.")
    df_scaled_features = None
else:
    print("\nScaling the remaining features...")
    feature_columns = df_agg_features.columns
    scaler = StandardScaler()
    df_scaled_features_array = scaler.fit_transform(df_agg_features[feature_columns])
    # Re-wrap the array so device ids (index) and feature names survive scaling.
    df_scaled_features = pd.DataFrame(
        df_scaled_features_array,
        index=df_agg_features.index,
        columns=feature_columns,
    )
    print("Features scaled successfully.")
    print(df_scaled_features.head().to_string())
    print(f"Shape of scaled features: {df_scaled_features.shape}")

## 9. Aşama 5: Kümeleme Algoritmasının Uygulanması (K-Means)

In [None]:
# Fit K-Means for K = 2..10, plot inertia and silhouette curves, and pick the K with the best silhouette.
if 'df_scaled_features' not in locals() or df_scaled_features is None or df_scaled_features.empty:
    print("\nSkipping K selection as scaled features are not available or empty.")
    optimal_k = None
else:
    inertia, silhouette_scores = [], []
    k_range = range(2, 11)
    print("\nCalculating inertia and silhouette scores for K range...")
    for k_val in k_range:
        kmeans = KMeans(n_clusters=k_val, random_state=42, n_init='auto')
        kmeans.fit(df_scaled_features)
        inertia.append(kmeans.inertia_)
        score = silhouette_score(df_scaled_features, kmeans.labels_)
        silhouette_scores.append(score)
        print(f"K={k_val}, Inertia: {kmeans.inertia_:.2f}, Silhouette Score: {score:.4f}")

    # Two side-by-side panels sharing the same layout: (values, title, y-axis label).
    panels = (
        (inertia, 'Elbow Method for Optimal K', 'Inertia'),
        (silhouette_scores, 'Silhouette Scores for Optimal K', 'Silhouette Score'),
    )
    plt.figure(figsize=(12, 5))
    for panel_number, (panel_values, panel_title, panel_ylabel) in enumerate(panels, start=1):
        plt.subplot(1, 2, panel_number)
        plt.plot(k_range, panel_values, marker='o')
        plt.title(panel_title)
        plt.xlabel('Number of Clusters (K)')
        plt.ylabel(panel_ylabel)
        plt.xticks(k_range)
        plt.grid(True)
    plt.tight_layout()
    plt.show()

    if not silhouette_scores:
        print("Could not determine optimal K. Defaulting to K=3.")
        optimal_k = 3
    else:
        # argmax returns the first position of the maximum, so ties go to the smaller K.
        optimal_k = k_range[np.argmax(silhouette_scores)]
        print(f"\nOptimal K based on highest Silhouette Score: {optimal_k}")

### 9.1. K-Means Kümelemesini Uygulama (Optimal K ile)

In [None]:
# Fit the final model on the scaled features and attach the labels to the unscaled feature table.
if 'df_scaled_features' not in locals() or df_scaled_features is None or df_scaled_features.empty or optimal_k is None:
    print("\nSkipping K-Means application.")
    df_clustered_devices = None
else:
    print(f"\nApplying K-Means with K={optimal_k}...")
    kmeans_final = KMeans(n_clusters=optimal_k, random_state=42, n_init='auto')
    cluster_labels = kmeans_final.fit_predict(df_scaled_features)

    # Labels are attached positionally; the scaled frame was built with the same index
    # as df_agg_features, so rows line up. Unscaled values are kept for easier interpretation.
    df_clustered_devices = df_agg_features.assign(cluster=cluster_labels)

    print("K-Means clustering applied. Cluster labels added.")
    print(df_clustered_devices.head().to_string())
    print("\nCluster sizes:")
    cluster_sizes = df_clustered_devices['cluster'].value_counts().sort_index()
    print(cluster_sizes)

## 10. Kümelenmiş Veriyi Kaydetme ve Küme Analizi

In [None]:
# Save the labelled device table and summarise each cluster by its mean feature values.
if 'df_clustered_devices' not in locals() or df_clustered_devices is None:
    print("\ndf_clustered_devices DataFrame not found. Skipping save and analysis.")
    cluster_analysis = None
else:
    clustered_output_path = 'device_clusters.csv'
    print(f"\nSaving clustered device data to {clustered_output_path}...")
    try:
        df_clustered_devices.to_csv(clustered_output_path, sep=';')
    except Exception as e:
        # A failed save does not block the in-memory analysis below.
        print(f"Error saving clustered data CSV: {e}")
    else:
        print(f"Clustered device data saved successfully to {clustered_output_path}.")

    print("\nCluster characteristics (mean values of features per cluster):")
    cluster_analysis = df_clustered_devices.groupby('cluster').mean()
    print(cluster_analysis.to_string())

### 10.1. Küme Personaları için Görsel Analizler

In [None]:
# Build three persona figures (radar chart, box plots, bar chart), save each as a PNG
# under a Google Drive folder, and display it. Runs only when both the per-cluster
# means (cluster_analysis) and the labelled device table (df_clustered_devices) exist.
if 'cluster_analysis' in locals() and cluster_analysis is not None and 'df_clustered_devices' in locals() and df_clustered_devices is not None:
    print("\nGenerating visualizations for cluster personas...")
    
    # Output folder for the figures; created on first use.
    drive_save_path_base = '/content/drive/MyDrive/restaurant_clustering_results'
    if not os.path.exists(drive_save_path_base):
        os.makedirs(drive_save_path_base)
        print(f"Created directory: {drive_save_path_base}")
    
    # cluster_analysis has one row per cluster and one column per feature.
    features_for_plotting = cluster_analysis.columns.tolist()
    num_clusters = len(cluster_analysis)

    # Radar Charts (using scaled cluster centers from kmeans_final)
    if 'kmeans_final' in locals() and hasattr(kmeans_final, 'cluster_centers_') and 'df_scaled_features' in locals() and df_scaled_features is not None:
        scaled_cluster_centers = kmeans_final.cluster_centers_
        feature_names_for_radar = df_scaled_features.columns.tolist()
        # One spoke per feature; the first angle is repeated so each polygon closes.
        angles = np.linspace(0, 2 * np.pi, len(feature_names_for_radar), endpoint=False).tolist()
        angles += angles[:1]
        fig, ax = plt.subplots(figsize=(10, 10), subplot_kw=dict(polar=True))
        for i in range(num_clusters):
            values = scaled_cluster_centers[i].tolist()
            values += values[:1]  # repeat the first value to close the polygon
            ax.plot(angles, values, linewidth=2, linestyle='solid', label=f"Cluster {i}")
            ax.fill(angles, values, alpha=0.25)
        ax.set_xticks(angles[:-1])
        ax.set_xticklabels(feature_names_for_radar, size=8)
        ax.set_title("Radar Chart of Scaled Cluster Centers", size=16, y=1.1)
        ax.legend(loc='upper right', bbox_to_anchor=(0.1, 0.1))
        radar_chart_path = os.path.join(drive_save_path_base, 'radar_chart_cluster_personas.png')
        # Saving is best-effort: a failed write is reported and the figure is still shown.
        try: plt.savefig(radar_chart_path, bbox_inches='tight'); print(f"Radar chart saved to: {radar_chart_path}")
        except Exception as e: print(f"Error saving radar chart: {e}")
        plt.show()
    else: print("Skipping radar chart: prerequisites not met.")

    # Box Plots (using unscaled data in df_clustered_devices)
    if features_for_plotting and not df_clustered_devices.empty:
        print("\nGenerating box plots...")
        num_features_to_plot = len(features_for_plotting)
        cols_subplot = 3
        # Ceiling division: enough rows to hold every feature at three plots per row.
        rows_subplot = (num_features_to_plot + cols_subplot - 1) // cols_subplot
        plt.figure(figsize=(15, rows_subplot * 5))
        for i, feature in enumerate(features_for_plotting):
            plt.subplot(rows_subplot, cols_subplot, i + 1)
            sns.boxplot(x='cluster', y=feature, data=df_clustered_devices, palette='viridis')
            plt.title(f'{feature} by Cluster', fontsize=10)
        plt.tight_layout()
        box_plots_path = os.path.join(drive_save_path_base, 'box_plots_feature_distribution.png')
        try: plt.savefig(box_plots_path, bbox_inches='tight'); print(f"Box plots saved to: {box_plots_path}")
        except Exception as e: print(f"Error saving box plots: {e}")
        plt.show()
    else: print("Skipping box plots: no features or data.")

    # Bar charts of mean feature values (from cluster_analysis)
    if not cluster_analysis.empty:
        print("\nGenerating bar chart of mean features...")
        # Transposed so features sit on the x-axis with one bar per cluster.
        cluster_analysis.T.plot(kind='bar', figsize=(15, 8), colormap='viridis')
        plt.title('Mean Feature Values by Cluster (Original Scale)')
        plt.ylabel('Mean Value')
        plt.xticks(rotation=45, ha='right')
        plt.legend(title='Cluster')
        plt.tight_layout()
        bar_chart_path = os.path.join(drive_save_path_base, 'bar_chart_mean_features.png')
        try: plt.savefig(bar_chart_path, bbox_inches='tight'); print(f"Bar chart saved to: {bar_chart_path}")
        except Exception as e: print(f"Error saving bar chart: {e}")
        plt.show()
    else: print("Skipping bar chart: cluster_analysis is empty.")
else:
    print("\nSkipping persona visualizations: prerequisites not met.")

--- 
**Sonraki Adımlar:**
1. Bu notebook'u çalıştırın ve `file_path_on_drive` değişkenini güncellediğinizden emin olun.
2. Tüm adımların (veri yükleme, dönüşüm, özellik mühendisliği, filtreleme, ölçeklendirme, kümeleme, görselleştirme) çıktılarını inceleyin.
3. Özellikle, hangi özelliklerin çıkarıldığını, optimal K seçimini ve küme personalarını gösteren grafikleri değerlendirin.
4. `device_clusters.csv` dosyası (notebook'un çalışma dizinine kaydedilir; kalıcı olması için Google Drive'a kopyalayın) ve Google Drive'a kaydedilen grafikler nihai çıktılarınızdır.

## 11. Küme Bazlı Müşteri Çeşidi Analizi

In [None]:
print("Starting post-clustering analysis for MusteriCesidi...")

# Paths of the required files.
clustered_devices_filepath = 'device_clusters.csv'  # clustered device data
# The original CSV path is taken from `file_path_on_drive`, which is defined in the
# data-loading cell; that cell must have been run in this kernel before this one.
original_data_filepath = file_path_on_drive


def _first_mode(values):
    """Return the most frequent value of a Series (first one on ties), or 'N/A' if there is none."""
    modes = values.mode()
    return modes.iloc[0] if not modes.empty else 'N/A'


try:
    # Load the clustered device data.
    print(f"Loading clustered device data from {clustered_devices_filepath}...")
    df_clusters = pd.read_csv(clustered_devices_filepath, sep=';')
    print(f"Clustered data loaded. Shape: {df_clusters.shape}")
    missing_cluster_columns = [col for col in ('device_aid', 'cluster') if col not in df_clusters.columns]
    if missing_cluster_columns:
        # Raise KeyError rather than SystemExit so the handlers below report the problem
        # like every other failure in this cell instead of aborting the kernel's cell run.
        raise KeyError(f"{missing_cluster_columns} missing in {clustered_devices_filepath}")

    # Load only device_aid and MusteriCesidi from the original data. read_csv itself raises
    # if a column named in `usecols` is absent, so no separate column check is needed.
    print(f"Loading MusteriCesidi data from {original_data_filepath}...")
    musteri_columns = ['device_aid', 'MusteriCesidi']
    try:
        df_musteri_cesidi = pd.read_csv(original_data_filepath, sep=';', usecols=musteri_columns, low_memory=False)
    except UnicodeDecodeError:
        print("UTF-8 decoding failed for mobil_restaurant.csv, trying latin1...")
        df_musteri_cesidi = pd.read_csv(original_data_filepath, sep=';', usecols=musteri_columns, low_memory=False, encoding='latin1')
    print(f"MusteriCesidi data loaded. Shape: {df_musteri_cesidi.shape}")

    # The original data may hold several rows per device (one per visit), so it is reduced
    # to one MusteriCesidi per device before merging. The analysis assumes the label is
    # consistent per device; that assumption is checked here and violations are reported.
    labels_per_device = df_musteri_cesidi.groupby('device_aid')['MusteriCesidi'].nunique()
    inconsistent_device_count = int((labels_per_device > 1).sum())
    if inconsistent_device_count:
        print(f"Warning: {inconsistent_device_count} device(s) have more than one MusteriCesidi; the first non-missing value is used.")

    # Drop missing labels first so a device whose first row is empty still gets its known label.
    df_musteri_cesidi_unique = (
        df_musteri_cesidi
        .dropna(subset=['MusteriCesidi'])
        .drop_duplicates(subset=['device_aid'])
        .copy()
    )

    # Left merge keeps every clustered device; devices without a label get NaN.
    print("Merging clustered data with MusteriCesidi data...")
    df_merged_analysis = pd.merge(df_clusters[['device_aid', 'cluster']], df_musteri_cesidi_unique, on='device_aid', how='left')
    print(f"Merge completed. Shape of merged data for analysis: {df_merged_analysis.shape}")

    if df_merged_analysis.empty:
        print("Merged data for analysis is empty. Cannot proceed.")
    else:
        print("\n--- Küme Bazlı Müşteri Çeşidi Dağılımı ---")
        labels_by_cluster = df_merged_analysis.groupby('cluster')['MusteriCesidi']

        # Counts of each MusteriCesidi within each cluster.
        musteri_cesidi_distribution = labels_by_cluster.value_counts(normalize=False).unstack(fill_value=0)
        print("\nMüşteri Çeşidi Sayıları Her Küme İçin:")
        print(musteri_cesidi_distribution.to_string())

        # Most common MusteriCesidi per cluster (first one when several tie).
        print("\nHer Küme İçin En Yaygın Müşteri Çeşidi (Mod):")
        cluster_modes = labels_by_cluster.apply(_first_mode)
        print(cluster_modes.to_string())

        # Percentage distribution within each cluster.
        musteri_cesidi_percentage = labels_by_cluster.value_counts(normalize=True).mul(100).round(2).unstack(fill_value=0)
        print("\nMüşteri Çeşidi Yüzdeleri Her Küme İçin (%):")
        print(musteri_cesidi_percentage.to_string())

except FileNotFoundError as e:
    print(f"Error: One of the required files not found. {e}")
except KeyError as e:
    print(f"Error: A required column was not found in one of the DataFrames. {e}")
except Exception as e:
    print(f"An unexpected error occurred during post-clustering analysis: {e}")

print("Post-clustering analysis for MusteriCesidi finished.")
