In [None]:
import pandas as pd
import datetime as dt

# Cleaned Online Retail II transactions. An earlier run also applied the RFM
# analysis to the combined 2009-2010 and 2010-2011 data
# (cleaned_online_retail_data.csv) and saved it to rfm_analysis_I_II_results.csv.
csv_file = "cleaned_online_retail_II_data.csv"
# Parse InvoiceDate while loading: read_csv otherwise leaves it as text, and the
# later cells compare it with Timestamps and add Timedeltas to it.
df = pd.read_csv(csv_file, parse_dates=["InvoiceDate"])

In [10]:
# Quick check of which columns the loaded transactions frame exposes.
print(df.columns)

Index(['Invoice', 'StockCode', 'Description', 'Quantity', 'InvoiceDate',
       'Price', 'Customer ID', 'Country', 'TotalPrice'],
      dtype='object')


---- RFM Segment Dağılımı Yıl Bazlı Karşılaştırma ----

In [25]:
def calculate_rfm(dataframe, reference_date):
    """Build a per-customer RFM table with quintile scores and RF segments.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Transaction rows with the columns "Customer ID", "Invoice",
        "InvoiceDate" and "TotalPrice". "InvoiceDate" may hold datetimes or
        date strings; it is parsed on a copy, so the input is not modified.
    reference_date : datetime-like or str
        Date from which recency is measured.

    Returns
    -------
    pandas.DataFrame
        Indexed by "Customer ID", with the columns Recency (whole days between
        reference_date and the customer's latest invoice), Frequency (number
        of distinct invoices), Monetary (sum of TotalPrice), R_Score, F_Score,
        M_Score, Segment_RF (R and F scores concatenated) and Segment.
        Customers whose Monetary is not positive are dropped before scoring.

    Raises
    ------
    ValueError
        Propagated from ``pd.qcut`` when the Recency or Monetary quintile
        edges are not unique (heavily tied values or very few customers).
    """
    reference_date = pd.to_datetime(reference_date)
    # Parse dates here so frames read straight from CSV (text dates) also work.
    invoice_dates = pd.to_datetime(dataframe["InvoiceDate"])

    rfm_calc = (
        dataframe.assign(InvoiceDate=invoice_dates)
        .groupby("Customer ID")
        .agg(
            LastPurchase=("InvoiceDate", "max"),
            Frequency=("Invoice", "nunique"),
            Monetary=("TotalPrice", "sum"),
        )
    )
    # Vectorised recency instead of a per-group Python lambda.
    rfm_calc["Recency"] = (reference_date - rfm_calc["LastPurchase"]).dt.days

    # .copy() so the column assignments below act on an independent frame
    # rather than on a filtered slice (avoids SettingWithCopyWarning).
    rfm_calc = rfm_calc.loc[
        rfm_calc["Monetary"] > 0, ["Recency", "Frequency", "Monetary"]
    ].copy()
    rfm_calc["Recency"] = rfm_calc["Recency"].astype(int)

    # Smaller recency is better, so the label order is reversed for R.
    rfm_calc["R_Score"] = pd.qcut(rfm_calc["Recency"], 5, labels=[5, 4, 3, 2, 1])
    # Frequency is heavily tied; ranking first keeps the quintile edges unique.
    rfm_calc["F_Score"] = pd.qcut(
        rfm_calc["Frequency"].rank(method="first"), 5, labels=[1, 2, 3, 4, 5]
    )
    rfm_calc["M_Score"] = pd.qcut(rfm_calc["Monetary"], 5, labels=[1, 2, 3, 4, 5])

    # Two-digit "RF" code -> segment name. The patterns are mutually exclusive
    # on purpose: codes 44/45 belong to 'Loyal Customers', so 'Champions' is
    # limited to R=5 and the result does not depend on dict ordering.
    seg_map = {
        r'[1-2][1-2]': 'Hibernating',
        r'[1-2][3-4]': 'At Risk',
        r'[1-2]5': 'Cannot Lose Them',
        r'3[1-2]': 'About To Sleep',
        r'33': 'Need Attention',
        r'[3-4][4-5]': 'Loyal Customers',
        r'41': 'Promising',
        r'51': 'New Customers',
        r'[4-5][2-3]': 'Potential Loyalists',
        r'5[4-5]': 'Champions'
    }
    rfm_calc['Segment_RF'] = rfm_calc['R_Score'].astype(str) + rfm_calc['F_Score'].astype(str)
    rfm_calc['Segment'] = rfm_calc['Segment_RF'].replace(seg_map, regex=True)
    return rfm_calc

In [26]:
# 1. RFM as of the end of 2010: only invoices dated before 2011-01-01,
#    with recency measured from that same date.
ref_date_2010 = pd.to_datetime('2011-01-01')
df_2010_data = df.loc[df['InvoiceDate'] < ref_date_2010].copy()
rfm_2010 = calculate_rfm(df_2010_data, ref_date_2010)
segment_counts_2010 = (
    rfm_2010['Segment']
    .value_counts()
    .rename_axis('Segment')
    .reset_index(name='Count_2010')
)

# 2. RFM as of the end of 2011: the whole dataset, with the original
#    reference date (one day after the latest invoice).
ref_date_2011 = df["InvoiceDate"].max() + pd.Timedelta(days=1)
rfm_2011 = calculate_rfm(df, ref_date_2011)
segment_counts_2011 = (
    rfm_2011['Segment']
    .value_counts()
    .rename_axis('Segment')
    .reset_index(name='Count_2011')
)

# 3. Comparison: a segment missing from one snapshot counts as 0 there.
segment_comparison = (
    segment_counts_2010
    .merge(segment_counts_2011, on='Segment', how='outer')
    .fillna(0)
    .astype({'Count_2010': int, 'Count_2011': int})
    .assign(Change=lambda counts: counts['Count_2011'] - counts['Count_2010'])
)

In [27]:
# Print the heading and the comparison table ordered by segment name.
print(
    "\nRFM Segment Dağılımının Yıllık Değişimi:",
    segment_comparison.sort_values('Segment'),
    sep="\n",
)


RFM Segment Dağılımının Yıllık Değişimi:
               Segment  Count_2010  Count_2011  Change
0       About To Sleep          86         351     265
1              At Risk         124         580     456
2     Cannot Lose Them          24          63      39
3            Champions         129         633     504
4          Hibernating         137        1065     928
5      Loyal Customers         150         827     677
6       Need Attention          45         186     141
7        New Customers          39          42       3
8  Potential Loyalists         124         492     368
9            Promising          27          99      72


In [28]:
# Persist the segment comparison as CSV (row index omitted) and report the path.
segment_yoy_output_file = "segment_yoy_comparison.csv"
segment_comparison.to_csv(path_or_buf=segment_yoy_output_file, index=False)
print(
    f"\nSegment yıllık karşılaştırma '{segment_yoy_output_file}' dosyasına kaydedildi."
)


Segment yıllık karşılaştırma 'segment_yoy_comparison.csv' dosyasına kaydedildi.
