# RFM Analysis on the Walmart Dataset
This project applies RFM (Recency, Frequency, Monetary) analysis to segment customers by purchasing behavior: how recently they bought, how often they buy, and how much they spend.

In [None]:

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime

# Load the raw sales records. The path is relative, so 'Walmart.csv' must be in
# the current working directory. Later cells read the 'unit_price', 'date',
# 'quantity' and 'invoice_id' columns from this frame.
df = pd.read_csv('Walmart.csv')


### Data Cleaning

In [None]:

# Strip '$' signs and commas so the price column can be converted to float.
# The pattern is a raw string: '\$' inside a plain literal is an invalid escape
# sequence and triggers a warning on recent Python versions.
df['unit_price'] = df['unit_price'].replace(r'[\$,]', '', regex=True).astype(float)
# Parse dates written as day/month/two-digit year; strings that do not match
# the format raise instead of being silently misparsed.
df['date'] = pd.to_datetime(df['date'], format='%d/%m/%y')
# Row-level amount: unit price multiplied by quantity.
df['TotalAmount'] = df['unit_price'] * df['quantity']


###  Snapshot Date

In [None]:

# Reference point for recency: one day after the latest date in the data, so
# the most recent purchase ends up with a recency of 1 day rather than 0.
snapshot_date = df['date'].max() + pd.Timedelta(days=1)


### RFM Table

In [None]:

# Build one row per invoice_id holding the three raw RFM measures.
# NOTE(review): the grouping key is invoice_id, not a customer identifier; if
# each invoice_id appears on a single row, Frequency will be 1 everywhere —
# confirm this is the intended unit of analysis.
rfm = (
    df.groupby('invoice_id')
    .agg({
        # Days between the snapshot date and the group's latest date.
        'date': lambda dates: (snapshot_date - dates.max()).days,
        # Number of rows recorded under this invoice_id.
        'invoice_id': 'count',
        # Sum of the row-level amounts in the group.
        'TotalAmount': 'sum',
    })
    .rename(columns={
        'date': 'Recency',
        'invoice_id': 'Frequency',
        'TotalAmount': 'Monetary',
    })
)


###  RFM Scoring

In [None]:

# Quartile scores from 1 (worst) to 4 (best). Recency labels run in reverse
# because fewer days since the last purchase is better.
rfm['R'] = pd.qcut(rfm['Recency'], 4, labels=[4, 3, 2, 1])
# Frequency is ranked first so heavily tied counts still split into four bins.
rfm['F'] = pd.qcut(rfm['Frequency'].rank(method='first'), 4, labels=[1, 2, 3, 4])
rfm['M'] = pd.qcut(rfm['Monetary'], 4, labels=[1, 2, 3, 4])
# qcut returns categorical columns, and categoricals do not support arithmetic
# reductions; summing them directly depends on pandas coercing the frame to
# objects. Convert the labels to numbers explicitly instead (float first, so a
# missing score is skipped by sum rather than raising), then store the total
# as an integer.
rfm['RFM_Score'] = rfm[['R', 'F', 'M']].astype(float).sum(axis=1).astype(int)


###  Segment Customers

In [None]:

def segment(score):
    """Map a combined RFM score to a segment name.

    The score is truncated to an integer with ``int`` and checked against
    descending cut-offs: 9 and above is 'Top Customers', 7-8 is
    'Loyal Customers', 5-6 is 'Potential', and anything lower is 'Churned'.
    """
    value = int(score)
    # Ordered from the highest floor down; the first floor reached wins.
    tiers = (
        (9, 'Top Customers'),
        (7, 'Loyal Customers'),
        (5, 'Potential'),
    )
    for floor, label in tiers:
        if value >= floor:
            return label
    return 'Churned'

# Label each row of the RFM table with the segment for its combined score.
rfm['Segment'] = rfm['RFM_Score'].apply(segment)
# Final expression of the cell: in a notebook this displays the row count per segment.
rfm['Segment'].value_counts()


### Visualize

In [None]:

# Bar chart of how many rows fall into each segment, most populated segment first.
plt.figure(figsize=(8, 5))
sns.countplot(
    x='Segment',
    data=rfm,
    palette='Set2',
    order=rfm['Segment'].value_counts().index,
)
# Title and axis labels are applied to the axes that countplot just drew on.
plt.gca().set(
    title="Customer Segments Based on RFM Score",
    xlabel="Segment",
    ylabel="Number of Invoices",
)
plt.show()


### Export

In [None]:

# Write the scored table to CSV in the working directory; the invoice_id index
# is written as the first column.
rfm.to_csv('rfm_walmart_output.csv')
