# Creating Segments in Python

In [1]:
import pandas as pd
import numpy as np

## Importing the data from CSV using read_csv

In [3]:
# Load the cleaned Superstore orders; 'orderdate' is parsed into datetimes at read time.
# NOTE(review): the path is absolute and machine-specific — consider a configurable data directory.
df = pd.read_csv(
    "/home/chinmay/SuperstoreProject/Data/SuperstoreCleanData.csv",
    parse_dates=['orderdate'],
)

## Defining the reference date (one day after the most recent order)

In [6]:
# Reference ("snapshot") date: one day past the latest order date in the data,
# so subtracting any order date from it yields at least one full day.
today = df['orderdate'].max() + pd.Timedelta('1 day')

## Creating RFM analysis by grouping customers and calculating recency, frequency and monetary metrics

In [7]:
# One row per customer, with the three RFM metrics as columns:
#   recency   - whole days between `today` and the customer's latest order date
#   frequency - number of distinct order ids
#   monetary  - sum of the 'sales' column
rfm = df.groupby('customerid').agg(
    recency=('orderdate', lambda dates: (today - dates.max()).days),
    frequency=('orderid', 'nunique'),
    monetary=('sales', 'sum'),
)

## Converting the raw values into standardized scores for easier customer segmentation (Scale of 1 to 5)

In [9]:
# Recency score: five quantile bins labelled 5 down to 1, so the smallest
# recency values (most recent buyers) receive the highest score.
rfm['rscore'] = pd.qcut(
    rfm['recency'],
    q=5,
    labels=range(5, 0, -1),
)

In [10]:
# Frequency score: five quantile bins labelled 1 up to 5. The values are ranked
# with method='first' before binning, which gives tied frequencies distinct
# ranks so the quantile edges cannot coincide.
rfm['fscore'] = pd.qcut(
    rfm['frequency'].rank(method='first'),
    q=5,
    labels=range(1, 6),
)

In [11]:
# Monetary score: five quantile bins labelled 1 up to 5, so the largest
# monetary totals receive the highest score.
rfm['mscore'] = pd.qcut(
    rfm['monetary'],
    q=5,
    labels=range(1, 6),
)


## Defining a function to assign customer segments based on their RFM scores

In [14]:
def segment(row):
    """Return the segment label for one customer's RFM scores.

    Rules are checked in order and the first match wins:

    * 'Champion' - rscore, fscore and mscore are all 4 or higher
    * 'Loyal'    - rscore and fscore are both 3 or higher
    * 'At Risk'  - rscore is 2 or lower
    * 'Regular'  - anything else

    Parameters
    ----------
    row : mapping-like
        Anything indexable by the keys 'rscore', 'fscore' and 'mscore'
        (for example a DataFrame row or a dict).

    Returns
    -------
    str
        One of 'Champion', 'Loyal', 'At Risk' or 'Regular'.
    """
    recency_score = row['rscore']

    # fscore / mscore are looked up only when the preceding checks pass.
    if recency_score >= 4 and row['fscore'] >= 4 and row['mscore'] >= 4:
        return 'Champion'
    if recency_score >= 3 and row['fscore'] >= 3:
        return 'Loyal'
    return 'At Risk' if recency_score <= 2 else 'Regular'

In [15]:
# Label every customer: `segment` is called once per row of rfm and its
# return value is stored in the new 'segment' column.
rfm['segment'] = rfm.apply(segment, axis='columns')

## Merging the customer segments back into the original dataframe

In [17]:
# Attach each customer's segment to their order rows, matching df's
# 'customerid' column against rfm's index. The join type is inner (the pandas
# default, spelled out here).
# NOTE(review): an inner join drops any order row whose customerid has no entry
# in rfm — confirm that cannot happen, or use how='left' if such rows must be kept.
# NOTE(review): if df already has a 'segment' column (or this cell is re-run),
# pandas will suffix the columns as segment_x / segment_y — verify.
df = pd.merge(
    df,
    rfm['segment'],
    how='inner',
    left_on='customerid',
    right_index=True,
)

## Saving the enhanced dataframe (now containing customer segments) to a CSV file.

In [18]:
# Write df to CSV, leaving out the row index.
# NOTE(review): the output path is absolute and machine-specific.
df.to_csv(
    "/home/chinmay/SuperstoreProject/Data/SuperstoreCleanDataWithSegment.csv",
    index=False,
)