In [1]:
import pandas as pd

In [2]:
# Sample order history: one row per order, several orders per account.
# OrderDate is supplied as a day-first string and parsed with an explicit
# format so '03-01-2019' is read as 3 January, not 1 March.
df = pd.DataFrame(
    [
        (1001, 1, '03-01-2019', 100),
        (1002, 2, '05-01-2019', 250),
        (1003, 3, '06-01-2019', 80),
        (1003, 4, '15-01-2019', 80),
        (1001, 5, '15-02-2019', 30),
        (1003, 6, '15-02-2019', 30),
        (1002, 7, '15-03-2019', 30),
        (1002, 8, '15-03-2019', 30),
        (1003, 9, '02-03-2019', 80),
    ],
    columns=['AccountID', 'OrderID', 'OrderDate', 'Value'],
).assign(
    OrderDate=lambda orders: pd.to_datetime(orders['OrderDate'], format='%d-%m-%Y')
)

In [3]:
# Check the column dtypes, in particular that OrderDate was parsed to a datetime.
df.dtypes

AccountID             int64
OrderID               int64
OrderDate    datetime64[ns]
Value                 int64
dtype: object

In [4]:
# Preview the first five orders (head() defaults to five rows).
df.head()

Unnamed: 0,AccountID,OrderID,OrderDate,Value
0,1001,1,2019-01-03,100
1,1002,2,2019-01-05,250
2,1003,3,2019-01-06,80
3,1003,4,2019-01-15,80
4,1001,5,2019-02-15,30


In [5]:
# Total per client: number of orders placed and total value spent.
# Built-in reducer names ('count', 'sum') replace per-group Python lambdas,
# and the columns are renamed by source name rather than by position so the
# labels cannot drift if the aggregation spec is reordered or extended.
df_totals = (
    df.groupby('AccountID')
    .agg({'OrderID': 'count', 'Value': 'sum'})
    .rename(columns={'OrderID': 'total_transactions', 'Value': 'total_spent'})
)
df_totals

Unnamed: 0_level_0,total_transactions,total_spent
AccountID,Unnamed: 1_level_1,Unnamed: 2_level_1
1001,2,130
1002,3,310
1003,4,270


### Revenue

In [6]:
# Sales per client per month
# Orders are bucketed by calendar month via a monthly Period column. This
# avoids pd.Grouper(freq='M'), whose 'M' offset alias is deprecated since
# pandas 2.2; 'M' remains the valid monthly frequency for periods.
df_sales = (
    df.assign(OrderMonth=df['OrderDate'].dt.to_period('M'))
    .pivot_table(
        index=['AccountID'],
        columns='OrderMonth',
        values='Value',
        aggfunc='sum',
        fill_value=0,  # a client with no orders in a month shows 0, not NaN
    )
)

# Format column headers to human readable format, e.g. 'Jan-2019'
df_sales.columns = [month.strftime('%b-%Y') for month in df_sales.columns]

df_sales.head()

Unnamed: 0_level_0,Jan-2019,Feb-2019,Mar-2019
AccountID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1001,100,30,0
1002,250,0,60
1003,160,30,80


In [7]:
# Running total of each client's sales, accumulated left to right across the month columns
df_sales.cumsum(axis='columns')

Unnamed: 0_level_0,Jan-2019,Feb-2019,Mar-2019
AccountID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1001,100,130,130
1002,250,250,310
1003,160,190,270


### Frequency

In [8]:
# Transactions per client per month
# Orders are bucketed by calendar month via a monthly Period column. This
# avoids pd.Grouper(freq='M'), whose 'M' offset alias is deprecated since
# pandas 2.2; 'M' remains the valid monthly frequency for periods.
df_frequency = (
    df.assign(OrderMonth=df['OrderDate'].dt.to_period('M'))
    .pivot_table(
        index=['AccountID'],
        columns='OrderMonth',
        values='OrderID',
        aggfunc='count',
        fill_value=0,  # a client with no orders in a month shows 0, not NaN
    )
)

# Format column headers to human readable format, e.g. 'Jan-2019'
df_frequency.columns = [month.strftime('%b-%Y') for month in df_frequency.columns]

df_frequency.head()

Unnamed: 0_level_0,Jan-2019,Feb-2019,Mar-2019
AccountID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1001,1,1,0
1002,1,0,2
1003,2,1,1


In [9]:
# Repeat rate: share of clients with more than one transaction in each month.
# The column-wise mean of a boolean mask is the fraction of True entries.
repeat_rate = df_frequency.gt(1).mean()
repeat_rate.to_frame(name='repeat_rate')

Unnamed: 0,repeat_rate
Jan-2019,0.333333
Feb-2019,0.0
Mar-2019,0.333333


In [None]:
#