In [1]:
import pandas as pd
import plotly.express as px
import numpy as np
import scipy

# Define Color Palette

In [80]:
# Accent colors; color1 and color2 are handed to the histograms'
# color_discrete_sequence further down.
color1, color2, color3 = '#00704A', '#FF9FE5', '#45062E'

# Passed as plot_bgcolor in the figure layouts.
backgroundColor = '#B8A085'

# Three-step and five-step hex color lists
# (presumably color scales for later charts -- TODO confirm usage).
ccs = [
    '#ACDDA9',
    '#00704A',
    '#002F20',
]
dcs = [
    '#002619',
    '#008256',
    '#00de92',
    '#a1ffdf',
    '#fdfffe',
]

In [12]:
# Load the offer/transaction event table; the path is relative to the
# notebook's working directory.
df = pd.read_csv(filepath_or_buffer='transactions.csv')

In [13]:
# Remove the 'Unnamed: 0' column (presumably an index that was saved into the
# CSV -- TODO confirm). Rebinding `df` with errors='ignore' keeps this cell
# safe to re-run: a second execution no longer raises KeyError because the
# column is already gone.
df = df.drop(columns=['Unnamed: 0'], errors='ignore')
df.head()

Unnamed: 0,person,event,time,offer_id,reward,difficulty,duration,offer_type,web,email,mobile,social,gender,age,became_member_on,income,anonymous,income_cluster
0,78afa995795e4d85b5d9ceeca43f5fef,offer received,0,9b98b8c7a33c4b65b9aebfe6a799e6d9,5,5,7,bogo,1,1,1,0,F,75,2017-05-09,100000.0,0,1
1,a03223e636434f42ac4c3df47e8bac43,offer received,0,0b1e1539f2cc45b7b9fa7c272da2e1d7,5,20,10,discount,1,1,0,0,NS,26,2017-08-04,116000.0,1,1
2,e2127556f4f64592b11af22de27a7932,offer received,0,2906b810c7d4411798c6938adc9daaa5,2,10,7,discount,1,1,1,0,M,68,2018-04-26,70000.0,0,2
3,8ec6ce2a7e7949b1bf142def7d0e0586,offer received,0,fafdcd668e3743c1bb461111dcafc2a4,2,10,10,discount,1,1,1,1,NS,55,2017-09-25,86000.0,1,1
4,68617ca6246f4fbc85e91a2a49552598,offer received,0,4d5c57ea9a6940dd891ad53e9dbe8da0,10,10,5,bogo,1,1,1,1,NS,58,2017-10-02,91000.0,1,1


In [14]:
# (rows, columns) of the transactions frame.
df.shape

(167581, 18)

# Ideas for understanding the data
+ per-person offers received vs. viewed vs. completed (by gender, age, income, and anonymous status)
+ offer difficulty vs. number completed and number sent
+ offer type vs. completed, sent, and viewed
+ reward vs. completed, sent, and viewed

## Let's start with the number of offers received, viewed, and completed

In [61]:
# Count how many rows of each event type every person has, in long format:
# one row per (person, event) pair with the count in `event_count`.
g = (
    df.groupby(['person', 'event'])
    .agg(event_count=('event', 'count'))
    .reset_index()
)
g

Unnamed: 0,person,event,event_count
0,0009655768c64bdeb2e877511632db8f,offer completed,3
1,0009655768c64bdeb2e877511632db8f,offer received,5
2,0009655768c64bdeb2e877511632db8f,offer viewed,4
3,00116118485d4dfda04fdbaba9a87b5c,offer received,2
4,00116118485d4dfda04fdbaba9a87b5c,offer viewed,2
...,...,...,...
46597,fffad4f4828548d1b5583907f2e9906b,offer received,4
46598,fffad4f4828548d1b5583907f2e9906b,offer viewed,4
46599,ffff82501cea40309d5fdd7edcca4a07,offer completed,6
46600,ffff82501cea40309d5fdd7edcca4a07,offer received,6


In [79]:
# Event columns produced by the pivot; their counts are cast back to integers
# because filling the missing combinations leaves them as floats.
cols = ['offer completed', 'offer received', 'offer viewed']

# Reshape the long per-(person, event) counts to one row per person, treat a
# missing event type as zero occurrences, and derive the completed/received
# and viewed/received ratios (fractions, despite the `percent_` prefix).
gWide = (
    g.pivot_table(index=['person'], columns='event', values='event_count')
    .fillna(0)
    .reset_index()
    .astype(dict.fromkeys(cols, int))
    .assign(
        percent_completed=lambda t: t['offer completed'] / t['offer received'],
        percent_viewed=lambda t: t['offer viewed'] / t['offer received'],
    )
)
gWide

event,person,offer completed,offer received,offer viewed,percent_completed,percent_viewed
0,0009655768c64bdeb2e877511632db8f,3,5,4,0.60,0.800000
1,00116118485d4dfda04fdbaba9a87b5c,0,2,2,0.00,1.000000
2,0011e0d4e6b944f998e987f904e8c1e5,3,5,5,0.60,1.000000
3,0020c2b971eb4e9188eac86d93036a77,3,5,3,0.60,0.600000
4,0020ccbbb6d84e358d3414a3ff76cffd,3,4,4,0.75,1.000000
...,...,...,...,...,...,...
16989,fff3ba4757bd42088c044ca26d73817a,3,6,3,0.50,0.500000
16990,fff7576017104bcc8677a8d63322b5e1,3,5,4,0.60,0.800000
16991,fff8957ea8b240a6b5e634b6ee8eafcf,0,3,2,0.00,0.666667
16992,fffad4f4828548d1b5583907f2e9906b,3,4,4,0.75,1.000000


In [82]:
# Histogram of each person's completed/received ratio (one gWide row per person).
fig = px.histogram(
    gWide,
    x='percent_completed',
    nbins=40,
    color_discrete_sequence=[color1, color2],
    title='Distribution of Percentage of Offers Completed',
)
fig.update_layout(
    title_x=0.5,
    title_font=dict(size=25),
    plot_bgcolor=backgroundColor,
    bargap=0.01,
    yaxis=dict(
        # Axis title text and font live under `title`; this replaces the
        # deprecated `titlefont` axis attribute.
        title=dict(text='Count', font=dict(size=25)),
        tickfont=dict(size=16),
        linecolor='black',
    ),
    xaxis=dict(
        title=dict(text='Percentage', font=dict(size=25)),
        tickfont=dict(size=12),
        # percent_completed is a 0-1 fraction; format ticks as percentages so
        # they agree with the axis title.
        tickformat='.0%',
        linecolor='black',
    ),
    legend=dict(
        yanchor='top',
        y=0.98,
        xanchor='right',
        x=0.98,
    ),
)
# Outline every bar in black; the returned figure is the cell's displayed output.
fig.update_traces(
    marker_line_width=1,
    marker_line_color='black',
)

In [87]:
# Histogram of each person's viewed/received ratio (one gWide row per person).
fig = px.histogram(
    gWide,
    x='percent_viewed',
    nbins=25,
    color_discrete_sequence=[color1, color2],
    title='Distribution of Percentage of Offers Viewed',
)
fig.update_layout(
    title_x=0.5,
    title_font=dict(size=25),
    plot_bgcolor=backgroundColor,
    bargap=0.01,
    yaxis=dict(
        # Axis title text and font live under `title`; this replaces the
        # deprecated `titlefont` axis attribute.
        title=dict(text='Count', font=dict(size=25)),
        tickfont=dict(size=16),
        linecolor='black',
    ),
    xaxis=dict(
        title=dict(text='Percentage', font=dict(size=25)),
        tickfont=dict(size=12),
        # percent_viewed is a 0-1 fraction; format ticks as percentages so
        # they agree with the axis title.
        tickformat='.0%',
        linecolor='black',
    ),
    legend=dict(
        yanchor='top',
        y=0.98,
        xanchor='right',
        x=0.98,
    ),
)
# Outline every bar in black; the returned figure is the cell's displayed output.
fig.update_traces(
    marker_line_width=1,
    marker_line_color='black',
)

### Let's combine this data with customer demographics and see if there are trends

In [92]:
# One demographics row per person, indexed by `person`: the first value
# recorded for each attribute across that person's event rows.
h = df.groupby('person')[
    ['gender', 'age', 'anonymous', 'income', 'income_cluster']
].first()
h

Unnamed: 0_level_0,gender,age,anonymous,income,income_cluster
person,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0009655768c64bdeb2e877511632db8f,M,33,0,72000.0,0
00116118485d4dfda04fdbaba9a87b5c,NS,81,1,96000.0,1
0011e0d4e6b944f998e987f904e8c1e5,O,40,0,57000.0,0
0020c2b971eb4e9188eac86d93036a77,F,59,0,90000.0,1
0020ccbbb6d84e358d3414a3ff76cffd,F,24,0,60000.0,0
...,...,...,...,...,...
fff3ba4757bd42088c044ca26d73817a,F,69,0,83000.0,1
fff7576017104bcc8677a8d63322b5e1,M,71,0,73000.0,2
fff8957ea8b240a6b5e634b6ee8eafcf,M,71,0,56000.0,2
fffad4f4828548d1b5583907f2e9906b,M,34,0,34000.0,0


In [93]:
# Left-join the per-person demographics (h, indexed by person) onto the
# per-person offer counts. The result is only displayed, not stored.
pd.merge(gWide, h, how='left', on='person')

Unnamed: 0,person,offer completed,offer received,offer viewed,percent_completed,percent_viewed,gender,age,anonymous,income,income_cluster
0,0009655768c64bdeb2e877511632db8f,3,5,4,0.60,0.800000,M,33,0,72000.0,0
1,00116118485d4dfda04fdbaba9a87b5c,0,2,2,0.00,1.000000,NS,81,1,96000.0,1
2,0011e0d4e6b944f998e987f904e8c1e5,3,5,5,0.60,1.000000,O,40,0,57000.0,0
3,0020c2b971eb4e9188eac86d93036a77,3,5,3,0.60,0.600000,F,59,0,90000.0,1
4,0020ccbbb6d84e358d3414a3ff76cffd,3,4,4,0.75,1.000000,F,24,0,60000.0,0
...,...,...,...,...,...,...,...,...,...,...,...
16989,fff3ba4757bd42088c044ca26d73817a,3,6,3,0.50,0.500000,F,69,0,83000.0,1
16990,fff7576017104bcc8677a8d63322b5e1,3,5,4,0.60,0.800000,M,71,0,73000.0,2
16991,fff8957ea8b240a6b5e634b6ee8eafcf,0,3,2,0.00,0.666667,M,71,0,56000.0,2
16992,fffad4f4828548d1b5583907f2e9906b,3,4,4,0.75,1.000000,M,34,0,34000.0,0
