**Problem:** Defining an "adopted user" as a user who has logged into the product on three separate days in at least one seven­day period, identify which factors predict future user adoption.

## Imports


In [33]:
import pandas as pd
import numpy as np
import datetime as dt

In [34]:
# Load the datasets
df1 = pd.read_csv('takehome_user_engagement.csv', encoding='latin-1')
df2 = pd.read_csv('takehome_users.csv', encoding='latin-1')

In [35]:
print('shape:', df1.shape)
df1.head()

shape: (207917, 3)


Unnamed: 0,time_stamp,user_id,visited
0,2014-04-22 03:53:30,1,1
1,2013-11-15 03:45:04,2,1
2,2013-11-29 03:45:04,2,1
3,2013-12-09 03:45:04,2,1
4,2013-12-25 03:45:04,2,1


In [36]:
df1.dtypes

time_stamp    object
user_id        int64
visited        int64
dtype: object

In [37]:
# Change the type of time stamp column
df1['time_stamp'] = pd.to_datetime(df1['time_stamp'])

In [39]:
df1['time_diff'] = df1['time_stamp'].groupby(df1['user_id']).diff().dt.days
df1.head()

Unnamed: 0,time_stamp,user_id,visited,time_diff
0,2014-04-22 03:53:30,1,1,
1,2013-11-15 03:45:04,2,1,
2,2013-11-29 03:45:04,2,1,14.0
3,2013-12-09 03:45:04,2,1,10.0
4,2013-12-25 03:45:04,2,1,16.0


In [40]:
df1['time_diff'].replace(np.nan, 0, inplace=True)

In [41]:
df1.head(40)

Unnamed: 0,time_stamp,user_id,visited,time_diff
0,2014-04-22 03:53:30,1,1,0.0
1,2013-11-15 03:45:04,2,1,0.0
2,2013-11-29 03:45:04,2,1,14.0
3,2013-12-09 03:45:04,2,1,10.0
4,2013-12-25 03:45:04,2,1,16.0
5,2013-12-31 03:45:04,2,1,6.0
6,2014-01-08 03:45:04,2,1,8.0
7,2014-02-03 03:45:04,2,1,26.0
8,2014-02-08 03:45:04,2,1,5.0
9,2014-02-09 03:45:04,2,1,1.0


In [25]:
for user in df1.user_id.unique():
    if df1.user_id == user:
        

array([    1,     2,     3, ..., 11998, 11999, 12000])

In [22]:
# find the # of loggins in a week 
df1.groupby(['user_id', pd.Grouper(key='time_stamp', freq='7D')]).sum().reset_index().head(30)

Unnamed: 0,user_id,time_stamp,visited
0,1,2014-04-17,1
1,2,2013-11-14,1
2,2,2013-11-28,1
3,2,2013-12-05,1
4,2,2013-12-19,1
5,2,2013-12-26,1
6,2,2014-01-02,1
7,2,2014-01-30,1
8,2,2014-02-06,2
9,2,2014-02-13,2


In [61]:
# Status of being adopted
df1['adop_user_weekly'] = np.where(df1['visited']>=3, 1, 0)

In [65]:
df1.head(30)

Unnamed: 0,user_id,time_stamp,visited,adop_user_weekly
0,1,2014-04-17,1,0
1,2,2013-11-14,1,0
2,2,2013-11-28,1,0
3,2,2013-12-05,1,0
4,2,2013-12-19,1,0
5,2,2013-12-26,1,0
6,2,2014-01-02,1,0
7,2,2014-01-30,1,0
8,2,2014-02-06,2,0
9,2,2014-02-13,2,0


In [64]:
df1.groupby(by='user_id').agg({'visited':'sum', 'adop_user_weekly':'sum'})

Unnamed: 0_level_0,visited,adop_user_weekly
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1
1,1,0
2,14,0
3,1,0
4,1,0
5,1,0
...,...,...
11996,1,0
11997,1,0
11998,1,0
11999,1,0


In [20]:
print('shape:', df2.shape)
df2.head()

shape: (12000, 10)


Unnamed: 0,object_id,creation_time,name,email,creation_source,last_session_creation_time,opted_in_to_mailing_list,enabled_for_marketing_drip,org_id,invited_by_user_id
0,1,2014-04-22 03:53:30,Clausen August,AugustCClausen@yahoo.com,GUEST_INVITE,1398139000.0,1,0,11,10803.0
1,2,2013-11-15 03:45:04,Poole Matthew,MatthewPoole@gustr.com,ORG_INVITE,1396238000.0,0,0,1,316.0
2,3,2013-03-19 23:14:52,Bottrill Mitchell,MitchellBottrill@gustr.com,ORG_INVITE,1363735000.0,0,0,94,1525.0
3,4,2013-05-21 08:09:28,Clausen Nicklas,NicklasSClausen@yahoo.com,GUEST_INVITE,1369210000.0,0,0,1,5151.0
4,5,2013-01-17 10:14:20,Raw Grace,GraceRaw@yahoo.com,GUEST_INVITE,1358850000.0,0,0,193,5240.0


In [44]:
df2[df2['last_session_creation_time'].isnull()==False]

Unnamed: 0,object_id,creation_time,name,email,creation_source,last_session_creation_time,opted_in_to_mailing_list,enabled_for_marketing_drip,org_id,invited_by_user_id
0,1,2014-04-22 03:53:30,Clausen August,AugustCClausen@yahoo.com,GUEST_INVITE,1.398139e+09,1,0,11,10803.0
1,2,2013-11-15 03:45:04,Poole Matthew,MatthewPoole@gustr.com,ORG_INVITE,1.396238e+09,0,0,1,316.0
2,3,2013-03-19 23:14:52,Bottrill Mitchell,MitchellBottrill@gustr.com,ORG_INVITE,1.363735e+09,0,0,94,1525.0
3,4,2013-05-21 08:09:28,Clausen Nicklas,NicklasSClausen@yahoo.com,GUEST_INVITE,1.369210e+09,0,0,1,5151.0
4,5,2013-01-17 10:14:20,Raw Grace,GraceRaw@yahoo.com,GUEST_INVITE,1.358850e+09,0,0,193,5240.0
...,...,...,...,...,...,...,...,...,...,...
11995,11996,2013-09-06 06:14:15,Meier Sophia,SophiaMeier@gustr.com,ORG_INVITE,1.378448e+09,0,0,89,8263.0
11996,11997,2013-01-10 18:28:37,Fisher Amelie,AmelieFisher@gmail.com,SIGNUP_GOOGLE_AUTH,1.358275e+09,0,0,200,
11997,11998,2014-04-27 12:45:16,Haynes Jake,JakeHaynes@cuvox.de,GUEST_INVITE,1.398603e+09,1,1,83,8074.0
11998,11999,2012-05-31 11:55:59,Faber Annett,mhaerzxp@iuxiw.com,PERSONAL_PROJECTS,1.338638e+09,0,0,6,


In [49]:
df2[df2['last_session_creation_time'].isnull()==True]

Unnamed: 0,object_id,creation_time,name,email,creation_source,last_session_creation_time,opted_in_to_mailing_list,enabled_for_marketing_drip,org_id,invited_by_user_id
7,8,2013-07-31 05:34:02,Hamilton Danielle,DanielleHamilton@yahoo.com,PERSONAL_PROJECTS,,1,1,74,
8,9,2013-11-05 04:04:24,Amsel Paul,PaulAmsel@hotmail.com,PERSONAL_PROJECTS,,0,0,302,
11,12,2014-04-17 23:48:38,Mathiesen Lærke,LaerkeLMathiesen@cuvox.de,ORG_INVITE,,0,0,130,9270.0
14,15,2013-07-16 21:33:54,Theiss Ralf,RalfTheiss@hotmail.com,PERSONAL_PROJECTS,,0,0,175,
15,16,2013-02-11 10:09:50,Engel René,ReneEngel@hotmail.com,PERSONAL_PROJECTS,,0,0,211,
...,...,...,...,...,...,...,...,...,...,...
11975,11976,2013-12-25 22:01:41,Kohl Leah,LeahKohl@hotmail.com,PERSONAL_PROJECTS,,0,0,248,
11977,11978,2014-04-23 16:28:06,Castro Pedro,PedroCunhaCastro@gustr.com,PERSONAL_PROJECTS,,1,0,29,
11984,11985,2013-07-08 17:23:26,Jespersen Marcus,MarcusTJespersen@cuvox.de,PERSONAL_PROJECTS,,0,0,74,
11992,11993,2013-03-28 23:24:21,Townsend Isabel,IsabelTownsend@cuvox.de,PERSONAL_PROJECTS,,0,0,281,


In [38]:
df2['last_session_creation_time']

0        1.398139e+09
1        1.396238e+09
2        1.363735e+09
3        1.369210e+09
4        1.358850e+09
             ...     
11995    1.378448e+09
11996    1.358275e+09
11997    1.398603e+09
11998    1.338638e+09
11999    1.390727e+09
Name: last_session_creation_time, Length: 12000, dtype: float64

In [8]:
# Change the name of object_id column. user_id is more obvious
df2.rename(columns = {'object_id': 'user_id'}, inplace=True)

In [11]:
df2[df2['user_id']==15]

Unnamed: 0,user_id,creation_time,name,email,creation_source,last_session_creation_time,opted_in_to_mailing_list,enabled_for_marketing_drip,org_id,invited_by_user_id
14,15,2013-07-16 21:33:54,Theiss Ralf,RalfTheiss@hotmail.com,PERSONAL_PROJECTS,,0,0,175,
