# Relax Inc Take-Home Challenge - Predicting User Adoption

Objective

- Identify factors that predict whether a user becomes an "adopted user", defined as logging in on 3 distinct days within any 7-day period.

In [1]:
import pandas as pd
import numpy as np
from datetime import timedelta
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report

In [2]:


# --------------------------------------------------
# 1. Load Data
# --------------------------------------------------


users = pd.read_csv('takehome_users.csv', encoding='latin-1')
engagement = pd.read_csv('takehome_user_engagement.csv')


engagement['time_stamp'] = pd.to_datetime(engagement['time_stamp'])

In [5]:
# --------------------------------------------------
# 2. Define Adopted Users
# --------------------------------------------------


# An adopted user has 3 unique login days within any 7‑day window


def is_adopted(user_df):
    dates = user_df['time_stamp'].dt.date.sort_values().unique()
    for i in range(len(dates) - 2):
        if (dates[i + 2] - dates[i]).days <= 7:
            return 1
            return 0


adoption = (
engagement
.groupby('user_id')
.apply(is_adopted)
.reset_index(name='adopted')
)

  .apply(is_adopted)


In [6]:
# --------------------------------------------------
# 3. Merge User Data
# --------------------------------------------------


data = users.merge(adoption, left_on='object_id', right_on='user_id', how='left')
data['adopted'] = data['adopted'].fillna(0)


# Convert timestamps
data['creation_time'] = pd.to_datetime(data['creation_time'])
data['last_session_creation_time'] = pd.to_datetime(
data['last_session_creation_time'], unit='s'
)

In [7]:
# --------------------------------------------------
# 4. Feature Engineering
# --------------------------------------------------


# Invitation flag
data['invited'] = data['invited_by_user_id'].notnull().astype(int)


# Organization size (used instead of org_id to avoid leakage)
org_sizes = data.groupby('org_id')['object_id'].count()
data['org_size'] = data['org_id'].map(org_sizes)


# Account age (days)
data['account_age_days'] = (
data['last_session_creation_time'] - data['creation_time']
).dt.days.fillna(0)


# One‑hot encode creation source
data = pd.get_dummies(data, columns=['creation_source'], drop_first=True)

In [8]:
# --------------------------------------------------
# 5. Exploratory Analysis
# --------------------------------------------------


adoption_rate = data['adopted'].mean()
print(f"Overall adoption rate: {adoption_rate:.2%}")


adoption_by_invite = data.groupby('invited')['adopted'].mean()
print("\nAdoption rate by invitation status:")
print(adoption_by_invite)

Overall adoption rate: 13.80%

Adoption rate by invitation status:
invited
0    0.127709
1    0.146953
Name: adopted, dtype: float64


In [9]:
# --------------------------------------------------
# 6. Modeling Dataset
# --------------------------------------------------


features = [
'invited',
'org_size',
'opted_in_to_mailing_list',
'enabled_for_marketing_drip',
'account_age_days'
] + [c for c in data.columns if c.startswith('creation_source_')]


X = data[features].fillna(0)
y = data['adopted']


X_train, X_test, y_train, y_test = train_test_split(
X, y, test_size=0.3, random_state=42, stratify=y
)

In [10]:
# --------------------------------------------------
# 7. Logistic Regression Model
# --------------------------------------------------


model = LogisticRegression(max_iter=1000)
model.fit(X_train, y_train)


y_pred = model.predict(X_test)


print("Classification Report:\n")
print(classification_report(y_test, y_pred))


# Coefficient inspection
coef = pd.Series(model.coef_[0], index=X.columns).sort_values(ascending=False)
print("\nTop positive predictors:")
print(coef.head(5))


print("\nTop negative predictors:")
print(coef.tail(5))

Classification Report:

              precision    recall  f1-score   support

         0.0       0.98      0.99      0.98      3103
         1.0       0.91      0.86      0.88       497

    accuracy                           0.97      3600
   macro avg       0.94      0.92      0.93      3600
weighted avg       0.97      0.97      0.97      3600


Top positive predictors:
opted_in_to_mailing_list              0.342482
creation_source_SIGNUP_GOOGLE_AUTH    0.149860
account_age_days                      0.070738
org_size                             -0.005321
invited                              -0.021805
dtype: float64

Top negative predictors:
invited                             -0.021805
creation_source_PERSONAL_PROJECTS   -0.057003
creation_source_ORG_INVITE          -0.081580
creation_source_SIGNUP              -0.093401
enabled_for_marketing_drip          -0.313715
dtype: float64


**8. Key Findings**



- Invitation‑based signup is a strong predictor of adoption
- Larger organizations are associated with higher adoption
- Creation source (especially ORG_INVITE) matters
- Marketing email flags have limited predictive power

**9. Recommendations**
- Encourage invitation‑driven onboarding
- Focus on activating users early in their lifecycle
- Use org‑level context rather than org identifiers
- Collect richer behavioral data beyond login events