In [1]:
# A/B Testing
# A statistical analysis of trade data based on device type.
# In particular, leadership wants to know whether there is a statistically significant difference in the mean number of transactions between iPhone® users and Android™ users.

In [3]:
# Import any relevant packages or libraries
import os

import pandas as pd
from scipy import stats

In [5]:
# Load dataset into dataframe.
# The file location can be overridden with the CHURN_DATASET_PATH environment variable so the
# notebook is not tied to one machine's drive layout; the original path remains the default.
DATASET_PATH = os.environ.get(
    "CHURN_DATASET_PATH",
    r"D:\DATA\Python\User Churn\Crypto_churn_demo_dataset.csv",
)
df = pd.read_csv(DATASET_PATH)

In [6]:
# Preview the first five rows to check that the columns loaded as expected
df.head(n=5)

Unnamed: 0,user_id,label,transactions,trade_days,activity_days,days_since_onboarding,total_trade_volume_usd,avg_trade_size_usd,fees_paid_usd,session_minutes_total,sessions,asset_diversity,used_advanced_features,device
0,0,retained,3,3,8,8,169.702148,82.955593,0.296797,82.428451,16,2,0,Android
1,1,churned,2,1,5,23,971.910735,551.223154,1.948471,185.414814,14,1,0,iPhone
2,2,churned,3,2,4,44,172.476792,36.030457,0.286159,165.155415,6,4,0,Android
3,3,retained,19,9,12,222,416.302229,31.637348,0.881171,58.283429,9,7,0,Android
4,4,retained,2,2,5,215,266.558343,72.857876,0.400495,201.699895,14,2,1,iPhone


In [None]:
# Research question:
# "Do customers who open the application using an iPhone have the same number of transactions on average as customers who use Android devices?"

In [7]:
# 1. Create `map_dictionary`: the numeric code assigned to each device label
map_dictionary = {'Android': 2, 'iPhone': 1}

# 2. Create the `device_type` column by mapping `device` through the dictionary.
#    Series.map yields NaN for any value that is not a key of the dictionary.
df['device_type'] = df['device'].map(map_dictionary)

# 3. Fail loudly if any device label was not covered by `map_dictionary`;
#    otherwise those rows would silently fall out of both groups in later comparisons.
unmapped_devices = df.loc[df['device_type'].isna(), 'device'].unique()
assert len(unmapped_devices) == 0, f"Unmapped device values: {unmapped_devices}"

df['device_type'].head()

0    2
1    1
2    2
3    2
4    1
Name: device_type, dtype: int64

In [8]:
# Average number of transactions for each device label
df['transactions'].groupby(df['device']).mean()

device
Android    12.494926
iPhone     14.233098
Name: transactions, dtype: float64

In [9]:
# Same comparison keyed by the numeric `device_type` code
df['transactions'].groupby(df['device_type']).mean()

device_type
1    14.233098
2    12.494926
Name: transactions, dtype: float64

In [None]:
#Based on the averages shown, it appears that customers who use an iPhone device to interact with the application have a higher number of transactions on average.
#However, this difference might arise from random sampling, rather than being a true difference in the number of transactions.
#To assess whether the difference is statistically significant, we can conduct a hypothesis test.

In [None]:
#Hypotheses:
#H0 (null): There is no difference in the average number of transactions between customers who use iPhone devices and customers who use Android devices.
#HA (alternative): There is a difference in the average number of transactions between customers who use iPhone devices and customers who use Android devices.

In [12]:
# Significance level (alpha) for the hypothesis test: 5%, the conventional threshold
significant_level = 5 / 100
significant_level

0.05

In [13]:
# Two-sample t-test on `transactions`, iPhone vs Android

# Boolean masks selecting each device group by its `device_type` code
is_iphone = df['device_type'] == map_dictionary['iPhone']
is_android = df['device_type'] == map_dictionary['Android']

# `transactions` values for each group
iPhone = df.loc[is_iphone, 'transactions']
Android = df.loc[is_android, 'transactions']

# equal_var=False: do not assume the two groups share the same variance
stats.ttest_ind(iPhone, Android, equal_var=False)

TtestResult(statistic=np.float64(6.04840852946447), pvalue=np.float64(1.5084126336475907e-09), df=np.float64(11470.815108766508))

In [None]:
#p-value = 1.51e-09 = 0.0000000015 < 0.05
#Since the p-value is less than the chosen significance level (5%), reject the null hypothesis.
#Conclude: there is a statistically significant difference in the average number of transactions between iPhone users and Android users.

In [None]:
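#While iPhone users execute approximately 14% more transactions than Android users (about 14.2 vs 12.5 on average), device type does not materially impact churn,
#suggesting that platform influences engagement intensity but not long-term retention.
#Note: the relationship between device type and churn is not tested in this notebook, so the churn statement should be confirmed by a separate analysis.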