# Tinder experiment analysis

---

## Part 1: difference between Men and Women

* the treatment effect metric employed is the percentage decrease in conversion rate from the Princeton profile to the Rutgers profile, "the percentage decrease metric"
* bootstrap resampling employed to generate a list of the percentage decrease metrics for females matching male profiles, "df_male_full", and for males matching female profiles, "df_female_full"
* tests, null is difference is explained by chance, alternative is percentage decrease is higher for females matching males than for males matching females (females care more about academic prestige on Tinder than males)
  * one-sided t test 
  * one-sided permutation test
  * cohen's d effect size

In [1]:
!pip install --upgrade scipy

Collecting scipy
  Downloading scipy-1.7.3-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (38.1 MB)
[K     |████████████████████████████████| 38.1 MB 1.3 MB/s 
Installing collected packages: scipy
  Attempting uninstall: scipy
    Found existing installation: scipy 1.4.1
    Uninstalling scipy-1.4.1:
      Successfully uninstalled scipy-1.4.1
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
albumentations 0.1.12 requires imgaug<0.2.7,>=0.2.5, but you have imgaug 0.2.9 which is incompatible.[0m
Successfully installed scipy-1.7.3


In [2]:
import numpy as np
from random import choices, shuffle
import pandas as pd
from statistics import mean, stdev
from math import sqrt
from scipy import stats
import time


In [3]:
def bootstrap(ground_truth, predictions, metric, B, confidence_level=0.95):
    """
    helper function for providing bootstrap resampling
    
        ground_truth / predictions: ground truthed labels / model predictions
        metric: metric to generate confidence interval for
        B: number of iterations
        confidence_level: percentage confidence interval desired (default is 2 sigma)
    """
    
    # compute lower and upper significance index
    critical_value=(1-confidence_level)/2
    lower_sig=100*critical_value
    upper_sig=100*(1-critical_value)
    data=[]
    for g, p in zip(ground_truth, predictions):
        data.append([g,p])

    accuracies=[]
    # bootstrap resampling loop
    for b in range(B):
        choice=choices(data, k=len(data))
        choice=np.array(choice)
        accuracy=metric(choice[:,0], choice[:,1])
        
        accuracies.append(accuracy)
    
    #percentiles=np.percentile(accuracies, [lower_sig, 50, upper_sig])
    
    #lower=percentiles[0]
    #median=percentiles[1]
    #upper=percentiles[2]
    
    return accuracies

In [4]:
def percentage_decrease_metric(princeton, rutgers):
  # grab outcomes for each group

  printeton_outcomes = princeton
  rutgers_outcomes = rutgers

  # grab conversion rate for each group
  princeton_matches = printeton_outcomes[printeton_outcomes==1] 
  princeton_conversion_rate = len(princeton_matches)/len(printeton_outcomes)

  rutgers_matches = rutgers_outcomes[rutgers_outcomes==1] 
  rutgers_conversion_rate = len(rutgers_matches)/len(rutgers_outcomes)

  # grab percentage increase from rutgers to princeton
  percentage_decrease = ((princeton_conversion_rate - rutgers_conversion_rate)/(princeton_conversion_rate)) * 100
  return percentage_decrease

In [5]:
# read the data

df_female_full = pd.read_csv('https://raw.githubusercontent.com/daniel-furman/online-dating-field-experiment/main/data/processed_data/df_female_full.csv', index_col='Unnamed: 0')
# shuffle the data
df_female_full = df_female_full.sample(frac=1)
print(df_female_full.head())

df_male_full = pd.read_csv('https://raw.githubusercontent.com/daniel-furman/online-dating-field-experiment/main/data/processed_data/df_male_full_2.csv', index_col='Unnamed: 0')
# shuffle the data
df_male_full = df_male_full.sample(frac=1)
print('\n', df_male_full.head())


         Name  Age  School  Work  Match  Treatment
135     Tanay   23       1     0      1          1
103  Jonathan   23       1     1      1          1
43       John   22       0     1      0          0
186      Dave   22       0     0      0          1
190     Jesus   28       0     0      0          1

        Name  Age  School  Work  Match  Treatment
1       Ash   24       1     1      1          0
27     Pope   25       0     0      0          0
183  Ariana   22       0     0      0          1
38   Nguyen   24       0     1      0          0
91   Isabel   24       0     0      0          0


In [6]:
female_list = bootstrap(df_female_full[df_female_full['Treatment']==1]['Match'],
                   df_female_full[df_female_full['Treatment']==0]['Match'],
                   percentage_decrease_metric,
                   100,)
np.mean(female_list)

44.31304381474861

In [7]:
male_list = bootstrap(df_male_full[df_male_full['Treatment']==1]['Match'],
                           df_male_full[df_male_full['Treatment']==0]['Match'],
                           percentage_decrease_metric,
                           100,)
np.mean(male_list)

64.58484673287334

In [8]:
%%time
# t testing
# Null is that the mean percentage decrease from princeton to rutgers is the same between the two groups.
# Alternative is that the mean percentage decrease from princeton to rutgers is larger for females matching men.

p_val_list = []
for i in range(0,1000):
    male_bootstrap = pd.Series(male_list).sample(frac=1, replace=True).to_list()
    female_bootstrap = pd.Series(female_list).sample(frac=1, replace=True).to_list()
    p_val = stats.ttest_ind(male_bootstrap, female_bootstrap, equal_var=False, alternative='greater')[1]
    p_val_list.append(p_val)

p_val_list.sort()
lower = p_val_list[25]
median = p_val_list[500]
upper = p_val_list[975]

print(f"\nP_val for Welch's T-test: {median}, with a 95% confidence interval of [{lower},{upper}]\n")


P_val for Welch's T-test: 3.6318763884074564e-21, with a 95% confidence interval of [7.109263185259275e-32,6.218972793583267e-14]

CPU times: user 897 ms, sys: 0 ns, total: 897 ms
Wall time: 896 ms


In [9]:
# practical significance testing
def cohens_d(list1, list2): # with correction for small sample
    return (mean(list1) - mean(list2)) / (sqrt((stdev(list1) ** 2 + stdev(list2) ** 2) / 2)) * ((len(list1)- 3)/ (len(list1)- 2.25)) * sqrt(((len(list1)- 2)/len(list1)))

print("Effect size, Cohens D (number of strandard deviations between distributions): ", cohens_d(male_list, female_list))

Effect size, Cohens D (number of strandard deviations between distributions):  1.5101440084277775


In [None]:
%%time
# Non-parametric testing (permutation testing) on median
# Null is that the median percentage decrease from princeton to rutgers is explained by chance between the two groups.
# Alternative is that the median percentage decrease from princeton to rutgers is larger for females matching men.


# Testing on median
p_val_list = []
permutation_iters = 3000
ground_truth = np.median(male_list) - np.median(female_list)
# pool variables into one distribution, sample two distributions equal in size to the original 
pooled = list(male_list+female_list)
for i in range(0,1000):
    permuted_differences = []
    for i in range(0,permutation_iters):    
        shuffle(pooled)
        permuted_differences.append(np.median(pooled[0:int(len(pooled)/2)]) - np.median(pooled[int(len(pooled)/2):]))
    p_val = len(np.where(permuted_differences>=ground_truth)[0])/permutation_iters
    p_val_list.append(p_val)
p_val_list.sort()
lower = p_val_list[25]
median = p_val_list[500]
upper = p_val_list[975]
print(f'\nP_val for One-Tailed Permutation Test on Median: {median}, with a 95% confidence interval of [{lower},{upper}]\n')


P_val for One-Tailed Permutation Test on Median: 0.0, with a 95% confidence interval of [0.0,0.0]

CPU times: user 11min 22s, sys: 1.69 s, total: 11min 23s
Wall time: 11min 22s


## Part 2: ATE for control versus treatment inter-sex
---

* pools the females and the males together and calculates the ATE of having Princeton versus Rutgers overall

In [10]:
full_df = pd.concat([df_male_full, df_female_full])
full_df.sample(4)

Unnamed: 0,Name,Age,School,Work,Match,Treatment
122,Edmond,25,0,0,1,1
70,Naomi,22,0,1,0,0
110,Jay,25,1,1,1,1
74,Meylia,23,0,0,0,0


In [11]:
import statsmodels.api as sm
ytrain = full_df['Match']
Xtrain = full_df[['Age', 'School', 'Work', 'Treatment']]
log_reg = sm.Logit(ytrain, Xtrain).fit()
log_reg.summary()

  import pandas.util.testing as tm


Optimization terminated successfully.
         Current function value: 0.550270
         Iterations 6


0,1,2,3
Dep. Variable:,Match,No. Observations:,413.0
Model:,Logit,Df Residuals:,409.0
Method:,MLE,Df Model:,3.0
Date:,"Mon, 02 May 2022",Pseudo R-squ.:,0.06601
Time:,02:28:58,Log-Likelihood:,-227.26
converged:,True,LL-Null:,-243.32
Covariance Type:,nonrobust,LLR p-value:,4.923e-07

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Age,-0.0846,0.010,-8.520,0.000,-0.104,-0.065
School,0.8424,0.238,3.545,0.000,0.377,1.308
Work,0.6301,0.242,2.603,0.009,0.156,1.105
Treatment,0.7861,0.232,3.395,0.001,0.332,1.240


* ATE is 0.7861 with a standard error of 0.232 and p-value of 0.001

In [25]:
#full_df['Match'][full_df['Treatment']==1]
#full_df['Match'][full_df['Treatment']==0]


In [21]:
%%time
# t testing
# Null is that the match rate for princeton and rutgers is the same.
# Alternative is that the match rate is better for princeton then rutgers.

p_val_list = []
for i in range(0,1000):
    princeton_bootstrap = pd.Series(male_list).sample(frac=1, replace=True).to_list()
    rutgers_bootstrap = pd.Series(female_list).sample(frac=1, replace=True).to_list()
    p_val = stats.ttest_ind(princeton_bootstrap, rutgers_bootstrap, equal_var=False, alternative='greater')[1]
    p_val_list.append(p_val)

p_val_list.sort()
lower = p_val_list[25]
median = p_val_list[500]
upper = p_val_list[975]

print(f"\nP_val for Welch's T-test: {median}, with a 95% confidence interval of [{lower},{upper}]\n")


P_val for Welch's T-test: 4.318082381377927e-21, with a 95% confidence interval of [9.314061265384596e-31,1.3447208458393505e-13]

CPU times: user 870 ms, sys: 15.5 ms, total: 885 ms
Wall time: 865 ms


In [23]:
# practical significance testing
def cohens_d(list1, list2): # with correction for small sample
    return (mean(list1) - mean(list2)) / (sqrt((stdev(list1) ** 2 + stdev(list2) ** 2) / 2)) * ((len(list1)- 3)/ (len(list1)- 2.25)) * sqrt(((len(list1)- 2)/len(list1)))

print("Effect size, Cohens D (number of standard deviations between distributions): ", cohens_d(list(full_df['Match'][full_df['Treatment']==1]), list(full_df['Match'][full_df['Treatment']==0])))

Effect size, Cohens D (number of strandard deviations between distributions):  0.41177368591696484


In [24]:
%%time
# Non-parametric testing (permutation testing) on median
# Null is that the median percentage decrease from princeton to rutgers is explained by chance between the two groups.
# Alternative is that the median percentage decrease from princeton to rutgers is larger for females matching men.


# Testing on median
p_val_list = []
permutation_iters = 1000
male_list = list(full_df['Match'][full_df['Treatment']==1])
female_list = list(full_df['Match'][full_df['Treatment']==0])

ground_truth = np.median(male_list) - np.median(female_list)
# pool variables into one distribution, sample two distributions equal in size to the original 
pooled = list(male_list+female_list)
for i in range(0,1000):
    permuted_differences = []
    for i in range(0,permutation_iters):    
        shuffle(pooled)
        permuted_differences.append(np.median(pooled[0:int(len(pooled)/2)]) - np.median(pooled[int(len(pooled)/2):]))
    p_val = len(np.where(permuted_differences>=ground_truth)[0])/permutation_iters
    p_val_list.append(p_val)
p_val_list.sort()
lower = p_val_list[25]
median = p_val_list[500]
upper = p_val_list[975]
print(f'\nP_val for One-Tailed Permutation Test on Median: {median}, with a 95% confidence interval of [{lower},{upper}]\n')


P_val for One-Tailed Permutation Test on Median: 1.0, with a 95% confidence interval of [1.0,1.0]

CPU times: user 6min 17s, sys: 5.29 s, total: 6min 22s
Wall time: 6min 11s
