# Tinder experiment analysis

---

* the treatment effect metric employed is the percentage decrease in conversion rate from the Princeton profile to the Rutgers profile, "the percentage decrease metric"
* bootstrap resampling is employed to generate a list of percentage decrease metrics for females matching male profiles (data in "df_male_full") and for males matching female profiles (data in "df_female_full")
* tests: the null hypothesis is that the difference is explained by chance; the alternative is that the percentage decrease is higher for females matching males than for males matching females (i.e., females care more about academic prestige on Tinder than males)
  * one-sided t test 
  * one-sided permutation test
  * Cohen's d effect size

In [2]:
!pip install --upgrade scipy



In [3]:
import numpy as np
from random import choices, shuffle
import pandas as pd
from statistics import mean, stdev
from math import sqrt
from scipy import stats
import time


In [4]:
def bootstrap(ground_truth, predictions, metric, B, confidence_level=0.95):
    """
    Helper function providing paired bootstrap resampling of a metric.

    The two inputs are paired positionally, the pairs are resampled with
    replacement, and ``metric`` is evaluated on every resample.

        ground_truth / predictions: sequences of observations that are paired
            element by element; if their lengths differ, the longer one is
            silently truncated to the shorter (behaviour of ``zip``)
        metric: callable ``metric(first, second)`` receiving the two resampled
            columns as 1-D numpy arrays and returning the statistic of interest
        B: number of bootstrap iterations
        confidence_level: currently unused; kept in the signature so existing
            callers that pass it keep working. No interval is computed here.

    Returns a list of length B holding the metric value of each resample.
    """
    # pair the observations once; every resample draws whole pairs
    pairs = list(zip(ground_truth, predictions))

    replicates = []
    # bootstrap resampling loop
    for _ in range(B):
        # draw len(pairs) pairs with replacement, then split back into columns
        resample = np.array(choices(pairs, k=len(pairs)))
        replicates.append(metric(resample[:, 0], resample[:, 1]))

    return replicates

In [5]:
def percentage_decrease_metric(princeton, rutgers):
    """
    Percentage decrease in conversion rate from the Princeton group to the Rutgers group.

        princeton / rutgers: array-likes of outcomes in which the value 1 marks a
            match (lists, numpy arrays and pandas Series are all accepted)

    Returns ``(princeton_rate - rutgers_rate) / princeton_rate * 100`` as a float.
    A negative value means the Rutgers conversion rate is the higher one.

    Raises ZeroDivisionError when either group is empty or when the Princeton
    conversion rate is zero, since the metric is undefined in those cases.
    """
    # np.asarray makes the elementwise `== 1` comparison work for plain lists too
    princeton_outcomes = np.asarray(princeton)
    rutgers_outcomes = np.asarray(rutgers)

    if princeton_outcomes.size == 0 or rutgers_outcomes.size == 0:
        raise ZeroDivisionError("conversion rate is undefined for an empty group")

    # conversion rate of each group = share of outcomes equal to 1
    princeton_conversion_rate = np.count_nonzero(princeton_outcomes == 1) / princeton_outcomes.size
    rutgers_conversion_rate = np.count_nonzero(rutgers_outcomes == 1) / rutgers_outcomes.size

    if princeton_conversion_rate == 0:
        raise ZeroDivisionError(
            "Princeton conversion rate is zero; the percentage decrease is undefined"
        )

    # percentage decrease going from the princeton rate to the rutgers rate
    percentage_decrease = ((princeton_conversion_rate - rutgers_conversion_rate) / princeton_conversion_rate) * 100
    return percentage_decrease

In [6]:
# read the data

# Fixed seed for the row shuffles below. Row order matters downstream because
# bootstrap() pairs the two treatment arms positionally, so an unseeded shuffle
# makes every run start from a different pairing.
SHUFFLE_SEED = 0

df_female_full = pd.read_csv('https://raw.githubusercontent.com/daniel-furman/online-dating-field-experiment/main/data/processed_data/df_female_full.csv', index_col='Unnamed: 0')
# shuffle the data (reproducibly)
df_female_full = df_female_full.sample(frac=1, random_state=SHUFFLE_SEED)
print(df_female_full.head())

df_male_full = pd.read_csv('https://raw.githubusercontent.com/daniel-furman/online-dating-field-experiment/main/data/processed_data/df_male_full_2.csv', index_col='Unnamed: 0')
# shuffle the data (reproducibly)
df_male_full = df_male_full.sample(frac=1, random_state=SHUFFLE_SEED)
print('\n', df_male_full.head())


           Name  Age  School  Work  Match  Treatment
7    Squid game   30       1     1      1          0
144       Peter   26       1     1      1          1
6          Alex   28       0     0      1          0
63          Joe   29       1     0      0          0
54      Michael   23       1     0      0          0

           Name  Age  School  Work  Match  Treatment
201      Katya   24       0     0      0          1
170     Hannah   24       0     0      0          1
39     Nichole   24       0     0      0          0
122  Dominique   26       0     0      1          1
196     Ashley   21       1     0      0          1


In [7]:
# Bootstrap the percentage decrease metric on df_female_full. The 'Match' outcomes
# of the Treatment == 1 rows end up as the metric's `princeton` argument and those
# of the Treatment == 0 rows as its `rutgers` argument.
# NOTE(review): bootstrap() pairs the two arms row by row with zip, so if the arms
# differ in size the longer one is truncated — confirm this is intended.
female_list = bootstrap(
    df_female_full.loc[df_female_full['Treatment'] == 1, 'Match'],
    df_female_full.loc[df_female_full['Treatment'] == 0, 'Match'],
    metric=percentage_decrease_metric,
    B=100,
)
# mean of the bootstrap replicates (displayed as the cell output)
np.mean(female_list)

44.10402566077028

In [8]:
# Bootstrap the percentage decrease metric on df_male_full. The 'Match' outcomes
# of the Treatment == 1 rows end up as the metric's `princeton` argument and those
# of the Treatment == 0 rows as its `rutgers` argument.
# NOTE(review): bootstrap() pairs the two arms row by row with zip, so if the arms
# differ in size the longer one is truncated — confirm this is intended.
male_list = bootstrap(
    df_male_full.loc[df_male_full['Treatment'] == 1, 'Match'],
    df_male_full.loc[df_male_full['Treatment'] == 0, 'Match'],
    metric=percentage_decrease_metric,
    B=100,
)
# mean of the bootstrap replicates (displayed as the cell output)
np.mean(male_list)

65.05214885004938

In [9]:
%%time
# t testing
# Null is that the mean percentage decrease from princeton to rutgers is the same between the two groups.
# Alternative is that the mean percentage decrease from princeton to rutgers is larger for females matching men.

p_val_list = []
for i in range(0,1000):
    male_bootstrap = pd.Series(male_list).sample(frac=1, replace=True).to_list()
    female_bootstrap = pd.Series(female_list).sample(frac=1, replace=True).to_list()
    p_val = stats.ttest_ind(male_bootstrap, female_bootstrap, equal_var=False, alternative='greater')[1]
    p_val_list.append(p_val)

p_val_list.sort()
lower = p_val_list[25]
median = p_val_list[500]
upper = p_val_list[975]

print(f"\nP_val for Welch's T-test: {median}, with a 95% confidence interval of [{lower},{upper}]\n")


P_val for Welch's T-test: 1.1591479232274766e-21, with a 95% confidence interval of [2.450539342435309e-29,4.0696522426785225e-15]

CPU times: user 1.04 s, sys: 22.8 ms, total: 1.06 s
Wall time: 1.04 s


In [10]:
# practical significance testing
def cohens_d(list1, list2):
    """
    Cohen's d effect size between two samples, with a small-sample correction.

    The difference of the sample means is divided by the root of the averaged
    sample variances and then multiplied by two correction factors.

    NOTE(review): both correction factors use only len(list1); the size of
    list2 does not enter — confirm this is the intended form of the correction.
    """
    n = len(list1)
    mean_gap = mean(list1) - mean(list2)
    # root of the average of the two sample variances
    spread = sqrt((stdev(list1) ** 2 + stdev(list2) ** 2) / 2)
    # correction for small sample
    bias_factor = (n - 3) / (n - 2.25)
    scale_factor = sqrt((n - 2) / n)
    return mean_gap / spread * bias_factor * scale_factor

# report the effect size between the two lists of bootstrap replicates
print(
    "Effect size, Cohens D (number of strandard deviations between distributions): ",
    cohens_d(male_list, female_list),
)

Effect size, Cohens D (number of strandard deviations between distributions):  1.4898754415873396


In [11]:
%%time
# Non-parametric testing (permutation testing) on median
# Null is that the median percentage decrease from princeton to rutgers is explained by chance between the two groups.
# Alternative is that the median percentage decrease from princeton to rutgers is larger for females matching men.


# Testing on median
p_val_list = []
permutation_iters = 3000
ground_truth = np.median(male_list) - np.median(female_list)
# pool variables into one distribution, sample two distributions equal in size to the original 
pooled = list(male_list+female_list)
for i in range(0,1000):
    permuted_differences = []
    for i in range(0,permutation_iters):    
        shuffle(pooled)
        permuted_differences.append(np.median(pooled[0:int(len(pooled)/2)]) - np.median(pooled[int(len(pooled)/2):]))
    p_val = len(np.where(permuted_differences>=ground_truth)[0])/permutation_iters
    p_val_list.append(p_val)
p_val_list.sort()
lower = p_val_list[25]
median = p_val_list[500]
upper = p_val_list[975]
print(f'\nP_val for One-Tailed Permutation Test of Mean: {median}, with a 95% confidence interval of [{lower},{upper}]\n')


P_val for One-Tailed Permutation Test of Mean: 0.0, with a 95% confidence interval of [0.0,0.0]

CPU times: user 11min 49s, sys: 2.42 s, total: 11min 51s
Wall time: 11min 49s
