In [208]:
# These libraries are required for the functions below
import math
import pandas as pd

In [209]:
def weighted_mean_and_stdev(x, weighted_x):
    """Return the weighted mean and weighted standard deviation of ``x``.

    Intended for Inverse Probability of Treatment Weights (IPTW). The weights
    are not passed in directly: the weight of observation ``i`` is recovered
    as ``weighted_x[i] / x[i]``.

    Assumption: when the data holds both treatment and control observations,
    it has already been filtered so that the input covers only one group.

    Parameters
    ----------
    x : iterable of numbers
        The unweighted values.
    weighted_x : iterable of numbers
        The same values after weighting, in the same order as ``x``.

    Returns
    -------
    tuple of (float, float)
        ``(weighted_mean, weighted_std)`` where, with recovered weights ``w``::

            weighted_mean = sum(weighted_x) / sum(w)
            weighted_std  = sqrt(sum(w) * sum(w * (x - weighted_mean)**2)
                                 / (sum(w)**2 - sum(w**2)))

    Raises
    ------
    ValueError
        If ``x`` and ``weighted_x`` do not have the same length.
    ZeroDivisionError
        If the weights sum to zero (this includes empty input), or if
        ``sum(w)**2 == sum(w**2)`` (e.g. a single observation), because the
        statistics are undefined in those cases.

    Notes
    -----
    A weight cannot be recovered where the unweighted value is zero, so the
    weight is taken to be 1 for such observations. If the true weight differs
    from 1, the result is only an approximation.
    """
    # Materialise once: iterators would otherwise be exhausted by zip() before
    # sum(weighted_x) runs, and len() is needed for the length check.
    x = list(x)
    weighted_x = list(weighted_x)

    if len(x) != len(weighted_x):
        # zip() would silently truncate while sum(weighted_x) would still use
        # every element, producing an inconsistent (wrong) mean.
        raise ValueError(
            "x and weighted_x must have the same length (got {} and {})".format(
                len(x), len(weighted_x)
            )
        )

    # Recover the weights. Set the weight to 1 if the unweighted value is zero.
    weights = [wx / ux if ux != 0 else 1 for wx, ux in zip(weighted_x, x)]
    weights_sum = sum(weights)
    if weights_sum == 0:
        raise ZeroDivisionError(
            "weights sum to zero (or input is empty); weighted mean is undefined"
        )

    weighted_mean_x = sum(weighted_x) / weights_sum

    weighted_squared_deviations = sum(
        w * (ux - weighted_mean_x) ** 2 for w, ux in zip(weights, x)
    )
    denominator = weights_sum ** 2 - sum(w ** 2 for w in weights)
    if denominator == 0:
        raise ZeroDivisionError(
            "weighted standard deviation is undefined: need at least two "
            "observations with non-zero weight"
        )

    weighted_std_x = math.sqrt(
        weights_sum * weighted_squared_deviations / denominator
    )

    return weighted_mean_x, weighted_std_x

In [210]:
def unweighted_weighted_standardised_difference(df, X, weighted_X, y,
                                                control_value=0, treatment_value=1):
    """Compute unweighted and weighted standardised differences per variable.

    For each variable the rows of ``df`` are split into a control group and a
    treatment group using column ``y``. The standardised difference is
    reported as a percentage::

        d = 100 * (mean_treatment - mean_control)
                / sqrt((std_treatment**2 + std_control**2) / 2)

    The unweighted row uses ``Series.mean()`` / ``Series.std()`` of the
    unweighted column; the weighted row uses the mean and standard deviation
    returned by ``weighted_mean_and_stdev`` for the unweighted/weighted
    column pair.

    Parameters
    ----------
    df : pandas.DataFrame
        Data containing the variables and the group indicator column.
    X : list
        Column headings of the unweighted variables.
    weighted_X : list
        Column headings of the weighted variables, corresponding one-to-one
        (same order) with ``X``.
    y : str
        Name of the column whose values indicate treatment or control.
    control_value : optional
        Value of ``df[y]`` that marks a control row (default 0).
    treatment_value : optional
        Value of ``df[y]`` that marks a treatment row (default 1).

    Returns
    -------
    pandas.DataFrame
        One column per entry of ``X`` (named after the unweighted variable).
        Row ``'d'`` holds the unweighted standardised differences and row
        ``'d_weighted'`` holds the weighted standardised differences.

    Raises
    ------
    ValueError
        If ``X`` and ``weighted_X`` do not have the same length.
    """
    if len(X) != len(weighted_X):
        # Previously a shorter weighted_X raised IndexError mid-loop and a
        # longer one had its extra entries silently ignored.
        raise ValueError(
            "X and weighted_X must have the same length (got {} and {})".format(
                len(X), len(weighted_X)
            )
        )

    # The group masks do not depend on the variable, so compute them once.
    control_rows = df[y] == control_value
    treatment_rows = df[y] == treatment_value

    outdf = pd.DataFrame(index=['d', 'd_weighted'])
    for u, w in zip(X, weighted_X):
        u_control = df.loc[control_rows, u]
        u_treatment = df.loc[treatment_rows, u]
        w_control = df.loc[control_rows, w]
        w_treatment = df.loc[treatment_rows, w]

        weighted_mean_control, weighted_std_control = weighted_mean_and_stdev(
            u_control.tolist(), w_control.tolist())
        weighted_mean_treatment, weighted_std_treatment = weighted_mean_and_stdev(
            u_treatment.tolist(), w_treatment.tolist())

        mean_treatment = u_treatment.mean()
        mean_control = u_control.mean()
        std_treatment = u_treatment.std()
        std_control = u_control.std()

        # Both differences are scaled by 100, i.e. expressed as percentages.
        d = 100 * (mean_treatment - mean_control) / math.sqrt(
            ((std_treatment ** 2) + (std_control ** 2)) / 2)
        dw = 100 * (weighted_mean_treatment - weighted_mean_control) / math.sqrt(
            ((weighted_std_treatment ** 2) + (weighted_std_control ** 2)) / 2)
        outdf[u] = [d, dw]

    return outdf
    

In [227]:
# Example
# Unweighted variable `a` and its weighted counterpart `c`.
a = [15, 12, 12, 10]
c = [15, 14, 16, 14]
# Unweighted variable `b` and its weighted counterpart `d`.
b = [9, 10, 10, 13]
d = [9, 10, 9, 11]

In [228]:
# Create df1 as an empty DataFrame; it holds the example data.
df1 = pd.DataFrame()

In [229]:
# Build the example frame in one constructor call so this cell does not rely on
# mutating a DataFrame created in a different cell.
# 'y' is the group indicator: 0 = control, 1 = treatment.
df1 = pd.DataFrame({
    'a': a,
    'b': b,
    'c': c,
    'd': d,
    'y': [0, 0, 1, 1],
})

In [230]:
# Display the example DataFrame.
df1

Unnamed: 0,a,b,c,d,y
0,15,9,15,9,0
1,12,10,14,10,0
2,12,10,16,9,1
3,10,13,14,11,1


In [231]:
# Standardised differences for 'a' and 'b' (weighted counterparts 'c' and 'd'),
# with 'y' as the treatment/control indicator column.
unweighted_weighted_standardised_difference(
    df1,
    X=['a', 'b'],
    weighted_X=['c', 'd'],
    y='y',
)

Unnamed: 0,a,b
d,-138.675049,126.491106
d_weighted,-133.627589,123.565651
