# Question 2
This exercise utilizes the data set *School.sav*, which can be downloaded from this site: http://edhd.bgsu.edu/amm/datasets.html

In [151]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import pylab 
from scipy import stats
import seaborn as sb
import statsmodels.api as sm
import numbers
from sklearn.preprocessing import MinMaxScaler


## Take a sample of 50

In [165]:
# Read in the data
df = pd.read_excel('school.xlsx', header=0)

In [166]:
# Sample 50
df = df.sample(n=50, random_state=123)
df.head()

Unnamed: 0,school,id,loinc93,lep93,lep94,grad93,grad94,act94,act93,pctact93,...,math93,math94me,mathch94,read93,read94me,readch94,scienc93,sci94me,scich94,medloinc
51,Schurz,52,67.2,25.7,29.3,41.5,44.1,15.1,15.2,36.2,...,155.0,,1.0,168.0,,-6.0,164.0,35.0,3.0,
31,Kelly,32,89.7,22.7,25.2,27.9,44.1,15.4,15.4,47.3,...,161.0,43.0,7.0,171.0,35.0,-10.0,170.0,43.0,10.0,1.0
63,Young Magnet,64,28.1,0.2,0.1,87.3,,22.5,22.3,91.7,...,331.0,99.0,1.0,327.0,97.0,-1.0,311.0,98.0,,
53,Simeon Vocat.,54,56.5,0.0,0.0,55.7,,15.7,15.8,54.0,...,176.0,43.0,-1.0,196.0,45.0,-14.0,180.0,41.0,-1.0,0.0
23,Harper,24,,0.2,0.1,24.1,32.8,14.1,14.5,25.6,...,128.0,12.0,1.0,127.0,19.0,-3.0,150.0,14.0,-6.0,0.0


In [167]:
df.dtypes

school       object
id            int64
loinc93     float64
lep93       float64
lep94       float64
grad93      float64
grad94      float64
act94       float64
act93       float64
pctact93    float64
pctact94    float64
math93      float64
math94me    float64
mathch94    float64
read93      float64
read94me    float64
readch94    float64
scienc93    float64
sci94me     float64
scich94     float64
medloinc     object
dtype: object

In [161]:
# TODO
#df['medloinc'==" "]=None

In [168]:
# Save sample as csv
#df.to_csv('q2_school_sample.csv')

## A. Percent Missing Data
Report the missing value information for the variables as well as the cases (observations). What % of data is missing?

In [169]:
metric_vars = df.columns.drop('school').drop('id').drop('medloinc').drop('missing')
metric_vars

ValueError: labels ['missing'] not contained in axis

In [170]:
def missing_col_summary(df):
    var_missing_summary = pd.DataFrame(columns=['variable', 'num_cases', 'mean', 'std', 'num_cases_missing', 
                                                'percent_missing'])
    missing_sum = df.isnull().sum()
    missing_perc = df.isnull().mean().round(4) * 100
    for i in range(len(df.columns)):
        if df.columns[i] == 'id':
            continue
        num_missing = missing_sum[i]
        perc_missing = missing_perc[i]
        if df.columns[i] in metric_vars:
            mean = df[df.columns[i]].mean()
            std = np.std(df[df.columns[i]])
        else:
            mean = 'NA'
            std = 'NA'
        var_missing_summary = var_missing_summary.append({'variable':df.columns[i], 'num_cases':50-num_missing, 
                                                          'mean': mean, 'std':std, 'num_cases_missing':num_missing, 
                                                          'percent_missing':perc_missing}, ignore_index=True)
    return var_missing_summary

missing_col_summary(df)

Unnamed: 0,variable,num_cases,mean,std,num_cases_missing,percent_missing
0,school,50,,,0,0.0
1,loinc93,38,57.9526,19.8399,12,24.0
2,lep93,50,6.442,9.90352,0,0.0
3,lep94,48,7.25,10.7433,2,4.0
4,grad93,41,45.6756,15.0997,9,18.0
5,grad94,40,47.235,14.9298,10,20.0
6,act94,40,15.885,1.8919,10,20.0
7,act93,38,15.8211,1.71175,12,24.0
8,pctact93,45,54.7689,18.8078,5,10.0
9,pctact94,39,48.2641,15.3308,11,22.0


In [171]:
def missing_row_summary(df):
    case_summary = pd.DataFrame(columns=['missing_data_per_case', 'num_cases', 'percent_sample'])

    for i in range(13):
        num_cases = len(df[df['missing'] == i])
        case_summary = case_summary.append({'missing_data_per_case':i, 'num_cases':num_cases,
                                            'percent_sample':num_cases/50}, ignore_index=True)
    return case_summary

df['missing'] = df.isnull().sum(axis=1)
missing = [df.index[df.isnull().sum(axis=1)]]
ml = list(missing[0])
df.loc[ml]
missing_row_summary(df)

Unnamed: 0,missing_data_per_case,num_cases,percent_sample
0,0.0,3.0,0.06
1,1.0,9.0,0.18
2,2.0,10.0,0.2
3,3.0,11.0,0.22
4,4.0,10.0,0.2
5,5.0,2.0,0.04
6,6.0,2.0,0.04
7,7.0,0.0,0.0
8,8.0,1.0,0.02
9,9.0,0.0,0.0


In [172]:
print('total cases: {}'.format(len(df[df['missing'] > -1])))

total cases: 50


## B. MAR vs CMAR
Missing at Random is diagnosed by using a t-test

CMAR is diagnosed by the Missing Value Analysis. MCAR data allows for a wider range of remedies to be applied.

Steps:
* Divide all the variables to 2 group of missing and non-missing (0 and 1). 
* Perform the t-test (all the combinations pairs) to see if the data set is nonrandom / MAR or CMAR. 
* Only show the fraction of No significant P.Values  
* How many non-significances/ total?

In [173]:
null_data = df[df.isnull().any(axis=1)]
null_data.head()

Unnamed: 0,school,id,loinc93,lep93,lep94,grad93,grad94,act94,act93,pctact93,...,math94me,mathch94,read93,read94me,readch94,scienc93,sci94me,scich94,medloinc,missing
51,Schurz,52,67.2,25.7,29.3,41.5,44.1,15.1,15.2,36.2,...,,1.0,168.0,,-6.0,164.0,35.0,3.0,,3
63,Young Magnet,64,28.1,0.2,0.1,87.3,,22.5,22.3,91.7,...,99.0,1.0,327.0,97.0,-1.0,311.0,98.0,,,4
53,Simeon Vocat.,54,56.5,0.0,0.0,55.7,,15.7,15.8,54.0,...,43.0,-1.0,196.0,45.0,-14.0,180.0,41.0,-1.0,0.0,1
23,Harper,24,,0.2,0.1,24.1,32.8,14.1,14.5,25.6,...,12.0,1.0,127.0,19.0,-3.0,150.0,14.0,-6.0,0.0,1
40,Manley,41,74.8,0.0,0.0,35.0,34.4,,14.5,24.6,...,9.0,-2.0,123.0,12.0,-10.0,,23.0,8.0,1.0,2


In [174]:
complete_data = df[~df.isnull().any(axis=1)]
complete_data.head()

Unnamed: 0,school,id,loinc93,lep93,lep94,grad93,grad94,act94,act93,pctact93,...,math94me,mathch94,read93,read94me,readch94,scienc93,sci94me,scich94,medloinc,missing
31,Kelly,32,89.7,22.7,25.2,27.9,44.1,15.4,15.4,47.3,...,43.0,7.0,171.0,35.0,-10.0,170.0,43.0,10.0,1,0
0,Amundsen,1,70.7,22.9,26.0,34.7,49.0,15.9,15.9,49.0,...,37.0,3.0,171.0,42.0,-5.0,188.0,55.0,8.0,1,0
14,Dunbar Vocat.,15,70.2,0.0,0.0,54.0,46.0,14.9,15.1,69.4,...,38.0,5.0,185.0,44.0,-11.0,170.0,37.0,-3.0,1,0


In [175]:
# id and missing should not go through analysis
# shcool and lep93 have no missing values
missing_cols = df.columns.drop('id').drop('missing').drop('school').drop('lep93').drop('medloinc')
compare_cols = df.columns.drop('id').drop('missing').drop('school').drop('medloinc')
missing_cols

Index(['loinc93', 'lep94', 'grad93', 'grad94', 'act94', 'act93', 'pctact93',
       'pctact94', 'math93', 'math94me', 'mathch94', 'read93', 'read94me',
       'readch94', 'scienc93', 'sci94me', 'scich94'],
      dtype='object')

In [176]:
compare_cols

Index(['loinc93', 'lep93', 'lep94', 'grad93', 'grad94', 'act94', 'act93',
       'pctact93', 'pctact94', 'math93', 'math94me', 'mathch94', 'read93',
       'read94me', 'readch94', 'scienc93', 'sci94me', 'scich94'],
      dtype='object')

In [194]:
alpha = 0.05

def randomness_missing(df):
    test_results = pd.DataFrame(columns=['variable1', 'variable2', 'p_value', 't_value,', 'missing_type', 
                                         'num_cases_valid','num_cases_missing', 'mean_valid_cases', 
                                         'mean_cases_missing', 'hypothesis', 'conclusion'])
    var_results = pd.DataFrame(columns=['variable', 'num_significant', 'num_nonsignificant', 'non-sig / total'])
    for var in missing_cols:
        # Step 1: Divide the data into 2 groups: missing and non-missing
        missing_data = df[df[var].isnull()]
        nonmissing_data = df[~df[var].isnull()]
        sig = 0
        nonsig = 0
        # Step 2: Perform the t-test to compare the observations of misssing and nonmissing data 
        # for var to each other var
        for compare in compare_cols:
            # If it is the same var, just track the missing data
            if compare == var:
                t_value = '.'
                p_value = '.'
                num_valid = nonmissing_data[compare].isnull().sum()
                num_missing = missing_data[compare].isnull().sum()
                mean_valid = nonmissing_data[compare].mean()
                mean_missing = '.'
                missing = 'NA'
                hypothesis = 'NA'
                conclusion = 'NA'
                
                test_results = test_results.append({'variable1':var, 'variable2':compare, 'p_value':p_value, 
                                    't_value,':t_value, 'missing_type':missing, 
                                    'num_cases_valid':50-num_missing, 'num_cases_missing':num_missing,
                                    'mean_valid_cases':mean_valid, 'mean_cases_missing':mean_missing, 
                                    'hypothesis':hypothesis, 'conclusion':conclusion}, ignore_index=True)
                continue
            # perform t-test
            t_value, p_value = stats.ttest_ind(missing_data[compare], nonmissing_data[compare])
            print("\nt-Test p-value for {0} vs. {1} = {2}".format(var, compare, p_value))
            # Evaluate the test
            missing, hypothesis, conclusion = evaluate_hypothesis(p_value, var)
            # Calcuate missing
            num_valid = nonmissing_data[compare].isnull().sum()
            num_missing = missing_data[compare].isnull().sum()
            mean_valid = nonmissing_data[compare].mean()
            mean_missing = missing_data[compare].mean()
            # Save the results    
            print("{0}. {1}.".format(hypothesis, conclusion))
            test_results = test_results.append({'variable1':var, 'variable2':compare, 'p_value':p_value, 
                                                't_value,':t_value, 'missing_type':missing, 
                                                'num_cases_valid':50-num_missing, 'num_cases_missing':num_missing,
                                                'mean_valid_cases':mean_valid, 'mean_cases_missing':mean_missing, 
                                                'hypothesis':hypothesis, 'conclusion':conclusion}, ignore_index=True)
            if missing == 'Not Significant':
                nonsig += 1
            elif missing == 'Significant':
                sig += 1
        var_results = var_results.append({'variable':var, 'num_significant':sig, 'num_nonsignificant':nonsig, 
                                          'non-sig / total':nonsig / (sig + nonsig)}, ignore_index=True)
    return test_results, var_results

def evaluate_hypothesis(p_val, i):
    if p_val > alpha:  
        missing = "Not Significant"
        hypothesis = "We fail to reject the H0 that the missing data for {} is MAR".format(i)
        conclusion = "The missing observations in {} showed no significant difference between the two groups".format(i)
    else:
        missing = "Significant"
        hypothesis = "We reject the H0 that the missing data for {} is MAR".format(i)
        conclusion = "The missing observations in {} showed a significant difference between the two groups".format(i)
    return missing, hypothesis, conclusion

test_results, var_results = randomness_missing(df)



t-Test p-value for loinc93 vs. lep93 = 0.361032349162111
We fail to reject the H0 that the missing data for loinc93 is MAR. The missing observations in loinc93 showed no significant difference between the two groups.

t-Test p-value for loinc93 vs. lep94 = nan
We reject the H0 that the missing data for loinc93 is MAR. The missing observations in loinc93 showed a significant difference between the two groups.

t-Test p-value for loinc93 vs. grad93 = nan
We reject the H0 that the missing data for loinc93 is MAR. The missing observations in loinc93 showed a significant difference between the two groups.

t-Test p-value for loinc93 vs. grad94 = nan
We reject the H0 that the missing data for loinc93 is MAR. The missing observations in loinc93 showed a significant difference between the two groups.

t-Test p-value for loinc93 vs. act94 = nan
We reject the H0 that the missing data for loinc93 is MAR. The missing observations in loinc93 showed a significant difference between the two groups.



t-Test p-value for grad94 vs. lep94 = nan
We reject the H0 that the missing data for grad94 is MAR. The missing observations in grad94 showed a significant difference between the two groups.

t-Test p-value for grad94 vs. grad93 = nan
We reject the H0 that the missing data for grad94 is MAR. The missing observations in grad94 showed a significant difference between the two groups.

t-Test p-value for grad94 vs. act94 = nan
We reject the H0 that the missing data for grad94 is MAR. The missing observations in grad94 showed a significant difference between the two groups.

t-Test p-value for grad94 vs. act93 = nan
We reject the H0 that the missing data for grad94 is MAR. The missing observations in grad94 showed a significant difference between the two groups.

t-Test p-value for grad94 vs. pctact93 = nan
We reject the H0 that the missing data for grad94 is MAR. The missing observations in grad94 showed a significant difference between the two groups.

t-Test p-value for grad94 vs. pctac

We reject the H0 that the missing data for pctact94 is MAR. The missing observations in pctact94 showed a significant difference between the two groups.

t-Test p-value for pctact94 vs. grad94 = nan
We reject the H0 that the missing data for pctact94 is MAR. The missing observations in pctact94 showed a significant difference between the two groups.

t-Test p-value for pctact94 vs. act94 = nan
We reject the H0 that the missing data for pctact94 is MAR. The missing observations in pctact94 showed a significant difference between the two groups.

t-Test p-value for pctact94 vs. act93 = nan
We reject the H0 that the missing data for pctact94 is MAR. The missing observations in pctact94 showed a significant difference between the two groups.

t-Test p-value for pctact94 vs. pctact93 = nan
We reject the H0 that the missing data for pctact94 is MAR. The missing observations in pctact94 showed a significant difference between the two groups.

t-Test p-value for pctact94 vs. math93 = nan
We re

  **kwargs)
  ret = ret.dtype.type(ret / rcount)


In [195]:
test_results

Unnamed: 0,variable1,variable2,p_value,"t_value,",missing_type,num_cases_valid,num_cases_missing,mean_valid_cases,mean_cases_missing,hypothesis,conclusion
0,loinc93,loinc93,.,.,,38,12,57.952632,.,,
1,loinc93,lep93,0.361032,-0.922208,Not Significant,50,0,7.176316,4.11667,We fail to reject the H0 that the missing data...,The missing observations in loinc93 showed no ...
2,loinc93,lep94,,,Significant,50,0,8.150000,4.55,We reject the H0 that the missing data for loi...,The missing observations in loinc93 showed a s...
3,loinc93,grad93,,,Significant,49,1,46.476667,43.4909,We reject the H0 that the missing data for loi...,The missing observations in loinc93 showed a s...
4,loinc93,grad94,,,Significant,47,3,47.496774,46.3333,We reject the H0 that the missing data for loi...,The missing observations in loinc93 showed a s...
5,loinc93,act94,,,Significant,48,2,16.133333,15.14,We reject the H0 that the missing data for loi...,The missing observations in loinc93 showed a s...
6,loinc93,act93,,,Significant,45,5,15.935484,15.3143,We reject the H0 that the missing data for loi...,The missing observations in loinc93 showed a s...
7,loinc93,pctact93,,,Significant,47,3,55.422222,52.1556,We reject the H0 that the missing data for loi...,The missing observations in loinc93 showed a s...
8,loinc93,pctact94,,,Significant,50,0,47.348148,50.325,We reject the H0 that the missing data for loi...,The missing observations in loinc93 showed a s...
9,loinc93,math93,,,Significant,47,3,173.500000,145.111,We reject the H0 that the missing data for loi...,The missing observations in loinc93 showed a s...


In [196]:
var_results

Unnamed: 0,variable,num_significant,num_nonsignificant,non-sig / total
0,loinc93,16,1,0.058824
1,lep94,16,1,0.058824
2,grad93,16,1,0.058824
3,grad94,16,1,0.058824
4,act94,16,1,0.058824
5,act93,16,1,0.058824
6,pctact93,16,1,0.058824
7,pctact94,16,1,0.058824
8,math93,16,1,0.058824
9,math94me,16,1,0.058824


## C. Delete Missing Cases
Delte cases with more than 5 missing per case

In [84]:
# number of cases with more than 5 missing
df.index[df['missing'] > 5].tolist()

[27, 13, 5, 10, 3]

In [85]:
# Drop cases with more than 5 missing data
new_df = df.drop(df.index[df['missing'] > 5].tolist())

In [86]:
len(new_df)

45

In [94]:
null_data = new_df[new_df.isnull().any(axis=1)]
null_data.head()

Unnamed: 0,school,id,loinc93,lep93,lep94,grad93,grad94,act94,act93,pctact93,...,math94me,mathch94,read93,read94me,readch94,scienc93,sci94me,scich94,medloinc,missing
51,Schurz,52,67.2,25.7,29.3,41.5,44.1,15.1,15.2,36.2,...,,1.0,168.0,,-6.0,164.0,35.0,3.0,,3
63,Young Magnet,64,28.1,0.2,0.1,87.3,,22.5,22.3,91.7,...,99.0,1.0,327.0,97.0,-1.0,311.0,98.0,,,4
53,Simeon Vocat.,54,56.5,0.0,0.0,55.7,,15.7,15.8,54.0,...,43.0,-1.0,196.0,45.0,-14.0,180.0,41.0,-1.0,0.0,1
23,Harper,24,,0.2,0.1,24.1,32.8,14.1,14.5,25.6,...,12.0,1.0,127.0,19.0,-3.0,150.0,14.0,-6.0,0.0,1
40,Manley,41,74.8,0.0,0.0,35.0,34.4,,14.5,24.6,...,9.0,-2.0,123.0,12.0,-10.0,,23.0,8.0,1.0,2


In [95]:
complete_data = new_df[~new_df.isnull().any(axis=1)]
complete_data.head()

Unnamed: 0,school,id,loinc93,lep93,lep94,grad93,grad94,act94,act93,pctact93,...,math94me,mathch94,read93,read94me,readch94,scienc93,sci94me,scich94,medloinc,missing
31,Kelly,32,89.7,22.7,25.2,27.9,44.1,15.4,15.4,47.3,...,43.0,7.0,171.0,35.0,-10.0,170.0,43.0,10.0,1,0
0,Amundsen,1,70.7,22.9,26.0,34.7,49.0,15.9,15.9,49.0,...,37.0,3.0,171.0,42.0,-5.0,188.0,55.0,8.0,1,0
14,Dunbar Vocat.,15,70.2,0.0,0.0,54.0,46.0,14.9,15.1,69.4,...,38.0,5.0,185.0,44.0,-11.0,170.0,37.0,-3.0,1,0


## D. Missing Data After Deletion
Repeat part (a) and (b) after deletion of the cases. Same as above

### Part A. Missing Data Summary

In [92]:
missing_col_summary(new_df)

Unnamed: 0,variable,num_cases,mean,std,num_cases_missing,percent_missing
0,school,50,,,0,0.0
1,loinc93,40,58.08,20.6348,10,22.22
2,lep93,50,6.69111,10.2754,0,0.0
3,lep94,48,7.58837,11.173,2,4.44
4,grad93,44,45.6872,15.3204,6,13.33
5,grad94,42,46.5108,15.0308,8,17.78
6,act94,43,15.9553,1.91541,7,15.56
7,act93,41,15.7972,1.75554,9,20.0
8,pctact93,47,55.5929,18.0564,3,6.67
9,pctact94,41,48.3861,14.1192,9,20.0


In [91]:
new_df['missing'] = new_df.isnull().sum(axis=1)
missing = [new_df.index[new_df.isnull().sum(axis=1)]]
ml = list(missing[0])
new_df.loc[ml]
missing_row_summary(new_df)

Unnamed: 0,missing_data_per_case,num_cases,percent_sample
0,0.0,3.0,0.06
1,1.0,9.0,0.18
2,2.0,11.0,0.22
3,3.0,10.0,0.2
4,4.0,10.0,0.2
5,5.0,2.0,0.04
6,6.0,0.0,0.0
7,7.0,0.0,0.0
8,8.0,0.0,0.0
9,9.0,0.0,0.0


### Part B. MAR vs CMAR

In [191]:
test_results, var_results = randomness_missing(new_df)


t-Test p-value for loinc93 vs. lep93 = 0.32239096990047134
We fail to reject the H0 that the missing data for loinc93 is MAR. The missing observations in loinc93 showed no significant difference between the two groups.

t-Test p-value for loinc93 vs. lep94 = nan
We reject the H0 that the missing data for loinc93 is MAR. The missing observations in loinc93 showed a significant difference between the two groups.

t-Test p-value for loinc93 vs. grad93 = nan
We reject the H0 that the missing data for loinc93 is MAR. The missing observations in loinc93 showed a significant difference between the two groups.

t-Test p-value for loinc93 vs. grad94 = nan
We reject the H0 that the missing data for loinc93 is MAR. The missing observations in loinc93 showed a significant difference between the two groups.

t-Test p-value for loinc93 vs. act94 = nan
We reject the H0 that the missing data for loinc93 is MAR. The missing observations in loinc93 showed a significant difference between the two groups

We reject the H0 that the missing data for grad94 is MAR. The missing observations in grad94 showed a significant difference between the two groups.

t-Test p-value for grad94 vs. act94 = nan
We reject the H0 that the missing data for grad94 is MAR. The missing observations in grad94 showed a significant difference between the two groups.

t-Test p-value for grad94 vs. act93 = nan
We reject the H0 that the missing data for grad94 is MAR. The missing observations in grad94 showed a significant difference between the two groups.

t-Test p-value for grad94 vs. pctact93 = nan
We reject the H0 that the missing data for grad94 is MAR. The missing observations in grad94 showed a significant difference between the two groups.

t-Test p-value for grad94 vs. pctact94 = nan
We reject the H0 that the missing data for grad94 is MAR. The missing observations in grad94 showed a significant difference between the two groups.

t-Test p-value for grad94 vs. math93 = nan
We reject the H0 that the missing

We reject the H0 that the missing data for math93 is MAR. The missing observations in math93 showed a significant difference between the two groups.

t-Test p-value for math93 vs. pctact94 = nan
We reject the H0 that the missing data for math93 is MAR. The missing observations in math93 showed a significant difference between the two groups.

t-Test p-value for math93 vs. math94me = nan
We reject the H0 that the missing data for math93 is MAR. The missing observations in math93 showed a significant difference between the two groups.

t-Test p-value for math93 vs. mathch94 = 0.84771520688293
We fail to reject the H0 that the missing data for math93 is MAR. The missing observations in math93 showed no significant difference between the two groups.

t-Test p-value for math93 vs. read93 = nan
We reject the H0 that the missing data for math93 is MAR. The missing observations in math93 showed a significant difference between the two groups.

t-Test p-value for math93 vs. read94me = nan
We re

In [192]:
test_results

Unnamed: 0,variable1,variable2,p_value,"t_value,",missing_type,num_cases_valid,num_cases_missing,mean_valid_cases,mean_cases_missing,hypothesis,conclusion
0,loinc93,loinc93,.,.,,40,10,58.080000,.,,
1,loinc93,lep93,0.322391,-1.00107,MAR,50,0,7.520000,3.79,We fail to reject the H0 that the missing data...,The missing observations in loinc93 showed no ...
2,loinc93,lep94,,,Not MAR,50,0,8.581818,4.31,We reject the H0 that the missing data for loi...,The missing observations in loinc93 showed a s...
3,loinc93,grad93,,,Not MAR,49,1,46.476667,43.0556,We reject the H0 that the missing data for loi...,The missing observations in loinc93 showed a s...
4,loinc93,grad94,,,Not MAR,48,2,47.082759,44.4375,We reject the H0 that the missing data for loi...,The missing observations in loinc93 showed a s...
5,loinc93,act94,,,Not MAR,49,1,16.189655,15.2,We reject the H0 that the missing data for loi...,The missing observations in loinc93 showed a s...
6,loinc93,act93,,,Not MAR,47,3,15.913793,15.3143,We reject the H0 that the missing data for loi...,The missing observations in loinc93 showed a s...
7,loinc93,pctact93,,,Not MAR,48,2,56.588235,51.3625,We reject the H0 that the missing data for loi...,The missing observations in loinc93 showed a s...
8,loinc93,pctact94,,,Not MAR,50,0,48.753846,47.43,We reject the H0 that the missing data for loi...,The missing observations in loinc93 showed a s...
9,loinc93,math93,,,Not MAR,49,1,173.500000,145.111,We reject the H0 that the missing data for loi...,The missing observations in loinc93 showed a s...


In [193]:
var_results

Unnamed: 0,variable,num_significant,num_nonsignificant,non-sig / total
0,loinc93,15,2,0.117647
1,lep94,14,3,0.176471
2,grad93,14,3,0.176471
3,grad94,14,3,0.176471
4,act94,14,3,0.176471
5,act93,14,3,0.176471
6,pctact93,15,2,0.117647
7,pctact94,14,3,0.176471
8,math93,14,3,0.176471
9,math94me,14,3,0.176471
