In [1]:
import pandas as pd
pd.set_option('display.max_columns', None, 'display.max_rows', None)

In [2]:
# This was run to convert the excel file to csv

# xl_fname ='EAMMi2-Data1.2.xlsx'
# df_raw = pd.read_excel(xl_fname)
# df_raw.columns = map(str.lower, df_raw.columns)
# df_raw.to_csv('EAMMi2-Data1.2.csv', header=True, index=None)

In [3]:
fname = '../data/EAMMi2-Data1.2.csv'

In [4]:
df_raw = pd.read_csv(fname)

In [63]:
# This will be used when grabbing verbose descriptions of columns
# In other words, the text of the questions
label_desc = pd.read_csv('../data/EAMMi2-Labels.csv')
label_desc.tail()

Unnamed: 0,Variable Name,Question text,Survey Question ID
323,Q81_Click Count,Timing - Click Count,"{""ImportId"":""QID81_CLICK_COUNT""}"
324,comments,"Do you have any questions, comments, or concer...","{""ImportId"":""QID84_TEXT""}"
325,affiliation,affiliation,"{""ImportId"":""affiliation""}"
326,response_bias_SUM,Sum of all Bias Dummy Variables,ADDED
327,school_coded,Cleaned Names of Schools Attended,ADDED


In [65]:
def get_question_text(colname):
    '''Get column description'''
    
    cond = label_desc['Variable Name'] == colname
    idx = label_desc.index[cond].tolist()[0]
    text = label_desc.iloc[idx]['Question text']
    
    return text

In [66]:
# Example of grabbing question text
get_question_text('president')

'In the coming election, which candidate do you most support to be the next president of the United States?'

In [5]:
def filter_attention(df):
    '''These columns were used to ensure people were paying attention
    Filter on these conditions, then drop the columns'''
    
    cond1 = df['usdream_3'] == 1
    cond2 = df['attenion2'] == 7
    df = df[cond1 & cond2]
    df.drop(['usdream_3', 'attenion2'], axis=1, inplace=True)
    
    return df.reset_index(drop=True)

In [6]:
def timing(df):
    '''Calculate the duration (in min) for each section,
    then drop the original columns'''
    
    first_click = [c for c in df.columns if 'first click' in c]
    last_click = [c for c in df.columns if 'last click' in c]
    del_cols = [c for c in df.columns if 'click' in c or 'submit' in c] + ['duration (in seconds)']
    
    # Calc total duration in minutes
    df.insert(0, 'duration_min', df['duration (in seconds)'] / 60)
    
    # Calc minutes for each section
    for first, last in zip(first_click, last_click):
        idx = df.columns.get_loc(last)
        new_title = first[:4] + 'duration'
        minutes = (df[last]-df[first]) / 60
        
        df.insert(idx, new_title, minutes)
        df[new_title] = df[new_title].round(2)
    
    # Drop cols/ nan rows
    df = df[pd.notnull(df['q65_last click'])]
    df = df[pd.notnull(df['q81_last click'])]
    df.drop(del_cols, axis=1, inplace=True)
    
    return df.reset_index(drop=True)

In [7]:
def drop(df):
    '''Drop these columns for various reasons'''
    
    # npi questions are not exclusive, but instructions call for binary response 
    # Otherwise lots of encoding if keeping them in
    npis = [c for c in df.columns if 'npi' in c]
    
    del_cols = ['startdate','enddate','status', 'progress','recordeddate',
                'responseid', 'recipientlastname','recipientfirstname','recipientemail',
                'externalreference','distributionchannel','informedconsent','president',
                'transgres','relation','relation_10_text','fault','comments','q14_6',
                'q14_6_text','school','q81','affiliation','school_coded','race_6_text',
                'q78_duration','marriage3']
    total_cols = npis + del_cols
    
    return df.drop(total_cols, axis=1) 

In [8]:
def rename(df):
    
    # Rename various cols for readability
    rename_dict = {
        'adult_q': 'moa_adult',
        'q65_duration': 'moa_duration',
        'q66_duration': 'idea_duration',
        'q74_duration': 'politic_duration',
        'q67_duration': 'swb_duration',
        'q68_duration': 'mindful_duration',
        'belnow': 'belong_now',
        'q72_duration': 'belong_duration',
        'q77_duration': 'efficacy_duration',
        'q96_duration': 'support_duration',
        'q80_duration': 'socmedia_duration',
        'q73_duration': 'usdream_duration',
        'q78_duration': 'transgres_text_duration',
        'q79_duration': 'transgres_duration',
        'q76_duration': 'exploit_duration',
        'q71_duration': 'disability_duration',
        'q70_duration': 'phys_duration',
        'q69_duration': 'stress_duration',
        'q75_duration': 'marriage_duration',
        'q81_duration': 'demo_duration',
        'freq': 'transgres_freq',
        'common': 'transgres_common',
        'age': 'demo_age',
        'q82': 'demo_mil',
        'q83': 'demo_mil_years',
        'place2': 'demo_us',
        'q80': 'demo_us_years',
        'phys_sx_biaschec': 'physsx_biascheck',
        'phys_sym_bias_dummy,': 'physsx_bias_dummy'
    }
    
    # Rename disability cols
    dis = [c for c in df.columns if 'q11' in c or 'q14' in c or 'q10' in c]
    new_names = ['disability' + x[1:] for x in dis]
    dis_dict = {old:new for old,new in zip(dis, new_names)}
    
    total = {**rename_dict, **dis_dict}

    return df.rename(columns=total)

In [9]:
def drop_enc(df):
    '''Until these columns can be encoded, they will be dropped'''
    
    enc_cols = ['politics','party','feel','marriage5','sex','edu','sibling','race','income','place']
    
    # Include corresponding coumns
    plus = ['politic_duration']
    total = enc_cols+plus
    
    return df.drop(total, axis=1)

In [10]:
def remap(df):
    '''These numbers need to be remapped to make ordinal sense'''
    df = df.copy()
    
    disability = [c for c in df.columns if 'disability' in c]   
    df[disability].replace({2:0}, inplace=True)
    df['moa_adult'].replace({3: 1, 1: 3}, inplace=True)
    df['demo_us'].replace({2:0}, inplace=True)
    df['demo_mil'].replace({3: 0}, inplace=True)
    
    return df

In [11]:
def fill_nas(df):
    df = df.copy()
    
    disability = [c for c in df.columns if 'disability' in c]   
    df[disability] = df[disability].fillna(0)
    df['demo_age'] = df['demo_age'].fillna(df['demo_age'].median())
    df['demo_mil'] = df['demo_mil'].fillna(0)
    df['demo_us'] = df['demo_us'].fillna(0)
    df['demo_mil_years'] = df['demo_mil_years'].fillna(0)
    df['demo_us_years'] = df['demo_us_years'].fillna(0)
    df = df.fillna(df.median())
    
    return df

In [12]:
def total_process(df):
    df = filter_attention(df)
    df = timing(df)
    df = drop(df)
    df = rename(df)
    df = drop_enc(df)
    df = remap(df)
    df = fill_nas(df)

    return df

In [13]:
df = total_process(df_raw.copy());

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  method=method,


In [17]:
print(f'Total number of missing values: {df.isna().sum().sum()}')
df.head()

Total number of missing values: 0


Unnamed: 0,duration_min,finished,moa1#1_1,moa1#1_2,moa1#1_3,moa1#1_4,moa1#1_5,moa1#1_6,moa1#1_7,moa1#1_8,moa1#1_9,moa1#1_10,moa1#2_1,moa1#2_2,moa1#2_3,moa1#2_4,moa1#2_5,moa1#2_6,moa1#2_7,moa1#2_8,moa1#2_9,moa1#2_10,moa2#1_1,moa2#1_2,moa2#1_3,moa2#1_4,moa2#1_5,moa2#1_6,moa2#1_7,moa2#1_8,moa2#1_9,moa2#1_10,moa2#2_1,moa2#2_2,moa2#2_3,moa2#2_4,moa2#2_5,moa2#2_6,moa2#2_7,moa2#2_8,moa2#2_9,moa2#2_10,moa_adult,moa_imp_biascheck,moa_ach_biascheck,moa_imp_dummy,moa-ach_dummy,moa_duration,idea_1,idea_2,idea_3,idea_4,idea_5,idea_6,idea_7,idea_8,idea-biascheck,idea-bias-dummy,idea_duration,swb_1,swb_2,swb_3,swb_4,swb_5,swb_6,swb_duration,mindful_1,mindful_2,mindful_3,mindful_4,mindful_5,mindful_6,mindful_7,mindful_8,mindful_9,mindful_10,mindful_11,mindful_12,mindful_13,mindful_14,mindful_15,mindful_biascheck,mindful_bias_dummy,mindful_duration,belong_1,belong_2,belong_3,belong_4,belong_5,belong_6,belong_7,belong_8,belong_9,belong_10,belong_now,belong_biascheck,belong_bias_dummy,belong_duration,efficacy_1,efficacy_2,efficacy_3,efficacy_4,efficacy_5,efficacy_6,efficacy_7,efficacy_8,efficacy_9,efficacy_10,efficacy_biascheck,efficacy_bias_dummy,efficacy_duration,support_1,support_2,support_3,support_4,support_5,support_6,support_7,support_8,support_9,support_10,support_11,support_12,support_biascheck,support_bias_dummy,support_duration,socmedia_1,socmedia_2,socmedia_3,socmedia_4,socmedia_5,socmedia_6,socmedia_7,socmedia_8,socmedia_9,socmedia_10,socmedia_11,socmedia_biascheck,socmedia_bias_dummy,socmedia_duration,usdream_1,usdream_2,usdream_duration,transgres_freq,transgres_common,transgres_1,transgres_2,transgres_3,transgres_4,transgres_duration,exploit_1,exploit_2,exploit_3,exploit_duration,disability11,disability14_1,disability14_2,disability14_3,disability14_4,disability14_5,disability10_1,disability10_2,disability10_3,disability10_4,disability10_5,disability10_6,disability10_7,disability10_8,disability10_9,disability10_10,disability10_11,disability10_12,disability10_13,disability10_14,disability10_15,disability_duration,physsx_1,physsx_2,physsx_3,physsx_4,physsx_5,physsx_6,physsx_7,physsx_8,physsx_9,physsx_10,physsx_11,physsx_12,physsx_13,physsx_biascheck,physsx_bias_dummy,phys_duration,stress_1,stress_2,stress_3,stress_4,stress_5,stress_6,stress_7,stress_8,stress_9,stress_10,stress_biascheck,stress_bias_dummy,stress_duration,marriage1_1,marriage1_2,marriage1_3,marriage1_4,marriage2,marriage4,marriage_duration,demo_age,demo_mil,demo_mil_years,demo_us,demo_us_years,demo_duration,response_bias_sum
0,30.65,1,4.0,4.0,3.0,2.0,2.0,3.0,2.0,1.0,4.0,3.0,2.0,1.0,2.0,1.0,1.0,1.0,2.0,3.0,3.0,2.0,4.0,4.0,4.0,4.0,3.0,4.0,4.0,4.0,3.0,2.0,2.0,1.0,1.0,3.0,2.0,3.0,3.0,2.0,2.0,1.0,3.0,64,38,0,0,4.51,3.0,4.0,4.0,3.0,4.0,4.0,4.0,4.0,30,0,0.7,4.0,6.0,5.0,5.0,3.0,3.0,0.51,4.0,2.0,2.0,2.0,4.0,1.0,2.0,2.0,2.0,2.0,2.0,4.0,1.0,2.0,4.0,36,0,2.02,4.0,2.0,4.0,4.0,4.0,2.0,5.0,2.0,4.0,3.0,4.0,38,0,1.2,4.0,3.0,4.0,3.0,3.0,4.0,3.0,3.0,4.0,3.0,34,0,1.48,7.0,4.0,6.0,5.0,6.0,6.0,7.0,7.0,7.0,4.0,6.0,7.0,72,0,1.32,4.0,2.0,5.0,3.0,5.0,5.0,5.0,4.0,5.0,5.0,4.0,47,0,0.7,4.0,4.0,0.17,3.0,3.0,3.0,1.0,1.0,1.0,0.18,2.0,2.0,2.0,1.94,2.0,2.0,2.0,2.0,2.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.38,3.0,1.0,1.0,2.0,1.0,2.0,1.0,2.0,2.0,2.0,2.0,3.0,2.0,24,0,0.68,2.0,4.0,5.0,3.0,3.0,3.0,4.0,3.0,3.0,3.0,33,0,1.5,10.0,25.0,30.0,35.0,2.0,1.0,2.41,20.0,0.0,0.0,0.0,0.0,0.0,0
1,24.45,1,4.0,4.0,4.0,2.0,3.0,3.0,4.0,3.0,3.0,3.0,1.0,1.0,2.0,2.0,1.0,1.0,1.0,1.0,2.0,3.0,3.0,4.0,2.0,4.0,4.0,3.0,2.0,4.0,2.0,1.0,3.0,1.0,2.0,2.0,1.0,2.0,2.0,2.0,2.0,1.0,3.0,62,33,0,0,3.61,4.0,4.0,4.0,4.0,3.0,4.0,4.0,4.0,31,0,0.75,3.0,4.0,5.0,5.0,4.0,4.0,0.34,2.0,2.0,2.0,1.0,3.0,1.0,1.0,1.0,2.0,2.0,1.0,2.0,1.0,1.0,5.0,27,0,1.06,2.0,3.0,1.0,5.0,4.0,4.0,2.0,4.0,5.0,4.0,4.0,38,0,0.99,3.0,3.0,3.0,4.0,4.0,4.0,3.0,3.0,3.0,4.0,34,0,1.05,7.0,7.0,7.0,6.0,7.0,6.0,6.0,7.0,7.0,7.0,7.0,7.0,81,0,0.45,3.0,2.0,4.0,2.0,1.0,1.0,1.0,1.0,2.0,4.0,2.0,23,0,0.58,4.0,4.0,0.37,4.0,4.0,4.0,3.0,2.0,1.0,0.19,4.0,4.0,3.0,1.56,2.0,2.0,2.0,2.0,2.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.28,2.0,2.0,1.0,1.0,3.0,1.0,1.0,3.0,2.0,1.0,1.0,3.0,3.0,24,0,0.38,4.0,5.0,5.0,4.0,3.0,3.0,2.0,2.0,4.0,4.0,36,0,0.56,10.0,25.0,35.0,30.0,3.0,1.0,0.82,23.0,0.0,0.0,0.0,0.0,0.0,0
2,36.416667,0,4.0,4.0,4.0,1.0,1.0,4.0,2.0,3.0,4.0,3.0,2.0,1.0,1.0,1.0,1.0,1.0,2.0,3.0,3.0,3.0,4.0,2.0,2.0,4.0,3.0,3.0,4.0,4.0,3.0,2.0,2.0,1.0,1.0,2.0,1.0,2.0,3.0,1.0,1.0,1.0,3.0,61,33,0,0,2.11,4.0,4.0,4.0,4.0,4.0,4.0,3.0,3.0,30,0,0.47,1.0,2.0,2.0,2.0,2.0,2.0,0.26,2.0,3.0,1.0,2.0,3.0,1.0,2.0,2.0,5.0,3.0,2.0,1.0,1.0,1.0,4.0,33,0,1.58,4.0,4.0,2.0,5.0,4.0,4.0,2.0,3.0,4.0,4.0,2.0,38,0,1.09,3.0,3.0,1.0,2.0,2.0,3.0,1.0,3.0,2.0,2.0,22,0,2.35,6.0,6.0,5.0,2.0,7.0,5.0,5.0,3.0,6.0,6.0,5.0,6.0,62,0,0.93,3.0,3.0,4.0,2.0,3.0,4.0,4.0,2.0,3.0,3.0,3.0,34,0,0.61,2.0,2.0,0.18,6.0,5.0,3.0,1.0,4.0,1.0,0.34,5.0,5.0,3.0,1.66,2.0,2.0,2.0,2.0,1.0,2.0,4.0,3.0,4.0,2.0,4.0,5.0,3.0,1.0,4.0,1.0,3.0,1.0,4.0,2.0,2.0,1.85,3.0,1.0,1.0,3.0,2.0,2.0,1.0,3.0,2.0,1.0,2.0,3.0,3.0,27,0,0.71,4.0,4.0,5.0,2.0,1.0,5.0,2.0,2.0,4.0,4.0,33,0,0.79,1.0,1.0,59.0,39.0,2.0,1.0,1.99,23.0,0.0,0.0,0.0,0.0,0.0,0
3,48.4,1,4.0,3.0,3.0,1.0,1.0,2.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,2.0,3.0,2.0,3.0,4.0,2.0,2.0,4.0,3.0,2.0,4.0,2.0,3.0,2.0,2.0,1.0,1.0,2.0,1.0,2.0,2.0,2.0,2.0,1.0,3.0,46,32,0,0,4.63,4.0,4.0,3.0,3.0,4.0,4.0,4.0,4.0,30,0,2.42,5.0,6.0,6.0,5.0,6.0,3.0,0.69,2.0,2.0,1.0,2.0,2.0,1.0,2.0,2.0,3.0,2.0,2.0,3.0,3.0,2.0,4.0,33,0,2.42,3.0,4.0,1.0,5.0,4.0,5.0,2.0,4.0,4.0,4.0,4.0,40,0,1.34,4.0,1.0,2.0,3.0,2.0,4.0,2.0,3.0,4.0,3.0,28,0,2.55,6.0,6.0,7.0,3.0,7.0,6.0,5.0,4.0,6.0,6.0,6.0,5.0,67,0,2.0,4.0,2.0,5.0,2.0,2.0,4.0,4.0,1.0,3.0,4.0,4.0,35,0,0.82,1.0,3.0,0.2,3.0,2.0,4.0,2.0,1.0,1.0,0.44,2.0,1.0,2.0,2.88,2.0,2.0,2.0,2.0,2.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.93,2.0,3.0,2.0,3.0,1.0,2.0,1.0,2.0,1.0,1.0,2.0,3.0,2.0,25,0,0.49,2.0,4.0,4.0,3.0,4.0,5.0,4.0,2.0,2.0,2.0,32,0,2.29,0.0,0.0,60.0,40.0,1.0,1.0,1.35,22.0,0.0,0.0,0.0,0.0,0.0,0
4,19.333333,1,4.0,4.0,4.0,4.0,4.0,4.0,1.0,2.0,4.0,4.0,2.0,2.0,2.0,1.0,1.0,2.0,1.0,3.0,3.0,1.0,4.0,4.0,4.0,4.0,4.0,4.0,2.0,4.0,4.0,2.0,3.0,1.0,3.0,3.0,2.0,3.0,1.0,2.0,3.0,1.0,3.0,71,40,0,0,2.69,4.0,4.0,3.0,2.0,4.0,4.0,3.0,3.0,27,0,0.37,5.0,5.0,5.0,5.0,5.0,5.0,0.15,3.0,3.0,4.0,3.0,3.0,3.0,4.0,4.0,4.0,4.0,4.0,3.0,3.0,3.0,3.0,51,0,0.96,4.0,3.0,3.0,3.0,4.0,2.0,5.0,3.0,4.0,4.0,4.0,39,0,0.84,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,30,1,0.38,7.0,5.0,6.0,6.0,5.0,6.0,7.0,6.0,7.0,6.0,5.0,6.0,72,0,0.79,3.0,2.0,4.0,5.0,4.0,4.0,2.0,3.0,4.0,3.0,3.0,37,0,0.47,5.0,4.0,0.11,2.0,2.0,1.0,1.0,1.0,1.0,0.13,6.0,2.0,2.0,1.38,2.0,2.0,2.0,2.0,2.0,2.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,0.39,1.0,3.0,2.0,2.0,1.0,1.0,1.0,1.0,2.0,2.0,1.0,3.0,3.0,23,0,0.6,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,2.0,29,0,0.3,30.0,40.0,10.0,20.0,4.0,2.0,1.58,21.0,0.0,0.0,1.0,21.0,0.11,1


In [21]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2826 entries, 0 to 2825
Columns: 219 entries, duration_min to response_bias_sum
dtypes: float64(197), int64(22)
memory usage: 4.7 MB


In [32]:
# Lists of individual sections

moa = [c for c in df.columns if 'moa' in c]
idea = [c for c in df.columns if 'idea' in c]
swb = [c for c in df.columns if 'swb' in c]
mindful = [c for c in df.columns if 'mindful' in c]
belong = [c for c in df.columns if 'belong' in c]
efficacy = [c for c in df.columns if 'efficacy' in c]
support = [c for c in df.columns if 'support' in c]
socmedia = [c for c in df.columns if 'socmedia' in c]
usdream = [c for c in df.columns if 'usdream' in c]
transgres = [c for c in df.columns if 'transgres' in c]
exploit = [c for c in df.columns if 'exploit' in c]
disability = [c for c in df.columns if 'disability' in c]
phys = [c for c in df.columns if 'phys' in c]
stress = [c for c in df.columns if 'stress' in c]
marriage = [c for c in df.columns if 'marriage' in c]
demo = [c for c in df.columns if 'demo' in c]

In [37]:
# To keep only questionaire-style columns, drop column names that contain these terms  
xtra_terms = ['bias', 'dummy', 'duration', 'finished', 'years', 'age']
xtra_cols = [c for c in df.columns if any(x in c for x in xtra_terms)]

cond = ~df.columns.isin(xtra_cols)
df1 = df.loc[:, cond]
df1.head()

Unnamed: 0,moa1#1_1,moa1#1_2,moa1#1_3,moa1#1_4,moa1#1_5,moa1#1_6,moa1#1_7,moa1#1_8,moa1#1_9,moa1#1_10,moa1#2_1,moa1#2_2,moa1#2_3,moa1#2_4,moa1#2_5,moa1#2_6,moa1#2_7,moa1#2_8,moa1#2_9,moa1#2_10,moa2#1_1,moa2#1_2,moa2#1_3,moa2#1_4,moa2#1_5,moa2#1_6,moa2#1_7,moa2#1_8,moa2#1_9,moa2#1_10,moa2#2_1,moa2#2_2,moa2#2_3,moa2#2_4,moa2#2_5,moa2#2_6,moa2#2_7,moa2#2_8,moa2#2_9,moa2#2_10,moa_adult,idea_1,idea_2,idea_3,idea_4,idea_5,idea_6,idea_7,idea_8,swb_1,swb_2,swb_3,swb_4,swb_5,swb_6,mindful_1,mindful_2,mindful_3,mindful_4,mindful_5,mindful_6,mindful_7,mindful_8,mindful_9,mindful_10,mindful_11,mindful_12,mindful_13,mindful_14,mindful_15,belong_1,belong_2,belong_3,belong_4,belong_5,belong_6,belong_7,belong_8,belong_9,belong_10,belong_now,efficacy_1,efficacy_2,efficacy_3,efficacy_4,efficacy_5,efficacy_6,efficacy_7,efficacy_8,efficacy_9,efficacy_10,support_1,support_2,support_3,support_4,support_5,support_6,support_7,support_8,support_9,support_10,support_11,support_12,socmedia_1,socmedia_2,socmedia_3,socmedia_4,socmedia_5,socmedia_6,socmedia_7,socmedia_8,socmedia_9,socmedia_10,socmedia_11,usdream_1,usdream_2,transgres_freq,transgres_common,transgres_1,transgres_2,transgres_3,transgres_4,exploit_1,exploit_2,exploit_3,disability11,disability14_1,disability14_2,disability14_3,disability14_4,disability14_5,disability10_1,disability10_2,disability10_3,disability10_4,disability10_5,disability10_6,disability10_7,disability10_8,disability10_9,disability10_10,disability10_11,disability10_12,disability10_13,disability10_14,disability10_15,physsx_1,physsx_2,physsx_3,physsx_4,physsx_5,physsx_6,physsx_7,physsx_8,physsx_9,physsx_10,physsx_11,physsx_12,physsx_13,stress_1,stress_2,stress_3,stress_4,stress_5,stress_6,stress_7,stress_8,stress_9,stress_10,demo_mil,demo_us
0,4.0,4.0,3.0,2.0,2.0,3.0,2.0,1.0,4.0,3.0,2.0,1.0,2.0,1.0,1.0,1.0,2.0,3.0,3.0,2.0,4.0,4.0,4.0,4.0,3.0,4.0,4.0,4.0,3.0,2.0,2.0,1.0,1.0,3.0,2.0,3.0,3.0,2.0,2.0,1.0,3.0,3.0,4.0,4.0,3.0,4.0,4.0,4.0,4.0,4.0,6.0,5.0,5.0,3.0,3.0,4.0,2.0,2.0,2.0,4.0,1.0,2.0,2.0,2.0,2.0,2.0,4.0,1.0,2.0,4.0,4.0,2.0,4.0,4.0,4.0,2.0,5.0,2.0,4.0,3.0,4.0,4.0,3.0,4.0,3.0,3.0,4.0,3.0,3.0,4.0,3.0,7.0,4.0,6.0,5.0,6.0,6.0,7.0,7.0,7.0,4.0,6.0,7.0,4.0,2.0,5.0,3.0,5.0,5.0,5.0,4.0,5.0,5.0,4.0,4.0,4.0,3.0,3.0,3.0,1.0,1.0,1.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,1.0,1.0,2.0,1.0,2.0,1.0,2.0,2.0,2.0,2.0,3.0,2.0,2.0,4.0,5.0,3.0,3.0,3.0,4.0,3.0,3.0,3.0,0.0,0.0
1,4.0,4.0,4.0,2.0,3.0,3.0,4.0,3.0,3.0,3.0,1.0,1.0,2.0,2.0,1.0,1.0,1.0,1.0,2.0,3.0,3.0,4.0,2.0,4.0,4.0,3.0,2.0,4.0,2.0,1.0,3.0,1.0,2.0,2.0,1.0,2.0,2.0,2.0,2.0,1.0,3.0,4.0,4.0,4.0,4.0,3.0,4.0,4.0,4.0,3.0,4.0,5.0,5.0,4.0,4.0,2.0,2.0,2.0,1.0,3.0,1.0,1.0,1.0,2.0,2.0,1.0,2.0,1.0,1.0,5.0,2.0,3.0,1.0,5.0,4.0,4.0,2.0,4.0,5.0,4.0,4.0,3.0,3.0,3.0,4.0,4.0,4.0,3.0,3.0,3.0,4.0,7.0,7.0,7.0,6.0,7.0,6.0,6.0,7.0,7.0,7.0,7.0,7.0,3.0,2.0,4.0,2.0,1.0,1.0,1.0,1.0,2.0,4.0,2.0,4.0,4.0,4.0,4.0,4.0,3.0,2.0,1.0,4.0,4.0,3.0,2.0,2.0,2.0,2.0,2.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,2.0,1.0,1.0,3.0,1.0,1.0,3.0,2.0,1.0,1.0,3.0,3.0,4.0,5.0,5.0,4.0,3.0,3.0,2.0,2.0,4.0,4.0,0.0,0.0
2,4.0,4.0,4.0,1.0,1.0,4.0,2.0,3.0,4.0,3.0,2.0,1.0,1.0,1.0,1.0,1.0,2.0,3.0,3.0,3.0,4.0,2.0,2.0,4.0,3.0,3.0,4.0,4.0,3.0,2.0,2.0,1.0,1.0,2.0,1.0,2.0,3.0,1.0,1.0,1.0,3.0,4.0,4.0,4.0,4.0,4.0,4.0,3.0,3.0,1.0,2.0,2.0,2.0,2.0,2.0,2.0,3.0,1.0,2.0,3.0,1.0,2.0,2.0,5.0,3.0,2.0,1.0,1.0,1.0,4.0,4.0,4.0,2.0,5.0,4.0,4.0,2.0,3.0,4.0,4.0,2.0,3.0,3.0,1.0,2.0,2.0,3.0,1.0,3.0,2.0,2.0,6.0,6.0,5.0,2.0,7.0,5.0,5.0,3.0,6.0,6.0,5.0,6.0,3.0,3.0,4.0,2.0,3.0,4.0,4.0,2.0,3.0,3.0,3.0,2.0,2.0,6.0,5.0,3.0,1.0,4.0,1.0,5.0,5.0,3.0,2.0,2.0,2.0,2.0,1.0,2.0,4.0,3.0,4.0,2.0,4.0,5.0,3.0,1.0,4.0,1.0,3.0,1.0,4.0,2.0,2.0,3.0,1.0,1.0,3.0,2.0,2.0,1.0,3.0,2.0,1.0,2.0,3.0,3.0,4.0,4.0,5.0,2.0,1.0,5.0,2.0,2.0,4.0,4.0,0.0,0.0
3,4.0,3.0,3.0,1.0,1.0,2.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,2.0,3.0,2.0,3.0,4.0,2.0,2.0,4.0,3.0,2.0,4.0,2.0,3.0,2.0,2.0,1.0,1.0,2.0,1.0,2.0,2.0,2.0,2.0,1.0,3.0,4.0,4.0,3.0,3.0,4.0,4.0,4.0,4.0,5.0,6.0,6.0,5.0,6.0,3.0,2.0,2.0,1.0,2.0,2.0,1.0,2.0,2.0,3.0,2.0,2.0,3.0,3.0,2.0,4.0,3.0,4.0,1.0,5.0,4.0,5.0,2.0,4.0,4.0,4.0,4.0,4.0,1.0,2.0,3.0,2.0,4.0,2.0,3.0,4.0,3.0,6.0,6.0,7.0,3.0,7.0,6.0,5.0,4.0,6.0,6.0,6.0,5.0,4.0,2.0,5.0,2.0,2.0,4.0,4.0,1.0,3.0,4.0,4.0,1.0,3.0,3.0,2.0,4.0,2.0,1.0,1.0,2.0,1.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,3.0,2.0,3.0,1.0,2.0,1.0,2.0,1.0,1.0,2.0,3.0,2.0,2.0,4.0,4.0,3.0,4.0,5.0,4.0,2.0,2.0,2.0,0.0,0.0
4,4.0,4.0,4.0,4.0,4.0,4.0,1.0,2.0,4.0,4.0,2.0,2.0,2.0,1.0,1.0,2.0,1.0,3.0,3.0,1.0,4.0,4.0,4.0,4.0,4.0,4.0,2.0,4.0,4.0,2.0,3.0,1.0,3.0,3.0,2.0,3.0,1.0,2.0,3.0,1.0,3.0,4.0,4.0,3.0,2.0,4.0,4.0,3.0,3.0,5.0,5.0,5.0,5.0,5.0,5.0,3.0,3.0,4.0,3.0,3.0,3.0,4.0,4.0,4.0,4.0,4.0,3.0,3.0,3.0,3.0,4.0,3.0,3.0,3.0,4.0,2.0,5.0,3.0,4.0,4.0,4.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,7.0,5.0,6.0,6.0,5.0,6.0,7.0,6.0,7.0,6.0,5.0,6.0,3.0,2.0,4.0,5.0,4.0,4.0,2.0,3.0,4.0,3.0,3.0,5.0,4.0,2.0,2.0,1.0,1.0,1.0,1.0,6.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,1.0,3.0,2.0,2.0,1.0,1.0,1.0,1.0,2.0,2.0,1.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,2.0,0.0,1.0


In [36]:
df1.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2826 entries, 0 to 2825
Columns: 171 entries, moa1#1_1 to demo_us
dtypes: float64(171)
memory usage: 3.7 MB
