In [15]:
#packages
import pandas as pd
import numpy as np
import sas7bdat
#let wide survey frames show up to 500 columns instead of being truncated
pd.options.display.max_columns = 500

In [16]:
#import SAS files
from sas7bdat import SAS7BDAT


def _read_chs_year(year):
    """Read one survey year's public-use CHS SAS file into a pandas DataFrame.

    The file name is built as ``chs<year>_public.sas7bdat`` and resolved
    relative to the current working directory.  The reader is opened as a
    context manager so its file handle is closed as soon as the DataFrame has
    been built, instead of staying open for the life of the kernel.
    """
    with SAS7BDAT('chs{}_public.sas7bdat'.format(year)) as reader:
        return reader.to_data_frame()


#convert SAS files to pandas dataframes, one per survey year
#(no 2011 file is loaded here)
data2013 = _read_chs_year(2013)
data2012 = _read_chs_year(2012)
data2010 = _read_chs_year(2010)
data2009 = _read_chs_year(2009)
data2008 = _read_chs_year(2008)

#### Data Documentation

mood1: During the past 30 days, how often did you feel so sad or depressed that nothing could cheer you up?

mood2: During the past 30 days, how often did you feel nervous?

mood3: During the past 30 days, how often did you feel restless or fidgety?

mood4: During the past 30 days, how often did you feel hopeless?

mood5: During the past 30 days, how often did you feel that everything was an effort?

mood6: During the past 30 days, how often did you feel down on yourself, no good or worthless?



Mood1-mood6 share the following scale:
    
    1= All of the time
    
    2= Most of the time
    
    3= Some of the time
    
    4= A little of the time
    
    5= None of the time



nspd: Were you FIRST told by a doctor, nurse, or other health professional that you have depression in the LAST 12 MONTHS?

    1= Yes
    
    2= No

In [17]:
#stack the five yearly frames on top of each other (row-wise) so the rest of the
#preparation works on a single dataframe; original row indices are kept as-is
yearly_frames = [data2013, data2012, data2010, data2009, data2008]
data = pd.concat(yearly_frames)

In [18]:
#subset only the selected variables into a new dataframe
#.copy() makes data_clean an independent frame rather than a tracked slice of `data`,
#so columns can be added to it later without pandas raising SettingWithCopyWarning
data_clean = data[['mood1','mood2','mood3','mood4','mood5','mood6','nspd','uhf34']].copy()

In [19]:
#create the mental health metric based on mood1-mood6: the row-wise average of those
#six general mental health indicators
MOOD_COLUMNS = ['mood1','mood2','mood3','mood4','mood5','mood6']

#.assign returns a new frame instead of writing a column into data_clean in place;
#the in-place write on a frame selected out of `data` is what raised SettingWithCopyWarning.
#skipna=False keeps agg_mood as NaN whenever any of the six answers is missing,
#which is what adding the six columns together produced.
data_clean = data_clean.assign(
    agg_mood=data_clean[MOOD_COLUMNS].sum(axis=1, skipna=False) / len(MOOD_COLUMNS)
)

#put agg_mood right after the six mood columns
data_clean = data_clean[MOOD_COLUMNS + ['agg_mood','nspd','uhf34']]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  app.launch_new_instance()


In [20]:
#group the data by mean by UHF34 group for joining to main dataset
agg_data = data_clean.groupby('uhf34').mean()


def _rescale_to_top_10(values, offset, factor):
    """Shift ``values`` by ``offset``, scale by ``factor``, then rescale so the largest value is 10.

    ``Series.max()`` is used instead of the builtin ``max``: the builtin's
    result depends on where a NaN happens to sit in the sequence, whereas
    ``Series.max()`` skips NaN by default.
    """
    shifted = (values - offset) * factor
    return (shifted / shifted.max()) * 10


#agg_mood and nspd are used as metric values, so both group means are rescaled:
#an offset is subtracted and the result is divided by the largest group value, so the
#best-scoring UHF34 group gets exactly 10. Higher = fewer problems, because higher raw
#codes mean fewer problems (nspd: 2 = No; mood items: 5 = None of the time).
#NOTE: only the top of the scale is fixed at 10; the bottom is NOT pinned to 1 and
#depends on how far the lowest group mean sits above the offset.
#NOTE(review): the offsets 1.9 and 4 look hand-picked to sit just below the lowest
#group means -- confirm, since a group mean below the offset would go negative.
agg_data['nspd'] = _rescale_to_top_10(agg_data['nspd'], offset=1.9, factor=100)
agg_data['agg_mood'] = _rescale_to_top_10(agg_data['agg_mood'], offset=4, factor=10)
agg_data

Unnamed: 0_level_0,mood1,mood2,mood3,mood4,mood5,mood6,agg_mood,nspd
uhf34,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1,4.457784,4.162269,4.31357,4.686016,4.123847,4.751979,8.207623,8.731866
2,4.338776,4.344898,4.286735,4.643878,3.925281,4.679592,7.286999,6.631667
3,4.211938,4.18961,4.09083,4.480519,3.847619,4.595156,4.650868,2.619096
4,4.33749,4.219969,4.219018,4.592362,4.047545,4.653936,6.775805,5.551582
5,4.137987,4.195003,4.103862,4.437703,3.770055,4.528265,3.823111,1.355953
6,4.329365,3.98543,4.097884,4.568212,4.0,4.636122,5.301943,3.983305
7,4.524834,4.138245,4.23096,4.680464,4.152697,4.759934,8.13194,8.883027
8,4.391737,4.287462,4.267584,4.659021,3.901991,4.746554,7.390408,8.079677
9,4.316406,4.345703,4.225586,4.611328,3.852395,4.677734,6.662015,6.686475
10,4.34992,4.202247,4.194222,4.569823,4.069021,4.65008,6.659101,6.212632


In [22]:
#save the per-UHF34 aggregated table as a CSV file in the working directory
output_csv = 'chs_aggregated.csv'
agg_data.to_csv(output_csv)