In [6]:
import pandas as pd
import numpy as np
import glob, os

In [7]:

def participant_stats(filename, headers):
    """Load one participant's survey CSV and return its statistics as a dict.

    Parameters
    ----------
    filename : str
        Path of the CSV file handed to ``pd.read_csv``.
    headers : sequence of str
        Output column names, paired in order with the values produced by
        ``mood_change_stats``.

    Returns
    -------
    dict
        Maps each header to the statistic at the same position. Pairing
        stops at the shorter of the two sequences.
    """
    survey = pd.read_csv(filename)
    values = mood_change_stats(survey)
    return {header: value for header, value in zip(headers, values)}
    
def get_participant_days(patient):
    """Count the distinct study days on which a survey has an end timestamp.

    Parameters
    ----------
    patient : pandas.DataFrame
        Must contain the columns 'EndTS' and 'StudayDay'.

    Returns
    -------
    int
        Number of distinct 'StudayDay' values among rows whose 'EndTS' is
        not null. A missing day value counts as one distinct value.
    """
    finished_days = patient.loc[patient['EndTS'].notnull(), 'StudayDay']
    # dropna=False keeps NaN in the count, matching len(series.unique()).
    return finished_days.nunique(dropna=False)

# Answer codes for 'Are you in a better or worse mood now than before?':
#   1 = better mood, 2 = worse mood
def mood_change_stats(patient):
    """Summarise one participant's mood-change ('MD') survey entries.

    Parameters
    ----------
    patient : pandas.DataFrame
        Survey rows for a single participant. Columns read here are
        'Patient', 'Type.1', 'Are you in a better or worse mood now than
        before?' (1 = better, 2 = worse) and 'How much did your mood
        change?'. The helper functions read further columns.

    Returns
    -------
    list
        [participant id, active days, total changes, positive count,
        negative count, percent positive, percent negative, mean positive
        degree, mean negative degree], followed by the values returned by
        ``positive_triggers`` and then ``negative_triggers``. Both
        percentages are None when the frame has no 'MD' rows.
    """
    direction_col = 'Are you in a better or worse mood now than before?'
    degree_col = 'How much did your mood change?'

    # The id is read positionally. The second row is preferred because that
    # is what the previous label lookup ``[1]`` selected on a default index;
    # shorter frames fall back instead of raising KeyError.
    ids = patient['Patient']
    if len(ids) > 1:
        participant_id = ids.iloc[1]
    elif len(ids) == 1:
        participant_id = ids.iloc[0]
    else:
        participant_id = None

    num_days = get_participant_days(patient)
    changes_df = patient[patient['Type.1'] == 'MD']
    positive_changes_df = changes_df[changes_df[direction_col] == 1]
    negative_changes_df = changes_df[changes_df[direction_col] == 2]

    total_changes = len(changes_df)
    num_positive = len(positive_changes_df)
    num_negative = len(negative_changes_df)

    if total_changes == 0:
        percent_positive = None
        percent_negative = None
    else:
        # float() forces true division even where ``/`` on two ints
        # truncates (Python 2).
        percent_positive = 100 * (float(num_positive) / total_changes)
        percent_negative = 100 * (float(num_negative) / total_changes)

    pos_degree_avg = positive_changes_df[degree_col].mean()
    neg_degree_avg = negative_changes_df[degree_col].mean()

    patient_data = [
        participant_id,
        num_days,
        total_changes,
        num_positive,
        num_negative,
        percent_positive,
        percent_negative,
        pos_degree_avg,
        neg_degree_avg,
    ]
    patient_data.extend(positive_triggers(positive_changes_df, num_positive))
    patient_data.extend(negative_triggers(negative_changes_df, num_negative))

    return patient_data
     
def positive_triggers(pos_df, num_positive, start=58, stop=71):
    """Percentage of positive mood changes that reported each trigger.

    Parameters
    ----------
    pos_df : pandas.DataFrame
        Rows for positive mood changes. Trigger columns are addressed by
        position, not by name.
    num_positive : int
        Denominator for the percentages.
    start, stop : int, optional
        Half-open positional range of the trigger columns. The defaults
        (58 to 70 inclusive) are the positions the notebook has always used.

    Returns
    -------
    list of float
        One value per trigger column: the column sum divided by
        ``num_positive``, times 100. Every entry is NaN when
        ``num_positive`` is 0.
    """
    if not num_positive:
        # Nothing to divide by; report "no data" rather than dividing by zero.
        return [float('nan')] * (stop - start)

    # float() forces true division even where ``/`` on integers truncates.
    denominator = float(num_positive)
    # .iloc selects by position; ``pos_df[[i]]`` is a label lookup and raises
    # KeyError when the column labels are strings.
    return [
        (pos_df.iloc[:, col].values.sum() / denominator) * 100
        for col in range(start, stop)
    ]

def negative_triggers(neg_df, num_negative, start=71, stop=84):
    """Percentage of negative mood changes that reported each trigger.

    Parameters
    ----------
    neg_df : pandas.DataFrame
        Rows for negative mood changes. Trigger columns are addressed by
        position, not by name.
    num_negative : int
        Denominator for the percentages.
    start, stop : int, optional
        Half-open positional range of the trigger columns. The defaults
        (71 to 83 inclusive) are the positions the notebook has always used.

    Returns
    -------
    list of float
        One value per trigger column: the column sum divided by
        ``num_negative``, times 100. Every entry is NaN when
        ``num_negative`` is 0.
    """
    if not num_negative:
        # Nothing to divide by; report "no data" rather than dividing by zero.
        return [float('nan')] * (stop - start)

    # float() forces true division even where ``/`` on integers truncates.
    denominator = float(num_negative)
    # .iloc selects by position; ``neg_df[[i]]`` is a label lookup and raises
    # KeyError when the column labels are strings.
    return [
        (neg_df.iloc[:, col].values.sum() / denominator) * 100
        for col in range(start, stop)
    ]


In [8]:
def mood_change_stats_all(patient):
    """Compute the mood-change statistics over a pooled frame of participants.

    Produces the same layout as ``mood_change_stats``, except that the first
    element is the fixed label 'All Avg.' instead of a participant id.

    NOTE(review): the counts (active days, total/positive/negative changes)
    are computed on the pooled rows, so they are pooled totals rather than
    per-participant averages despite the label.

    Parameters
    ----------
    patient : pandas.DataFrame
        Concatenated survey rows. Columns read here are 'Type.1', 'Are you
        in a better or worse mood now than before?' (1 = better, 2 = worse)
        and 'How much did your mood change?'. The helper functions read
        further columns.

    Returns
    -------
    list
        ['All Avg.', active days, total changes, positive count, negative
        count, percent positive, percent negative, mean positive degree,
        mean negative degree], followed by the values returned by
        ``positive_triggers`` and then ``negative_triggers``. Both
        percentages are None when the frame has no 'MD' rows.
    """
    direction_col = 'Are you in a better or worse mood now than before?'
    degree_col = 'How much did your mood change?'

    num_days = get_participant_days(patient)
    changes_df = patient[patient['Type.1'] == 'MD']
    positive_changes_df = changes_df[changes_df[direction_col] == 1]
    negative_changes_df = changes_df[changes_df[direction_col] == 2]

    total_changes = len(changes_df)
    num_positive = len(positive_changes_df)
    num_negative = len(negative_changes_df)

    if total_changes == 0:
        # Same convention as mood_change_stats: no 'MD' rows, no percentage.
        percent_positive = None
        percent_negative = None
    else:
        # Convert before dividing; float(a / b) would truncate first where
        # ``/`` on two ints is integer division.
        percent_positive = 100 * (float(num_positive) / total_changes)
        percent_negative = 100 * (float(num_negative) / total_changes)

    pos_degree_avg = positive_changes_df[degree_col].mean()
    neg_degree_avg = negative_changes_df[degree_col].mean()

    patient_data = [
        'All Avg.',
        num_days,
        total_changes,
        num_positive,
        num_negative,
        percent_positive,
        percent_negative,
        pos_degree_avg,
        neg_degree_avg,
    ]
    patient_data.extend(positive_triggers(positive_changes_df, num_positive))
    patient_data.extend(negative_triggers(negative_changes_df, num_negative))

    return patient_data

In [9]:
# Survey exports are the CSV files in the working directory whose names start
# with a digit. glob returns matches in arbitrary order, so sort them to make
# the row order of the output reproducible.
survey_files = sorted(glob.glob('[0-9]*.csv'))
if not survey_files:
    # Fail with a clear message instead of pd.concat's "No objects to
    # concatenate" further down.
    raise ValueError("No survey files matching '[0-9]*.csv' found in %s" % os.getcwd())

# Output column names, in the order the stats functions emit their values:
# 9 summary columns, 13 positive-trigger columns, 13 negative-trigger columns.
# NOTE(review): 'bodiy' looks like a typo for 'bodily'; it is left unchanged
# because it is an output column name.
headers = [
    'Participant', 'Active Days', 'Total Mood Changes', 'Positive Changes',
    'Negative Changes', 'Percent Positive', 'Percent Negative',
    'Degree Positive Avg', 'Degree Negative Avg',
    'Pos: received good news', 'Pos: someone complimented me',
    'Pos: used alcohol', 'Pos: used drugs', 'Pos: used prescribed medications',
    'Pos: had sex', 'Pos: spent time with someone close',
    'Pos: had nice day or evening', 'Pos: exercised',
    'Pos: did something proud of', 'Pos: felt accepted and supported',
    'Pos: other', 'Pos: NO TRIGGER',
    'Neg: lack of sleep', 'Neg: argument or conflict', 'Neg: used alcohol',
    'Neg: used drugs', 'Neg: used prescribed medications',
    'Neg: problem at work or school', 'Neg: stress',
    'Neg: pain or bodiy discomfort', 'Neg: received bad news',
    'Neg: upset/mad at myself', 'Neg: felt rejected', 'Neg: other',
    'Neg: NO TRIGGER',
]

# One stats dict per participant file.
all_files_list = []
for s_file in survey_files:
    patient_stats_dict = participant_stats(s_file, headers)
    all_files_list.append(patient_stats_dict)

# Pooled row: the same statistics computed over every file concatenated.
alldf = pd.concat((pd.read_csv(f) for f in survey_files))
all_patient_stats = mood_change_stats_all(alldf)
all_patient_stats_tup = zip(headers, all_patient_stats)
all_patient_stats_dict = dict(all_patient_stats_tup)
# Overrides the 'All Avg.' label set by mood_change_stats_all.
all_patient_stats_dict['Participant'] = 'All Avg'

all_files_list.append(all_patient_stats_dict)
stats_all_df = pd.DataFrame(all_files_list, columns=headers)
stats_all_df.to_csv('basic_stats.csv', index=False)
