In [1]:
import os
import os.path
import fnmatch
import pandas as pd
import string

### Domain Functions

In [2]:
class Subject:
    """Questionnaire answers of one subject and the MET-minutes/week derived from them.

    ``subject_dict`` is a single record keyed by column name. It must contain
    ``'ID'`` and, for each of the activities below, a ``'<activity>_Days'``
    and a ``'<activity>_Minutes'`` entry whose value ``float()`` accepts
    (numbers or numeric strings):

        Work_Walking, Work_Moderate, Work_Vigorous,
        Transport_Walking, Transport_Cycle,
        Yard_Vigorous, Yard_Moderate, Domestic_Moderate,
        Leisure_Walking, Leisure_Moderate, Leisure_Vigorous

    Attributes available after construction:

    * ``id`` -- the record's ``'ID'`` value.
    * ``work_MET_minutes_week``, ``transport_MET_minutes_week``,
      ``domestic_yard_MET_minutes_week``, ``leisure_MET_minutes_week`` --
      per-domain MET-minutes/week as floats.
    * ``total_walking_MET_minutes_week``, ``total_moderate_MET_minutes_week``,
      ``total_vigorous_MET_minutes_week`` -- the same scores regrouped by
      intensity across the domains.
    * ``MET`` -- sum of the four per-domain scores.
    * ``category`` -- initialised to ``''``; this class never assigns it.

    A missing key raises ``KeyError``; a value ``float()`` rejects raises
    ``ValueError`` or ``TypeError``.
    """

    def __init__(self, subject_dict):
        self.subject_dict = subject_dict
        self.id = subject_dict['ID']

        # Per-intensity accumulators. The four domain methods called below
        # add their contributions to these as a side effect.
        self.total_walking_MET_minutes_week = 0
        self.total_moderate_MET_minutes_week = 0
        self.total_vigorous_MET_minutes_week = 0

        # NOTE: each assignment stores a float under the same name as the
        # method that produced it, so on this instance the name is no longer
        # callable afterwards. MET_minutes_week() relies on these being floats,
        # and running each method exactly once keeps the accumulators above
        # from being double counted.
        self.work_MET_minutes_week = self.work_MET_minutes_week()
        self.transport_MET_minutes_week = self.transport_MET_minutes_week()
        self.domestic_yard_MET_minutes_week = self.domestic_yard_MET_minutes_week()
        self.leisure_MET_minutes_week = self.leisure_MET_minutes_week()

        self.MET = self.work_MET_minutes_week + self.transport_MET_minutes_week + self.domestic_yard_MET_minutes_week + self.leisure_MET_minutes_week

        self.category = ''

    # Internal helpers
    def _as_float(self, key):
        """Return ``subject_dict[key]`` converted with ``float()``.

        Every read of a days/minutes value goes through here so that the
        day sums and minute checks accept the same inputs (including numeric
        strings) as the MET computations.
        """
        return float(self.subject_dict[key])

    def _met_minutes(self, met_weight, activity):
        """Return ``met_weight * <activity>_Days * <activity>_Minutes``."""
        return met_weight * self._as_float(activity + '_Days') * self._as_float(activity + '_Minutes')

    # Functions for the Categorical conversion
    def sum_vigorous_days(self):
        """Return work + leisure vigorous days as a float."""
        return self._as_float('Work_Vigorous_Days') + self._as_float('Leisure_Vigorous_Days')

    def sum_moderate_days(self):
        """Return the summed days of every activity counted as moderate, as a float.

        Covers moderate work, transport cycling, moderate and vigorous yard
        work, moderate domestic work and moderate leisure.
        """
        return (
            self._as_float('Work_Moderate_Days')
            + self._as_float('Transport_Cycle_Days')
            + self._as_float('Yard_Moderate_Days')
            + self._as_float('Yard_Vigorous_Days')
            + self._as_float('Domestic_Moderate_Days')
            + self._as_float('Leisure_Moderate_Days')
        )

    def sum_all_combinations(self):
        """Return the summed days of all eleven activities, as a float."""
        sum_work_days = (
            self._as_float('Work_Walking_Days')
            + self._as_float('Work_Moderate_Days')
            + self._as_float('Work_Vigorous_Days')
        )
        sum_transport_days = (
            self._as_float('Transport_Walking_Days')
            + self._as_float('Transport_Cycle_Days')
        )
        sum_domestic_yard_days = (
            self._as_float('Domestic_Moderate_Days')
            + self._as_float('Yard_Moderate_Days')
            + self._as_float('Yard_Vigorous_Days')
        )
        sum_leisure_days = (
            self._as_float('Leisure_Walking_Days')
            + self._as_float('Leisure_Moderate_Days')
            + self._as_float('Leisure_Vigorous_Days')
        )
        return sum_work_days + sum_transport_days + sum_domestic_yard_days + sum_leisure_days

    def check_vigorous_20_minutes(self):
        """Return True when work AND leisure vigorous minutes are both >= 20.

        NOTE(review): a subject who is vigorous in only one of the two domains
        fails this check -- confirm that requiring both is intended.
        """
        return (
            self._as_float('Work_Vigorous_Minutes') >= 20
            and self._as_float('Leisure_Vigorous_Minutes') >= 20
        )

    def check_walking_30_minutes(self):
        """Return True when work, transport AND leisure walking minutes are all >= 30.

        NOTE(review): all three domains must reach 30 minutes -- confirm that
        this is the intended rule.
        """
        return (
            self._as_float('Work_Walking_Minutes') >= 30
            and self._as_float('Transport_Walking_Minutes') >= 30
            and self._as_float('Leisure_Walking_Minutes') >= 30
        )

    # MET Evaluation
    # Work Domain
    def work_MET_minutes_week(self):
        """Return the work-domain MET-minutes/week and update the intensity totals.

        Weights used: 3.3 (walking), 4.0 (moderate), 8.0 (vigorous).
        """
        walking_MET_minutes_week = self._met_minutes(3.3, 'Work_Walking')
        moderate_MET_minutes_week = self._met_minutes(4.0, 'Work_Moderate')
        vigorous_MET_minutes_week = self._met_minutes(8.0, 'Work_Vigorous')

        self.total_walking_MET_minutes_week += walking_MET_minutes_week
        self.total_moderate_MET_minutes_week += moderate_MET_minutes_week
        self.total_vigorous_MET_minutes_week += vigorous_MET_minutes_week

        return walking_MET_minutes_week + moderate_MET_minutes_week + vigorous_MET_minutes_week

    # Transport Domain
    def transport_MET_minutes_week(self):
        """Return the transport-domain MET-minutes/week and update the intensity totals.

        Weights used: 3.3 (walking), 6.0 (cycling). Cycling is accumulated
        into the moderate total.
        """
        walking_MET_minutes_week = self._met_minutes(3.3, 'Transport_Walking')
        cycling_MET_minutes_week = self._met_minutes(6.0, 'Transport_Cycle')

        self.total_walking_MET_minutes_week += walking_MET_minutes_week
        self.total_moderate_MET_minutes_week += cycling_MET_minutes_week

        return walking_MET_minutes_week + cycling_MET_minutes_week

    # Domestic & Yard Domain
    def domestic_yard_MET_minutes_week(self):
        """Return the domestic/yard MET-minutes/week and update the moderate total.

        Weights used: 5.5 (vigorous yard), 4.0 (moderate yard), 3.0 (moderate
        domestic). All three, including vigorous yard work, are accumulated
        into the moderate total.
        """
        vigorous_yard_MET_minutes_week = self._met_minutes(5.5, 'Yard_Vigorous')
        moderate_yard_MET_minutes_week = self._met_minutes(4.0, 'Yard_Moderate')
        moderate_domestic_MET_minutes_week = self._met_minutes(3.0, 'Domestic_Moderate')

        # Kept as one left-to-right sum so float rounding is unchanged.
        self.total_moderate_MET_minutes_week = self.total_moderate_MET_minutes_week + vigorous_yard_MET_minutes_week + moderate_yard_MET_minutes_week + moderate_domestic_MET_minutes_week

        return vigorous_yard_MET_minutes_week + moderate_yard_MET_minutes_week + moderate_domestic_MET_minutes_week

    # Leisure Domain
    def leisure_MET_minutes_week(self):
        """Return the leisure-domain MET-minutes/week and update the intensity totals.

        Weights used: 3.3 (walking), 4.0 (moderate), 8.0 (vigorous).
        """
        walking_MET_minutes_week = self._met_minutes(3.3, 'Leisure_Walking')
        moderate_MET_minutes_week = self._met_minutes(4.0, 'Leisure_Moderate')
        vigorous_MET_minutes_week = self._met_minutes(8.0, 'Leisure_Vigorous')

        self.total_walking_MET_minutes_week += walking_MET_minutes_week
        self.total_moderate_MET_minutes_week += moderate_MET_minutes_week
        self.total_vigorous_MET_minutes_week += vigorous_MET_minutes_week

        return walking_MET_minutes_week + moderate_MET_minutes_week + vigorous_MET_minutes_week

    # MET evaluated in both ways
    def MET_minutes_week(self):
        """Return the total MET-minutes/week as the sum of the four domain scores.

        Reads the float attributes stored by ``__init__``.
        """
        return self.work_MET_minutes_week + self.transport_MET_minutes_week + self.domestic_yard_MET_minutes_week + self.leisure_MET_minutes_week

    def total_MET_minutes_week(self):
        """Return the total MET-minutes/week as walking + moderate + vigorous totals."""
        return self.total_walking_MET_minutes_week + self.total_moderate_MET_minutes_week + self.total_vigorous_MET_minutes_week
    

### Main

In [3]:
# Input spreadsheet with the questionnaire answers.
# NOTE(review): this is an absolute path at the filesystem root -- confirm the
# file really lives there on the machine running the notebook.
spreadsheet_path = "/input_IPAQ_spreadsheet.csv"

# Read the CSV, then convert it to a list with one {column: value} dict per row.
df = pd.read_csv(filepath_or_buffer=spreadsheet_path)
dict_df = df.to_dict(orient='records')

### Categorical conversion

In [4]:
# Collects one [ID, category] pair per spreadsheet row.
out_df_rows = []

for record in dict_df:
    subject = Subject(record)
    total_met = subject.MET

    # 'High': >= 1500 MET-min/week with >= 3 vigorous days,
    # or >= 3000 MET-min/week with >= 7 summed activity days.
    meets_high = (
        (total_met >= 1500 and subject.sum_vigorous_days() >= 3)
        or (total_met >= 3000 and subject.sum_all_combinations() >= 7)
    )

    if meets_high:
        subject.category = 'High'
    else:
        # 'Moderate': any one of the four tests below is sufficient.
        # NOTE(review): the moderate-days test and the walking-minutes test
        # are OR-ed, so either alone qualifies -- confirm this matches the
        # intended scoring rule.
        meets_moderate = (
            (subject.sum_vigorous_days() >= 3 and subject.check_vigorous_20_minutes())
            or subject.sum_moderate_days() >= 5
            or subject.check_walking_30_minutes()
            or (total_met >= 600 and subject.sum_all_combinations() >= 5)
        )
        subject.category = 'Moderate' if meets_moderate else 'Low'

    out_df_rows.append([subject.id, subject.category])

### Create new DataFrame

In [5]:
# Two-column result table: one row per subject with its assigned category.
out_columns = ["ID", "IPAQ_Category"]
out_df = pd.DataFrame(data=out_df_rows, columns=out_columns)

In [6]:
# Write the result table to the working directory, without the row index.
out_df.to_csv(path_or_buf='subjects_categories.csv', index=False)