In [2]:
import numpy as np
import pandas as pd

# Data processing. Stops

#### 1. Median, Mode and IQR
#### 2. Average temperature and humidex

<br>

# 1. Median, Mode and IQR
Functions to obtain the Median, the Mode and the IQR for a given DataFrame (survey dataset)



In [3]:
def mediana(df,codis_list):
    '''
    Median category of the votes, for each stop of the same trolley.

    ``df`` describes one survey question: the column ``'tags'`` lists the answer
    categories in their natural order (for example, the thermal comfort question
    goes from "Very uncomfortable" to "Very comfortable") and every column named
    in ``codis_list`` holds the number of votes each category received at that stop.

    For a stop with N votes, the categories are walked in order while accumulating
    the number of votes; the median is the first category whose cumulative count
    reaches half of the votes, i.e. the category containing vote number ceil(N/2):

        - N odd (e.g. N = 11): the middle vote is unique (vote number 6), so the
          median is the category that vote falls in. No tie-break is needed.

        - N even (e.g. N = 10): there are two middle votes (5 and 6); the lower
          one (vote number 5) is used, so the result is always a real category.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with a ``'tags'`` column and one column of vote counts per stop.
    codis_list : list
        Codes of the stops (column names of ``df``) to process.

    Returns
    -------
    list
        One entry per stop, in the order of ``codis_list``: the median tag, or NaN
        when the stop has a missing vote count or has no votes at all.
    '''

    tags = df['tags'].tolist()   # plain lists -> positional access, independent of the index
    median = []

    for i in codis_list:         # Iterate over all the stops (list of codes of the stops)

        votes = df[i]

        if votes.isnull().sum() != 0:   # Missing vote counts: the median is NaN
            median.append(np.nan)
            continue

        counts = votes.tolist()
        n_total = sum(counts)           # Total number of votes

        if n_total <= 0:                # Nobody voted at this stop: the median is undefined
            median.append(np.nan)
            continue

        cumsum_vots = 0
        for tag, n_vots in zip(tags, counts):

            cumsum_vots += n_vots       # Cumulative sum of the votes (ordered categories)

            # Integer form of "cumsum_vots >= n_total / 2": true for the first time at
            # the category that contains vote number ceil(n_total / 2).
            if 2 * cumsum_vots >= n_total:
                median.append(tag)
                break

    return median            # Return list of medians, for each stop

    
    
def moda(df,codis_list):
    '''
    Mode of the votes, for each stop of the same trolley.

    ``df`` has a ``'tags'`` column with the answer categories of one survey
    question and one column of vote counts per stop. For every stop code in
    ``codis_list`` the categories with the highest vote count are collected in a
    dictionary ``{category: number of votes}``, so the size of the winning vote is
    kept next to the winner. When several categories share the highest count, all
    of them appear in the dictionary.

    Returns a list with one entry per stop, in the order of ``codis_list``: the
    dictionary described above, or NaN when the stop's column contains a missing
    value.
    '''

    modes_per_stop = []

    for stop_code in codis_list:    # One pass per stop
        stop_votes = df[stop_code]

        # A missing vote count at this stop -> the mode is reported as NaN
        if stop_votes.isnull().any():
            modes_per_stop.append(np.nan)
            continue

        # Keep only the rows (categories) that reach the highest vote count
        reaches_top = stop_votes.values == stop_votes.values.max()
        top_rows = df[reaches_top]

        # Categories become the keys, their vote counts the values
        winners = {
            tag: occurs
            for tag, occurs in zip(top_rows['tags'].tolist(), top_rows[stop_code].tolist())
        }
        modes_per_stop.append(winners)

    return modes_per_stop



def iqr(df,codis_list):
    '''
    First and third quartile categories of the votes, for each stop of the same trolley.

    ``df`` has a ``'tags'`` column with the ordered answer categories of one survey
    question and one column of vote counts per stop. For a stop with N votes, the
    categories are walked in order while accumulating the number of votes:

        - Q1 is the first category whose cumulative count reaches 25% of the votes.
        - Q3 is the first category whose cumulative count reaches 75% of the votes.

    The thresholds are not truncated to an integer: with few votes a truncated
    threshold becomes 0 and Q1 would always be the first category, even when
    nobody chose it.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with a ``'tags'`` column and one column of vote counts per stop.
    codis_list : list
        Codes of the stops (column names of ``df``) to process.

    Returns
    -------
    list
        One ``[q1, q3]`` pair per stop, in the order of ``codis_list``. The pair is
        ``[NaN, NaN]`` when the stop has a missing vote count or no votes at all.
    '''

    tags = df['tags'].tolist()   # plain lists -> positional access, independent of the index
    quartiles = []

    for i in codis_list:  # Iterate over all the stops (list of codes of the stops)

        votes = df[i]

        if votes.isnull().sum() != 0:   # Missing vote counts: the quartiles are NaN
            quartiles.append([np.nan, np.nan])
            continue

        counts = votes.tolist()
        n_total = sum(counts)           # Total number of votes

        if n_total <= 0:                # Nobody voted at this stop: the quartiles are undefined
            quartiles.append([np.nan, np.nan])
            continue

        q1_tag = None
        q3_tag = None
        q1_found = False
        cumsum_vots = 0

        for tag, n_vots in zip(tags, counts):

            cumsum_vots += n_vots       # Cumulative sum of the votes (ordered categories)

            # Integer form of "cumsum_vots >= 0.25 * n_total"
            if not q1_found and 4 * cumsum_vots >= n_total:
                q1_tag = tag
                q1_found = True

            # Integer form of "cumsum_vots >= 0.75 * n_total"; Q1 is always found
            # no later than Q3, so the walk can stop here.
            if 4 * cumsum_vots >= 3 * n_total:
                q3_tag = tag
                break

        quartiles.append([q1_tag, q3_tag])

    return quartiles





def _votes_by_stop(df, tags, stop_codes):
    '''Build the table of one question: one row per answer tag, one column of votes per stop.'''
    table = pd.DataFrame()
    table['tags'] = tags
    for row, code in enumerate(stop_codes):
        # Votes of stop number `row` for every answer of the question, in tag order
        table[code] = [df[tag].iloc[row] for tag in tags]
    return table


def statistics_votes(df_all_surveys):
    '''
    Function that incorporates all the statistics above: the median, the mode and the IQR.

    It iterates over a list of data-frames (one survey per trolley, one row per stop)
    and, for each question of the survey (thermal comfort while walking, thermal
    comfort, thermal sensation, wind and sun), adds three new columns to the
    data-frame: ``median_<question>``, ``mode_<question>`` and ``IQR_<question>``.

    Each survey data-frame must contain the column ``'space_code'`` (code of the
    stop) and one column of vote counts per answer tag listed below. Surveys with
    any number of stops are supported.

    Parameters
    ----------
    df_all_surveys : iterable of pandas.DataFrame
        Survey data-frames. They are modified in place (the new columns are added).

    Returns
    -------
    list
        The same data-frames, in the same order, with the statistics columns added.
    '''

    # Tags of each question of the survey (they are the column names holding the votes)

    thermal_confort = ['Molt incòmode','Incòmode','Lleugerament incòmode','Neutre(confort)','Lleugerament còmode','Còmode',
                       'Molt còmode']

    thermal_confort_walk = ['Molt incòmode (walk)','Incòmode (walk)','Lleugerament incòmode (walk)','Neutre (walk)',
                            'Lleugerament còmode (walk)','Còmode (walk)','Molt còmode (walk)']

    thermal_sensation = ['Fresc','Lleugerament fresc','Neutre(sensacio)','Lleugerament càlid','Càlid','Calorós','Molt calorós']

    wind = ['No fa vent', 'Un vent molt fluix', 'Un vent fluix', 'Un vent moderat', 'Un vent fort']

    sun = ['A ple sol', 'En una barreja de sol i ombra', 'A plena ombra']

    # (suffix of the new columns, tags of the question), in the order the columns are added
    questions = [
        ('confort_walk', thermal_confort_walk),
        ('confort', thermal_confort),
        ('sensation', thermal_sensation),
        ('wind', wind),
        ('sun', sun),
    ]

    all_dfs = []

    for df in df_all_surveys:  # Iterate over all data-frames (surveys for each trolley)

        stop_codes = df['space_code'].tolist()

        for suffix, tags in questions:

            # Table of the question: rows are the answers, columns are the stops
            df_votes = _votes_by_stop(df, tags, stop_codes)

            # Add the results as new columns in the data-frame of the survey
            df['median_' + suffix] = mediana(df_votes, stop_codes)
            df['mode_' + suffix] = moda(df_votes, stop_codes)
            df['IQR_' + suffix] = iqr(df_votes, stop_codes)

        all_dfs.append(df)    # collect the data-frames (of each survey) in a list again

    return all_dfs


# 2. Average temperature and humidex
For each stop, the temperature and humidex are averaged over the interval between the arrival and the departure time. This requires the processed trajectory files 
together with the surveys (the surveys provide the arrival/departure times, the trajectories provide the measurements).

The average values are stored as new columns of the surveys dataframes

In [4]:
def average_values_T_HDX(df_trajectory, df_survey):
    '''
    Average the trajectory measurements recorded while the trolley was at each stop.

    For every row (stop) of ``df_survey``, the rows of ``df_trajectory`` whose
    timestamp lies between the arrival and the departure time of the stop (both
    included) are selected, and the mean of each quantity is stored in a new
    column of ``df_survey``. Two selections are made per stop:

        - s = 0: rows selected with the column ``'Time'``; the raw columns are averaged.
        - s = 2: rows selected with the column ``'Time(s=2)'``; the
          ``'avg_moving_..._120s'`` columns are averaged.

    The mean latitude and longitude come from the s = 0 selection. A stop with no
    trajectory rows in its interval gets NaN.

    Parameters
    ----------
    df_trajectory : pandas.DataFrame
        Trajectory of the trolley. Its ``'Time'`` and ``'Time(s=2)'`` columns are
        converted to datetime in place (format ``%Y-%m-%d %H:%M:%S``).
    df_survey : pandas.DataFrame
        Survey of the trolley, with the text columns ``'date'`` (``%Y-%m-%d``),
        ``'arrival_time'`` and ``'departure_time'`` (``%H:%M``). The averaged
        columns are added to it in place.

    Returns
    -------
    pandas.DataFrame
        ``df_survey`` itself, with the new columns.
    '''

    # (new survey column, trajectory time column used for the selection, averaged trajectory column)
    # The order is the order in which the columns are added to the survey.
    averaged_columns = [
        ('<T>(s=0)', 'Time', 'Temp[°C]'),
        ('<T>(s=2)', 'Time(s=2)', 'avg_moving_Temp[°C]_120s'),
        ('<T-T_fixed>(s=0)', 'Time', 'T-T_fixed'),
        ('<T-T_fixed>(s=2)', 'Time(s=2)', 'avg_moving_T-T_fixed_120s'),
        ('<T-T_fixed+<T>>(s=0)', 'Time', 'T-T_fixed+<T>'),
        ('<T-T_fixed+<T>>(s=2)', 'Time(s=2)', 'avg_moving_T-T_fixed+<T>_120s'),
        ('<HDX>(s=0)', 'Time', 'HDX[°C]'),
        ('<HDX>(s=2)', 'Time(s=2)', 'avg_moving_HDX[°C]_120s'),
        ('<HDX-HDX_fixed>(s=0)', 'Time', 'HDX-HDX_fixed'),
        ('<HDX-HDX_fixed>(s=2)', 'Time(s=2)', 'avg_moving_HDX-HDX_fixed_120s'),
        ('<HDX-HDX_fixed+<HDX>>(s=0)', 'Time', 'HDX-HDX_fixed+<HDX>'),
        ('<HDX-HDX_fixed+<HDX>>(s=2)', 'Time(s=2)', 'avg_moving_HDX-HDX_fixed+<HDX>_120s'),
        ('<latitude>', 'Time', 'Lat'),
        ('<longitude>', 'Time', 'Lon'),
    ]

    # Correct format for Time (trajectory)
    df_trajectory['Time'] = pd.to_datetime(df_trajectory['Time'], format='%Y-%m-%d %H:%M:%S')
    df_trajectory['Time(s=2)'] = pd.to_datetime(df_trajectory['Time(s=2)'], format='%Y-%m-%d %H:%M:%S')

    # Arrival and departure datetimes of every stop, parsed once and kept in local
    # Series so that no temporary column is ever added to the survey.
    arrival_times = pd.to_datetime(df_survey['date']+' '+df_survey['arrival_time'], format='%Y-%m-%d %H:%M')
    departure_times = pd.to_datetime(df_survey['date']+' '+df_survey['departure_time'], format='%Y-%m-%d %H:%M')

    averages = {new_column: [] for new_column, _, _ in averaged_columns}

    # zip walks the stops by position, so the index labels of the survey do not matter
    for arrival_time, departure_time in zip(arrival_times, departure_times):

        # Trajectory rows recorded during the stop, for each of the two time columns
        rows_at_stop = {}
        for time_column in ('Time', 'Time(s=2)'):
            timestamps = df_trajectory[time_column]
            rows_at_stop[time_column] = df_trajectory.loc[(timestamps >= arrival_time) & (timestamps <= departure_time)]

        for new_column, time_column, source_column in averaged_columns:
            averages[new_column].append(rows_at_stop[time_column][source_column].mean())

    # Aggregate columns to the data-frame survey, with the averaged quantities for each stop
    for new_column, _, _ in averaged_columns:
        df_survey[new_column] = averages[new_column]

    return df_survey

# Example with data from Fundació Comtal

    - Date of the experiment: 11/07/2024
    - 5 trolleys with 2 sensors per trolley
    - 1 fixed trolley with 2 sensors

##  0. Read the data-sets

In [None]:
# Trajectories (one CSV file per trolley).
# Paths use forward slashes so that they are valid on Windows, Linux and macOS.
df_comtal_carro1 = pd.read_csv('f_comtal/df_comtal_carro1.csv')
df_comtal_carro2 = pd.read_csv('f_comtal/df_comtal_carro2.csv')
df_comtal_carro3 = pd.read_csv('f_comtal/df_comtal_carro3.csv')
df_comtal_carro4 = pd.read_csv('f_comtal/df_comtal_carro4.csv')
df_comtal_carro5 = pd.read_csv('f_comtal/df_comtal_carro5.csv')

# Surveys (one CSV file per trolley)
survey_comtal_carro1 = pd.read_csv('f_comtal/survey_comtal_carro1.csv')
survey_comtal_carro2 = pd.read_csv('f_comtal/survey_comtal_carro2.csv')
survey_comtal_carro3 = pd.read_csv('f_comtal/survey_comtal_carro3.csv')
survey_comtal_carro4 = pd.read_csv('f_comtal/survey_comtal_carro4.csv')
survey_comtal_carro5 = pd.read_csv('f_comtal/survey_comtal_carro5.csv')

#### The survey datasets already contain the median, mode and IQR columns, so we only need to run the function that computes the averaged temperature and humidex

In [None]:
# Add the averaged temperature / humidex columns to the survey of each trolley,
# pairing every trajectory with the survey of the same trolley.
(
    survey_comtal_carro1,
    survey_comtal_carro2,
    survey_comtal_carro3,
    survey_comtal_carro4,
    survey_comtal_carro5,
) = [
    average_values_T_HDX(trajectory, survey)
    for trajectory, survey in (
        (df_comtal_carro1, survey_comtal_carro1),
        (df_comtal_carro2, survey_comtal_carro2),
        (df_comtal_carro3, survey_comtal_carro3),
        (df_comtal_carro4, survey_comtal_carro4),
        (df_comtal_carro5, survey_comtal_carro5),
    )
]

# Save the data-frames in csv files (surveys)

In [None]:
# Saving is disabled: uncomment the lines below to write the updated surveys to disk.
# NOTE: the target files are the same survey CSV files that are read in the
# "Read the data-sets" cell, so running these lines overwrites the inputs.
#survey_comtal_carro1.to_csv('f_comtal\\survey_comtal_carro1.csv',index=False)
#survey_comtal_carro2.to_csv('f_comtal\\survey_comtal_carro2.csv',index=False)
#survey_comtal_carro3.to_csv('f_comtal\\survey_comtal_carro3.csv',index=False)
#survey_comtal_carro4.to_csv('f_comtal\\survey_comtal_carro4.csv',index=False)
#survey_comtal_carro5.to_csv('f_comtal\\survey_comtal_carro5.csv',index=False)