In [1]:
import pandas as pd 
import datetime
import numpy as np

# Data processing. Trajectories

#### 1. Linear interpolation 
#### 2. Average two sensors of the same trolley
#### 3. Re-label time (t* = t + 1 minute) for the moving average of 2 minutes
#### 4. Calculate new temperatures and humidex
#### 5. Moving average with s = 1,...,10 minutes 


In [2]:
def linear_interpolation(df):
    '''
    Function that interpolates linearly the data to have each record separated by 1 second.
    
    It fills the time "gaps" of more than one second with a linear interpolation of the variable (temperature, pressure, etc.). 
    
    The columns listed in "unused_columns" are dropped (when present) before resampling.
    The input data-frame is NOT modified: a new data-frame is built and returned.
    
    Parameters:
        df: DataFrame with a datetime-like column called 'Time' (one row per record).
        
    Returns:
        A new DataFrame with one row per second between the first and the last record, with 'Time' as the
        first column and the remaining columns linearly interpolated.
    
    '''   
    print('The original data-set has {} records'.format(len(df)))
    
    # Columns that are not used in the analysis and are dropped before interpolating
    unused_columns = ['Typical Part Size[μm]','CO2[ppm]','mass PM2.5[μg/m3]','number PM0.5[#/cm3]','number PM10[#/cm3]','EAQ[]',
                      'FAQ[]','O3[ppb]','Radiation[]','Gradient[°C/100m]','BT[dBm]','mass PM1.0[μg/m3]','number PM1.0[#/cm3]',
                      'mass PM10[μg/m3]','number PM2.5[#/cm3]','number PM4[#/cm3]']
    
    # set_index / drop return new objects, so the caller's data-frame keeps its 'Time' column and all its variables.
    # errors='ignore' skips the columns that this particular sensor does not provide.
    indexed = df.set_index('Time').drop(columns=unused_columns, errors='ignore')

    # Resample the index of times every 1 second and interpolate linearly.
    # The rule is given as a Timedelta because the upper-case 'S' alias is deprecated in recent pandas versions.
    df = indexed.resample(pd.Timedelta(seconds=1)).asfreq().interpolate()
    df = df.reset_index()  
    
    print('After interpolating, the new data-set has {} records'.format(len(df)))
    print('')
    
    return df    
    

    
    
def average_2_sensors(df1,df2):
    '''
    Function that computes the average quantities of two sensors from the same trolley.
    
    1. Both data-sets are cut to their common time span (latest start time, earliest end time).
    2. Afterwards, we compute, at each record (second), the average of latitude, longitude, temperature, pressure...
    3. We create a new Data-Frame with the averaged quantities (keeping the time column of df1).
    
    The records are paired by position after the cut, so both data-sets are expected to be sampled on the same
    1-second grid (as returned by the linear interpolation).
    
    Parameters:
        df1, df2: DataFrames of the two sensors, each with a 'Time' column and the columns 'Lat', 'Lon', 'Temp[°C]',
                  'Hum[%]', 'Alt[m]', 'Press[mbar]', 'HDX[°C]', 'Speed[km/h]' and 'DP[°C]'. The column 'θ[K]' is optional.
                  
    Returns:
        A new DataFrame with 'Time' and the averaged columns ('θ[K]' only when both sensors provide it).
        The input data-frames are not modified.
    
    '''    
    
    # Compare the initial and final times of the two sensors
    
    initial_time_df1 = df1['Time'].iloc[0]
    final_time_df1 = df1['Time'].iloc[-1]
    
    initial_time_df2 = df2['Time'].iloc[0]
    final_time_df2 = df2['Time'].iloc[-1]
    
    
    # We have to keep the largest initial time (later) and the smallest final time (earlier)
    # For example, if df1 starts at 13:50:00 and df2 at 13:50:10, then we have to take 13:50:10 for the new data-set, since
    # df2 has no data earlier than 13:50:10. On the other hand, if df1 ends at 14:00:10 and df2 at 14:00:15, then we must take
    # 14:00:10 as the final time for the new data-set, since df1 has no data later than 14:00:10.
    
    initial_time_df = max(initial_time_df1, initial_time_df2)
    final_time_df = min(final_time_df1, final_time_df2)
    
    
    print('The data-set 1 has {} records, starts at {} and ends at {}'.format(len(df1),initial_time_df1,final_time_df1))
    print('The data-set 2 has {} records, starts at {} and ends at {}'.format(len(df2),initial_time_df2,final_time_df2))
    
    print('So the new data-set starts at {} and ends at {}'.format(initial_time_df,final_time_df))
    
    
    # Cut the data-sets df1 and df2 with the new initial and final times (and restart the index at 0 in both)

    df1 = df1.loc[(df1['Time'] >= initial_time_df) & (df1['Time'] <= final_time_df )].reset_index(drop=True)
    df2 = df2.loc[(df2['Time'] >= initial_time_df) & (df2['Time'] <= final_time_df )].reset_index(drop=True)
        
    
    if len(df1) == len(df2):
        print('Now both data-sets have the same number of records, which is {}'.format(len(df1)))
    
    print('')
    
    # Columns to average
    
    averaged_columns = ['Lat','Lon','Temp[°C]','Hum[%]','Alt[m]','Press[mbar]','HDX[°C]','Speed[km/h]','DP[°C]']
    
    # The potential temperature is optional: it is averaged only when both sensors provide it.
    # (The check has to be done on the inputs; the output data-frame is still empty at this point.)
    if 'θ[K]' in df1 and 'θ[K]' in df2:
        averaged_columns.append('θ[K]')
    
    # Average each column in a new data-frame
    
    df = pd.DataFrame()
    df['Time'] = df1['Time']
    
    for column in averaged_columns:
        df[column] = (df1[column] + df2[column]) / 2
    
    print('')
    
    return df
    
    
    

def relabel_time(df):
    '''
    Function that adds the column "Time(s=2)", which is the "Time" column advanced by 1 minute.
    
    A forward moving average of 2 minutes computed at t(i) covers the window [t(i), t(i) + 2min], so the
    centre of that window is
    
                        t(i)* = [ t(i) + (t(i) + 2min) ] / 2 = t(i) + 1 min
    
    The new column is written into "df" itself, and the same data-frame is returned.
    
    '''
    
    one_minute = pd.Timedelta(minutes=1)
    
    # "Time" is parsed to datetime (if it is not already) before shifting it
    shifted_time = pd.to_datetime(df['Time']) + one_minute
    df['Time(s=2)'] = shifted_time
    
    return df



def new_temperatures_and_humidex(df1,df_fixed):
    '''
    Function that adds to a trajectory two new temperature variables and two new humidex variables,
    both relative to the fixed sensor.
    
    The new columns are:
        1. 'T-T_fixed':            T(t) - T_fixed(t)
        2. 'T-T_fixed+<T>':        T(t) - T_fixed(t) + <T_fixed>
        3. 'HDX-HDX_fixed':        HDX(t) - HDX_fixed(t)
        4. 'HDX-HDX_fixed+<HDX>':  HDX(t) - HDX_fixed(t) + <HDX_fixed>
    
    Both data-sets are first cut to their common time span (latest start, earliest end) and re-indexed from 0,
    so the records are paired by position. The averages <T_fixed> and <HDX_fixed> are computed over the
    fixed-sensor records that remain after this cut.
    
    Returns the cut copy of df1 with the four new columns; the data-frames passed in are left untouched.
    
    '''
    
    # First and last timestamps of the trajectory and of the fixed sensor
    
    start_moving, end_moving = df1['Time'].iloc[0], df1['Time'].iloc[-1]
    start_fixed, end_fixed = df_fixed['Time'].iloc[0], df_fixed['Time'].iloc[-1]
    
    
    # Common time span: the later of the two starts and the earlier of the two ends
    
    window_start = max(start_moving, start_fixed)
    window_end = min(end_moving, end_fixed)
    
    
    print('The data-set 1 has {} records, starts at {} and ends at {}'.format(len(df1),start_moving,end_moving))
    print('The data-set 2 has {} records, starts at {} and ends at {}'.format(len(df_fixed),start_fixed,
                                                                              end_fixed))
    
    print('So the new data-set starts at {} and ends at {}'.format(window_start,window_end))       
    
    
    # Keep only the records inside the common time span, and restart the index at 0
    
    inside_moving = (df1['Time'] >= window_start) & (df1['Time'] <= window_end)
    df1 = df1.loc[inside_moving].reset_index().drop(columns='index')
    
    inside_fixed = (df_fixed['Time'] >= window_start) & (df_fixed['Time'] <= window_end)
    df_fixed = df_fixed.loc[inside_fixed].reset_index().drop(columns='index')
    
    
    if len(df1) == len(df_fixed):
        print('Now both data-sets have the same number of records, which is {}'.format(len(df1)))  
    
    
    # Difference with respect to the fixed sensor, record by record
    
    temp_difference = df1['Temp[°C]'] - df_fixed['Temp[°C]']
    hdx_difference = df1['HDX[°C]'] - df_fixed['HDX[°C]']
    
    df1['T-T_fixed'] = temp_difference
    df1['HDX-HDX_fixed'] = hdx_difference
    
    
    # Same differences, shifted by the mean value of the fixed sensor over the common time span
    
    mean_temp_fixed = df_fixed['Temp[°C]'].mean()
    mean_hdx_fixed = df_fixed['HDX[°C]'].mean()
    
    df1['T-T_fixed+<T>'] = temp_difference + mean_temp_fixed
    df1['HDX-HDX_fixed+<HDX>'] = hdx_difference + mean_hdx_fixed
    
    print('')
    
    return df1
    
    
    
        
def moving_avg(window_size, df,temp):
    '''
    Function that computes the moving average over windows of "window_size" consecutive records. 
    
    The variable "temp" is a string, which is the name of the column of the variable we want to average (for example 'T').
    
    The moving average is calculated in a time-advanced way. That is, the new variable "temp*" at the position i is
    the result of averaging "temp" over the records i, i+1, ..., i+window_size-1. With records separated by
    1 second, this is the time window [t(i), t(i)+s] with s = window_size - 1 seconds.
    
    For example, if window_size = 3 (s = 2 seconds):
    
    t(0) --> T(0) --> T(0)* = [T(0)+T(1)+T(2)] / 3
    t(1) --> T(1) --> T(1)* = [T(1)+T(2)+T(3)] / 3
    ...
    
    Therefore, the general formula is, for a given position "i":
    
                        T*(i) = (1/(s+1)) * sum(from j=0 to s) T(j+i)
                        
    where s = window_size - 1. 
    
    The last "window_size - 1" positions do not have a complete window and are filled with NaN.
    
    Parameters:
        window_size: number of consecutive records in each window (integer >= 1).
        df: DataFrame that contains the column "temp". The new column is added to this same data-frame.
        temp: name of the column to average.
        
    Returns:
        The same data-frame "df", with the new column 'avg_moving_<temp>_<window_size-1>s'.
        
    Raises:
        ValueError if window_size is smaller than 1.
    
    '''
    
    if window_size < 1:
        raise ValueError('window_size must be at least 1, got {}'.format(window_size))
    
    # Variable T, which can be the temperature T, T-Tfixed, HDX...
    # The column is converted to a list only once (it does not change while the window moves)
    T = df[temp].tolist()
    
    # Number of positions that have a complete window (0 if the data-set is shorter than the window)
    n_complete_windows = max(len(T) - window_size + 1, 0)
    
    # Average T of each window [i, i + window_size)
    moving_averages = [sum(T[i : i + window_size]) / window_size for i in range(n_complete_windows)]

    # Add NaN values at the end of the list in order to add as a new column in the DataFrame
    moving_averages.extend([np.nan] * (len(T) - len(moving_averages)))
        
        
    # New column using the variable "temp" and the time window s = window_size - 1 for the name of the column
    
    df['avg_moving_'+temp+'_'+str(window_size-1)+'s'] = moving_averages

    return df

# Example with data from Fundació Comtal

    - Date of the experiment: 10/07/2024
    - 5 trolleys with 2 sensors per trolley
    - 1 fixed trolley with 2 sensors

##  0. Read the data-sets

In [90]:
def read_sensor_csv(filename, data_dir='f_comtal'):
    '''
    Function that reads the csv file of one sensor and prepares its "Time" column.
    
    1. Read the file "filename" inside the folder "data_dir".
    2. Keep only the first record of each timestamp (rows with a repeated "Time" are dropped).
    3. Parse "Time" (format YYYY-MM-DDTHH:MM:SS+zone) and remove the time-zone information, keeping the local time.
    
    Returns the resulting DataFrame.
    
    '''
    
    # Forward slash as separator: it is accepted on Windows and also works on Linux / macOS
    df = pd.read_csv('{}/{}'.format(data_dir, filename))
    df = df.drop_duplicates(subset='Time', keep='first', ignore_index=True)
    df['Time'] = pd.to_datetime(df['Time'], format='%Y-%m-%dT%H:%M:%S%z').dt.tz_localize(None)
    
    return df



# Trolley 1
df_comtal_carro1_sensor1 = read_sensor_csv('comtal_10juliol2024_carro1_sensor1.csv')
df_comtal_carro1_sensor2 = read_sensor_csv('comtal_10juliol2024_carro1_sensor2.csv')

# Trolley 2
df_comtal_carro2_sensor3 = read_sensor_csv('comtal_10juliol2024_carro2_sensor3.csv')
df_comtal_carro2_sensor4 = read_sensor_csv('comtal_10juliol2024_carro2_sensor4.csv')

# Trolley 3
df_comtal_carro3_sensor5 = read_sensor_csv('comtal_10juliol2024_carro3_sensor5.csv')
df_comtal_carro3_sensor18 = read_sensor_csv('comtal_10juliol2024_carro3_sensor18.csv')

# Trolley 4
df_comtal_carro4_sensor7 = read_sensor_csv('comtal_10juliol2024_carro4_sensor7.csv')
df_comtal_carro4_sensor8 = read_sensor_csv('comtal_10juliol2024_carro4_sensor8.csv')

# Trolley 5
df_comtal_carro5_sensor9 = read_sensor_csv('comtal_10juliol2024_carro5_sensor9.csv')
df_comtal_carro5_sensor10 = read_sensor_csv('comtal_10juliol2024_carro5_sensor10.csv')

# Trolley fixed
df_comtal_carro_fix_sensor15 = read_sensor_csv('comtal_10juliol2024_carro_fix_sensor15.csv')
df_comtal_carro_fix_sensor17 = read_sensor_csv('comtal_10juliol2024_carro_fix_sensor17.csv')

## 1. Linear interpolation

All the records equally spaced by 1 second

In [91]:
# Every sensor is interpolated on its own; the two sensors of each trolley are processed together.

# Trolley 1
df_comtal_carro1_sensor1, df_comtal_carro1_sensor2 = [
    linear_interpolation(sensor_df)
    for sensor_df in (df_comtal_carro1_sensor1, df_comtal_carro1_sensor2)
]

# Trolley 2
df_comtal_carro2_sensor3, df_comtal_carro2_sensor4 = [
    linear_interpolation(sensor_df)
    for sensor_df in (df_comtal_carro2_sensor3, df_comtal_carro2_sensor4)
]

# Trolley 3
df_comtal_carro3_sensor5, df_comtal_carro3_sensor18 = [
    linear_interpolation(sensor_df)
    for sensor_df in (df_comtal_carro3_sensor5, df_comtal_carro3_sensor18)
]

# Trolley 4
df_comtal_carro4_sensor7, df_comtal_carro4_sensor8 = [
    linear_interpolation(sensor_df)
    for sensor_df in (df_comtal_carro4_sensor7, df_comtal_carro4_sensor8)
]

# Trolley 5
df_comtal_carro5_sensor9, df_comtal_carro5_sensor10 = [
    linear_interpolation(sensor_df)
    for sensor_df in (df_comtal_carro5_sensor9, df_comtal_carro5_sensor10)
]

# Trolley fixed
df_comtal_carro_fix_sensor15, df_comtal_carro_fix_sensor17 = [
    linear_interpolation(sensor_df)
    for sensor_df in (df_comtal_carro_fix_sensor15, df_comtal_carro_fix_sensor17)
]

The original data-set has 4879 records
After interpolating, the new data-set has 6914 records

The original data-set has 4898 records
After interpolating, the new data-set has 6914 records

The original data-set has 4897 records
After interpolating, the new data-set has 6949 records

The original data-set has 4902 records
After interpolating, the new data-set has 6945 records

The original data-set has 5068 records
After interpolating, the new data-set has 7189 records

The original data-set has 4999 records
After interpolating, the new data-set has 7070 records

The original data-set has 5080 records
After interpolating, the new data-set has 7225 records

The original data-set has 5065 records
After interpolating, the new data-set has 7215 records

The original data-set has 5329 records
After interpolating, the new data-set has 7539 records

The original data-set has 5313 records
After interpolating, the new data-set has 7526 records

The original data-set has 5350 records
After inter

##  2. Average two sensors of the same trolley

Create a new data-set with the average quantities (coordinates, temperature, humidity...) of the two sensors from the same trolley, using the later start time and the earlier end time of the two. 

In [92]:
# One averaged data-frame per trolley, built from its two sensors (same order as the trolleys: 1,...,5 and fixed)
(df_comtal_carro1,
 df_comtal_carro2,
 df_comtal_carro3,
 df_comtal_carro4,
 df_comtal_carro5,
 df_comtal_carro_fix) = [
    average_2_sensors(first_sensor, second_sensor)
    for first_sensor, second_sensor in (
        (df_comtal_carro1_sensor1, df_comtal_carro1_sensor2),
        (df_comtal_carro2_sensor3, df_comtal_carro2_sensor4),
        (df_comtal_carro3_sensor5, df_comtal_carro3_sensor18),
        (df_comtal_carro4_sensor7, df_comtal_carro4_sensor8),
        (df_comtal_carro5_sensor9, df_comtal_carro5_sensor10),
        (df_comtal_carro_fix_sensor15, df_comtal_carro_fix_sensor17),
    )
]

The data-set 1 has 6914 records, starts at 2024-07-10 14:51:08 and ends at 2024-07-10 16:46:21
The data-set 2 has 6914 records, starts at 2024-07-10 14:51:12 and ends at 2024-07-10 16:46:25
So the new data-set starts at 2024-07-10 14:51:12 and ends at 2024-07-10 16:46:21
Now both data-sets have the same number of records, which is 6910


The data-set 1 has 6949 records, starts at 2024-07-10 14:50:43 and ends at 2024-07-10 16:46:31
The data-set 2 has 6945 records, starts at 2024-07-10 14:50:50 and ends at 2024-07-10 16:46:34
So the new data-set starts at 2024-07-10 14:50:50 and ends at 2024-07-10 16:46:31
Now both data-sets have the same number of records, which is 6942


The data-set 1 has 7189 records, starts at 2024-07-10 14:47:04 and ends at 2024-07-10 16:46:52
The data-set 2 has 7070 records, starts at 2024-07-10 14:49:04 and ends at 2024-07-10 16:46:53
So the new data-set starts at 2024-07-10 14:49:04 and ends at 2024-07-10 16:46:52
Now both data-sets have the same number of recor

## 3. Re-label time (t* = t + 1 minute) for the moving average of 2 minutes
Create a new column with the time advanced by 1 minute. The moving average of 2 minutes computed at time t covers the window [t, t + 2 min], so the new timestamp t + 1 min is the middle of that window. 


In [93]:
# Add the shifted time column to every trolley (the five moving ones and the fixed one)
(df_comtal_carro1,
 df_comtal_carro2,
 df_comtal_carro3,
 df_comtal_carro4,
 df_comtal_carro5,
 df_comtal_carro_fix) = [
    relabel_time(trolley_df)
    for trolley_df in (
        df_comtal_carro1,
        df_comtal_carro2,
        df_comtal_carro3,
        df_comtal_carro4,
        df_comtal_carro5,
        df_comtal_carro_fix,
    )
]

## 4. Calculate new temperatures and humidex
Create 4 new columns (2 for temperature and 2 for humidex), which are:
    
    1. T - T_fixed. Subtracting the temperature of the fixed sensor at each second (location)
    2. T - T_fixed + <T>. Same as 1, but then adding the average temperature of the fixed sensor (over all time)   
    3. HDX - HDX_fixed. Subtracting the humidex of the fixed sensor at each second (location)
    4. HDX - HDX_fixed + <HDX>. Same as 3, but then adding the average humidex of the fixed sensor (over all time)

In [94]:
# Every moving trolley is compared against the same fixed trolley
(df_comtal_carro1,
 df_comtal_carro2,
 df_comtal_carro3,
 df_comtal_carro4,
 df_comtal_carro5) = [
    new_temperatures_and_humidex(trolley_df, df_comtal_carro_fix)
    for trolley_df in (
        df_comtal_carro1,
        df_comtal_carro2,
        df_comtal_carro3,
        df_comtal_carro4,
        df_comtal_carro5,
    )
]

The data-set 1 has 6910 records, starts at 2024-07-10 14:51:12 and ends at 2024-07-10 16:46:21
The data-set 2 has 7389 records, starts at 2024-07-10 14:44:16 and ends at 2024-07-10 16:47:24
So the new data-set starts at 2024-07-10 14:51:12 and ends at 2024-07-10 16:46:21
Now both data-sets have the same number of records, which is 6910

The data-set 1 has 6942 records, starts at 2024-07-10 14:50:50 and ends at 2024-07-10 16:46:31
The data-set 2 has 7389 records, starts at 2024-07-10 14:44:16 and ends at 2024-07-10 16:47:24
So the new data-set starts at 2024-07-10 14:50:50 and ends at 2024-07-10 16:46:31
Now both data-sets have the same number of records, which is 6942

The data-set 1 has 7069 records, starts at 2024-07-10 14:49:04 and ends at 2024-07-10 16:46:52
The data-set 2 has 7389 records, starts at 2024-07-10 14:44:16 and ends at 2024-07-10 16:47:24
So the new data-set starts at 2024-07-10 14:49:04 and ends at 2024-07-10 16:46:52
Now both data-sets have the same number of records


## 5. Moving average with s = 1,...,10 minutes 


In [95]:
# Variables to smooth
temps = ['Temp[°C]','HDX[°C]','T-T_fixed','HDX-HDX_fixed','T-T_fixed+<T>','HDX-HDX_fixed+<HDX>']

# Windows of 1,...,10 minutes. With one record per second, a window of m minutes has 60*m + 1 records
window_sizes = [60 * minutes + 1 for minutes in range(1, 11)]

for temp in temps:
    for window_size in window_sizes:
        (df_comtal_carro1,
         df_comtal_carro2,
         df_comtal_carro3,
         df_comtal_carro4,
         df_comtal_carro5) = [
            moving_avg(window_size, trolley_df, temp)
            for trolley_df in (
                df_comtal_carro1,
                df_comtal_carro2,
                df_comtal_carro3,
                df_comtal_carro4,
                df_comtal_carro5,
            )
        ]

## Save new data-frames

In [96]:
#df_comtal_carro1.to_csv('f_comtal\\df_comtal_carro1.csv',index=False)
#df_comtal_carro2.to_csv('f_comtal\\df_comtal_carro2.csv',index=False)
#df_comtal_carro3.to_csv('f_comtal\\df_comtal_carro3.csv',index=False)
#df_comtal_carro4.to_csv('f_comtal\\df_comtal_carro4.csv',index=False)
#df_comtal_carro5.to_csv('f_comtal\\df_comtal_carro5.csv',index=False)
#df_comtal_carro_fix.to_csv('f_comtal\\df_comtal_carro_fix.csv',index=False)