In [569]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
%matplotlib inline

In [570]:
# Load the filtered pod data; the first CSV column (the timestamp) becomes the index.
df_P = pd.read_csv('data/D8_filt_10mino3.csv', index_col=0)
print(df_P.head(5))

                       O3_ppb      UnixTime      e2v03       Temp         Rh  \
DATE (MM/DD/YYYY)_MST                                                          
2014-07-13 02:22:00        38  1.405218e+09  91.000000  27.100000  41.800000   
2014-07-13 02:23:00        37  1.405218e+09  91.666667  27.133333  41.933333   
2014-07-13 02:24:00        39  1.405218e+09  91.000000  27.100000  41.966667   
2014-07-13 02:25:00        40  1.405218e+09  90.333333  27.100000  42.200000   
2014-07-13 02:26:00        41  1.405218e+09  88.666667  27.100000  42.066667   

                       Zenith Angle [degrees]  days from start  ref_o3_smooth  \
DATE (MM/DD/YYYY)_MST                                                           
2014-07-13 02:22:00                 110.80431                0             38   
2014-07-13 02:23:00                 110.69766                0             37   
2014-07-13 02:24:00                 110.59040                0             39   
2014-07-13 02:25:00               

In [571]:
# Discard the columns at positions 7 and 8 (position 7 is 'ref_o3_smooth' in
# the preview printed by the load cell); the frame is modified in place.
unused_columns = df_P.columns[[7, 8]]
df_P.drop(unused_columns, axis=1, inplace=True)
print(df_P.head(5))

                       O3_ppb      UnixTime      e2v03       Temp         Rh  \
DATE (MM/DD/YYYY)_MST                                                          
2014-07-13 02:22:00        38  1.405218e+09  91.000000  27.100000  41.800000   
2014-07-13 02:23:00        37  1.405218e+09  91.666667  27.133333  41.933333   
2014-07-13 02:24:00        39  1.405218e+09  91.000000  27.100000  41.966667   
2014-07-13 02:25:00        40  1.405218e+09  90.333333  27.100000  42.200000   
2014-07-13 02:26:00        41  1.405218e+09  88.666667  27.100000  42.066667   

                       Zenith Angle [degrees]  days from start  
DATE (MM/DD/YYYY)_MST                                           
2014-07-13 02:22:00                 110.80431                0  
2014-07-13 02:23:00                 110.69766                0  
2014-07-13 02:24:00                 110.59040                0  
2014-07-13 02:25:00                 110.48252                0  
2014-07-13 02:26:00                 110.37404    

#### Define the slope functions.

In [572]:
def find_lag_slope(df_P, int_min, data_col):
    """Mean first-difference of ``data_col`` over the rows preceding each row.

    For row ``i`` the result is the mean of the first differences at positions
    ``i - int_min`` .. ``i - 1``. NaN differences (such as the one on the
    very first row) are skipped by the mean. The first ``int_min`` rows have
    no complete window and are filled with float NaN.

    Side effect: ``df_P['diff']`` is (re)written with the first difference of
    ``data_col``.

    Parameters
    ----------
    df_P : pandas.DataFrame holding ``data_col``.
    int_min : int, window length in rows.
    data_col : label of the column to differentiate.

    Returns
    -------
    list of float with one entry per row of ``df_P``.
    """
    df_P['diff'] = df_P[data_col].diff()
    diffs = df_P['diff']
    n_rows = len(diffs)

    # Real NaN (not the string 'NaN') keeps the resulting column numeric; the
    # padding is capped so the result never outgrows the frame.
    slope = [np.nan] * min(int_min, n_rows)
    slope.extend(diffs.iloc[i - int_min:i].mean() for i in range(int_min, n_rows))
    return slope

In [573]:
def find_lead_slope(df_P, int_min, data_col):
    """Mean first-difference of ``data_col`` over a window starting at each row.

    For row ``i`` the result is the mean of the first differences at positions
    ``i`` .. ``i + int_min - 1``. NaN differences (such as the one on the very
    first row) are skipped by the mean. The last ``int_min`` rows have no
    complete window and are filled with float NaN.

    Side effect: ``df_P['diff']`` is (re)written with the first difference of
    ``data_col``.

    Parameters
    ----------
    df_P : pandas.DataFrame holding ``data_col``.
    int_min : int, window length in rows.
    data_col : label of the column to differentiate.

    Returns
    -------
    list of float with one entry per row of ``df_P``.
    """
    df_P['diff'] = df_P[data_col].diff()
    diffs = df_P['diff']
    n_rows = len(diffs)
    n_valid = max(n_rows - int_min, 0)

    slope = [diffs.iloc[i:i + int_min].mean() for i in range(n_valid)]
    # Real NaN (not the string 'NaN') keeps the resulting column numeric; the
    # padding is sized so the result always matches the frame length.
    slope.extend([np.nan] * (n_rows - n_valid))
    return slope

#### Define functions that create features representing the area under the ozone, temperature, and humidity curves.

In [574]:
def find_lag_integral(df_P, int_min, data_col):
    """Trapezoidal area under ``data_col`` over the rows preceding each row.

    For row ``i`` the area is computed (unit spacing) over the values at
    positions ``i - int_min`` .. ``i - 1``. The first ``int_min`` rows have no
    complete window and are filled with float NaN.

    Parameters
    ----------
    df_P : pandas.DataFrame holding ``data_col``.
    int_min : int, window length in rows.
    data_col : label of the column to integrate.

    Returns
    -------
    list of float with one entry per row of ``df_P``.
    """
    # NumPy 2 renamed trapz to trapezoid; use whichever this install provides.
    trapezoid = getattr(np, 'trapezoid', None) or np.trapz
    values = df_P[data_col].values
    n_rows = len(values)

    # Real NaN (not the string 'NaN') keeps the resulting column numeric; the
    # padding is capped so the result never outgrows the frame.
    area_curve = [np.nan] * min(int_min, n_rows)
    area_curve.extend(trapezoid(values[i - int_min:i]) for i in range(int_min, n_rows))
    return area_curve

In [575]:
def find_lead_integral(df_P, int_min, data_col):
    """Trapezoidal area under ``data_col`` over a window starting at each row.

    For row ``i`` the area is computed (unit spacing) over the values at
    positions ``i`` .. ``i + int_min - 1``. The last ``int_min`` rows have no
    complete window and are filled with float NaN.

    Parameters
    ----------
    df_P : pandas.DataFrame holding ``data_col``.
    int_min : int, window length in rows.
    data_col : label of the column to integrate.

    Returns
    -------
    list of float with one entry per row of ``df_P``.
    """
    # NumPy 2 renamed trapz to trapezoid; use whichever this install provides.
    trapezoid = getattr(np, 'trapezoid', None) or np.trapz
    values = df_P[data_col].values
    n_rows = len(values)
    n_valid = max(n_rows - int_min, 0)

    area_curve = [trapezoid(values[i:i + int_min]) for i in range(n_valid)]
    # Real NaN (not the string 'NaN') keeps the resulting column numeric; the
    # padding is sized so the result always matches the frame length.
    area_curve.extend([np.nan] * (n_rows - n_valid))
    return area_curve

In [576]:
# start_min_before must be greater than end_min_before: the window opens at the
# older offset and closes at the more recent one.
def sliding_integral_lag(df_P, start_min_before, end_min_before, column):
    """Rolling sum of ``column`` over a window that ends before each row.

    For row ``j`` the result is the sum of the values at positions
    ``j - start_min_before + 1`` .. ``j - end_min_before``. Rows whose window
    reaches before the start of the data are NaN.

    Parameters
    ----------
    df_P : pandas.DataFrame holding ``column``.
    start_min_before : int, older edge of the window (rows before the current row).
    end_min_before : int, newer edge of the window (rows before the current row).
    column : label of the column to sum.

    Returns
    -------
    numpy.ndarray of float with one entry per row of ``df_P``.

    Raises
    ------
    ValueError
        If ``start_min_before`` is not greater than ``end_min_before``.
    """
    interval = start_min_before - end_min_before
    if interval <= 0:
        raise ValueError('start_min_before must be greater than end_min_before')

    # Read the requested column (previously 'e2v03' was used regardless of
    # ``column``). Shifting moves the window end back by end_min_before rows.
    shifted = df_P[column].shift(end_min_before).values
    window_sum = np.convolve(shifted, np.ones(interval), 'valid')
    # 'valid' mode drops the first interval - 1 positions; pad them back.
    padding = np.full(interval - 1, np.nan)
    return np.concatenate((padding, window_sum))

In [577]:
# start_min_after must be less than end_min_after
def sliding_integral_lead(df_P, start_min_after, end_min_after, column):
    """Rolling sum of ``column`` over a window that starts after each row.

    For row ``k`` the result is the sum of the values at positions
    ``k + start_min_after`` .. ``k + end_min_after - 1``. The last
    ``start_min_after + end_min_after - 1`` rows are NaN.

    The kernel spans ``end_min_after`` samples (ones followed by zeros), so a
    NaN in the data propagates to every output whose kernel span covers it.
    If ``end_min_after`` is not greater than ``start_min_after`` the kernel is
    degenerate (it can be all zeros) and the result is meaningless; callers
    are expected to pass ``start_min_after < end_min_after``.

    Parameters
    ----------
    df_P : pandas.DataFrame holding ``column``.
    start_min_after : int, first offset (rows after the current row) in the window.
    end_min_after : int, offset just past the last row in the window.
    column : label of the column to sum.

    Returns
    -------
    numpy.ndarray of float with one entry per row of ``df_P``.
    """
    interval = end_min_after - start_min_after
    values = np.asarray(df_P[column].values, dtype=float)
    # Drop the trailing start_min_after samples by position. The previous
    # NaN mask also removed NaNs inside the data, which misaligned every later
    # window and shortened the output.
    usable = values[:max(len(values) - start_min_after, 0)]

    kernel = np.zeros(end_min_after)
    kernel[0:interval] = 1
    window_sum = np.convolve(usable, kernel, 'valid')
    padding = np.full(start_min_after + end_min_after - 1, np.nan)
    return np.concatenate((window_sum, padding))

### Define the caller functions that apply the slope and integral functions over a range of windows and add the results to the dataframe.

In [578]:
def make_func_caller_find_lag_integral(df_P, min_time, max_time, interval, column):
    """Add one lag-integral column per window length and return the frame.

    Window lengths start at ``min_time`` and grow by ``interval`` for as long
    as they do not exceed ``max_time``. Each ``find_lag_integral`` result is
    stored in ``df_P`` under ``<column>_int_lag_<window>``.
    """
    window = min_time
    while window <= max_time:
        feature_name = '{0}_int_lag_{1}'.format(column, window)
        df_P[feature_name] = find_lag_integral(df_P, window, column)
        window += interval
    return df_P

def make_func_caller_find_lead_integral(df_P, min_time, max_time, interval, column):
    """Add one lead-integral column per window length and return the frame.

    Window lengths start at ``min_time`` and grow by ``interval`` for as long
    as they do not exceed ``max_time``. Each ``find_lead_integral`` result is
    stored in ``df_P`` under ``<column>_int_lead_<window>``.
    """
    window = min_time
    while window <= max_time:
        feature_name = '{0}_int_lead_{1}'.format(column, window)
        df_P[feature_name] = find_lead_integral(df_P, window, column)
        window += interval
    return df_P

def make_func_caller_sliding_integral_lag(df_P, max_end_min_before, min_end_min_before, frequency, interval_size, column):
    """Add sliding lag-integral columns for a series of window positions.

    The window end offset runs from ``min_end_min_before`` up to
    ``max_end_min_before`` in steps of ``frequency``; the window start is
    ``interval_size`` further back. Each ``sliding_integral_lag`` result is
    stored under ``<column>_int_slide_<start>_to_<end>_lag``. Returns ``df_P``.
    """
    window_end = min_end_min_before
    while window_end <= max_end_min_before:
        window_start = window_end + interval_size
        feature_name = '{0}_int_slide_{1}_to_{2}_lag'.format(column, window_start, window_end)
        df_P[feature_name] = sliding_integral_lag(df_P, window_start, window_end, column)
        window_end += frequency
    return df_P

def make_func_caller_sliding_integral_lead(df_P, min_start_min_after, max_start_min_after, frequency, interval_size, column):
    """Add sliding lead-integral columns for a series of window positions.

    The window start offset runs from ``min_start_min_after`` up to
    ``max_start_min_after`` in steps of ``frequency``; the window end is
    ``interval_size`` later. Each ``sliding_integral_lead`` result is stored
    under ``<column>_int_slide_<start>_to_<end>_lead``. Returns ``df_P``.
    """
    window_start = min_start_min_after
    while window_start <= max_start_min_after:
        window_end = window_start + interval_size
        feature_name = (str(column) + '_int' + '_slide_' + str(window_start)
                        + '_to_' + str(window_end) + '_lead')
        # sliding_integral_lead expects (start, end) with start < end. The two
        # offsets used to be passed swapped, which made the interval negative.
        df_P[feature_name] = sliding_integral_lead(df_P, window_start, window_end, column)
        window_start += frequency
    return df_P

def make_func_caller_find_lag_slope(df_P, min_time, max_time, interval, column):
    """Add one lag-slope column per window length and return the frame.

    Window lengths start at ``min_time`` and grow by ``interval`` for as long
    as they do not exceed ``max_time``. Each ``find_lag_slope`` result is
    stored in ``df_P`` under ``<column>_slope_lag_<window>``.
    """
    window = min_time
    while window <= max_time:
        feature_name = '{0}_slope_lag_{1}'.format(column, window)
        df_P[feature_name] = find_lag_slope(df_P, window, column)
        window += interval
    return df_P

def make_func_caller_find_lead_slope(df_P, min_time, max_time, interval, column):
    """Add one lead-slope column per window length and return the frame.

    Window lengths start at ``min_time`` and grow by ``interval`` for as long
    as they do not exceed ``max_time``. Each ``find_lead_slope`` result is
    stored in ``df_P`` under ``<column>_slope_lead_<window>``.
    """
    window = min_time
    while window <= max_time:
        feature_name = '{0}_slope_lead_{1}'.format(column, window)
        df_P[feature_name] = find_lead_slope(df_P, window, column)
        window += interval
    return df_P

def make_func_caller_find_sliding_slope(df_P, start_bef_or_aft_point, end_bef_or_aft_point, size, column):
    """Add a single sliding-slope column of window ``size`` and return the frame.

    The result of ``find_sliding_slope`` is stored in ``df_P`` under
    ``<column>_slope_slide_<size>``.

    NOTE(review): ``find_sliding_slope`` is not defined in the visible part of
    this notebook; calling this raises NameError unless it is defined elsewhere.
    """
    # The former ``while i <= size: ...; i += size`` loop ran exactly once for
    # a positive size and never terminated for size <= 0; a single call is
    # equivalent for positive sizes and always returns.
    feature_name = str(column) + '_slope' + '_slide_' + str(size)
    df_P[feature_name] = find_sliding_slope(df_P, start_bef_or_aft_point, end_bef_or_aft_point, size, column)
    return df_P

In [579]:
# Base sensor series that every engineered term below is derived from.
o3_signal = df_P['e2v03']
temp_signal = df_P['Temp']
rh_signal = df_P['Rh']

# Per-signal terms: square root, square, cube and natural log
# (ozone first, then temperature, then relative humidity).
for term_prefix, log_name, base_signal in [('O3', 'ln_O3', o3_signal),
                                           ('temp', 'ln_temp', temp_signal),
                                           ('rh', 'ln_rh', rh_signal)]:
    df_P[term_prefix + '_sqrt'] = np.sqrt(base_signal.astype(float))
    df_P[term_prefix + '_sq'] = base_signal ** 2
    df_P[term_prefix + '_cu'] = base_signal ** 3
    df_P[log_name] = np.log(base_signal)

# ozone/temp interactions
df_P['03_mult_temp'] = o3_signal * temp_signal
df_P['03_sq_mult_temp_sq'] = (o3_signal ** 2) * temp_signal ** 2
df_P['ln_03_mult_ln_temp'] = np.log(temp_signal) * np.log(o3_signal)

# ozone/rh interactions
df_P['03_mult_rh'] = o3_signal * rh_signal
df_P['03_sq_mult_rh_sq'] = (o3_signal ** 2) * rh_signal ** 2
df_P['ln_03_mult_ln_rh'] = np.log(rh_signal) * np.log(o3_signal)

# temp/rh interactions
df_P['temp_mult_rh'] = temp_signal * rh_signal
df_P['temp_sq_mult_rh_sq'] = (temp_signal ** 2) * rh_signal ** 2
df_P['ln_temp_mult_ln_rh'] = np.log(temp_signal) * np.log(rh_signal)

# ozone/rh/temp interactions
df_P['03_mult_rh_&_temp'] = o3_signal * rh_signal * temp_signal
df_P['03_sq_mult_rh_sq_&_temp_sq'] = (o3_signal ** 2) * (rh_signal ** 2) * temp_signal ** 2
df_P['ln_03_mult_ln_rh_&_ln_temp'] = np.log(temp_signal) * np.log(rh_signal) * np.log(o3_signal)

# Columns that receive window features, in the order their features are added.
signal_columns = ['e2v03', 'ln_O3', 'Temp', 'ln_temp', 'Rh', 'ln_rh']

# lag integrals for o3, temp, and rh
for signal_name in signal_columns:
    make_func_caller_find_lag_integral(df_P, 5, 120, 15, signal_name)

# lead integrals for o3, temp, and rh
for signal_name in signal_columns:
    make_func_caller_find_lead_integral(df_P, 5, 120, 15, signal_name)

# sliding lag integrals
# inputs- (df_P, max_end_min_before, min_end_min_before, frequency, interval_size, column)
make_func_caller_sliding_integral_lag(df_P, 15, 5, 10, 10, 'ln_O3')
for signal_name in ['e2v03', 'Temp', 'ln_temp', 'Rh', 'ln_rh']:
    make_func_caller_sliding_integral_lag(df_P, 20, 5, 10, 10, signal_name)

# sliding lead integrals
# inputs- (df_P, min_start_min_after, max_start_min_after, frequency, interval_size, column)
for signal_name in signal_columns:
    make_func_caller_sliding_integral_lead(df_P, 2, 10, 10, 10, signal_name)

# lag slopes for o3, temp, and rh
for signal_name in signal_columns:
    make_func_caller_find_lag_slope(df_P, 5, 120, 15, signal_name)

# lead slopes for o3, temp, and rh
for signal_name in signal_columns:
    make_func_caller_find_lead_slope(df_P, 5, 120, 15, signal_name)

features = list(df_P.columns)
print(df_P[:5])

                       O3_ppb      UnixTime      e2v03       Temp         Rh  \
DATE (MM/DD/YYYY)_MST                                                          
2014-07-13 02:22:00        38  1.405218e+09  91.000000  27.100000  41.800000   
2014-07-13 02:23:00        37  1.405218e+09  91.666667  27.133333  41.933333   
2014-07-13 02:24:00        39  1.405218e+09  91.000000  27.100000  41.966667   
2014-07-13 02:25:00        40  1.405218e+09  90.333333  27.100000  42.200000   
2014-07-13 02:26:00        41  1.405218e+09  88.666667  27.100000  42.066667   

                       Zenith Angle [degrees]  days from start   O3_sqrt  \
DATE (MM/DD/YYYY)_MST                                                      
2014-07-13 02:22:00                 110.80431                0  9.539392   
2014-07-13 02:23:00                 110.69766                0  9.574271   
2014-07-13 02:24:00                 110.59040                0  9.539392   
2014-07-13 02:25:00                 110.48252              

In [580]:
# Column labels of the feature table, displayed as the cell output.
features = df_P.columns.tolist()
features

['O3_ppb',
 'UnixTime',
 'e2v03',
 'Temp',
 'Rh',
 'Zenith Angle [degrees]',
 'days from start',
 'O3_sqrt',
 'O3_sq',
 'O3_cu',
 'ln_O3',
 'temp_sqrt',
 'temp_sq',
 'temp_cu',
 'ln_temp',
 'rh_sqrt',
 'rh_sq',
 'rh_cu',
 'ln_rh',
 '03_mult_temp',
 '03_sq_mult_temp_sq',
 'ln_03_mult_ln_temp',
 '03_mult_rh',
 '03_sq_mult_rh_sq',
 'ln_03_mult_ln_rh',
 'temp_mult_rh',
 'temp_sq_mult_rh_sq',
 'ln_temp_mult_ln_rh',
 '03_mult_rh_&_temp',
 '03_sq_mult_rh_sq_&_temp_sq',
 'ln_03_mult_ln_rh_&_ln_temp',
 'e2v03_int_lag_5',
 'e2v03_int_lag_20',
 'e2v03_int_lag_35',
 'e2v03_int_lag_50',
 'e2v03_int_lag_65',
 'e2v03_int_lag_80',
 'e2v03_int_lag_95',
 'e2v03_int_lag_110',
 'ln_O3_int_lag_5',
 'ln_O3_int_lag_20',
 'ln_O3_int_lag_35',
 'ln_O3_int_lag_50',
 'ln_O3_int_lag_65',
 'ln_O3_int_lag_80',
 'ln_O3_int_lag_95',
 'ln_O3_int_lag_110',
 'Temp_int_lag_5',
 'Temp_int_lag_20',
 'Temp_int_lag_35',
 'Temp_int_lag_50',
 'Temp_int_lag_65',
 'Temp_int_lag_80',
 'Temp_int_lag_95',
 'Temp_int_lag_110',
 'ln_t

In [581]:
# Drop the first and last 120 rows, by position, to remove the rows whose
# lag/lead windows are incomplete (NaN). `.iloc` replaces the removed `.ix`
# indexer, and `.copy()` makes the result an independent frame so the column
# assignments that follow do not write into a slice of the original.
df_P = df_P.iloc[120:len(df_P) - 120].copy()

In [582]:
# Window-level interaction features: products of the per-window integral and
# slope features of two (or three) signals.
def _add_window_interactions(frame, out_prefix, factor_prefixes, direction, windows):
    """Multiply matching window features and store the products in ``frame``.

    For every window and feature kind ('int', then 'slope') the columns
    ``<factor>_<kind>_<direction>_<window>`` of all factors are multiplied
    left to right and written to ``<out_prefix>_<kind>_<direction>_<window>``.
    """
    for window in windows:
        for kind in ('int', 'slope'):
            suffix = '_{0}_{1}_{2}'.format(kind, direction, window)
            product = frame[factor_prefixes[0] + suffix]
            for factor_prefix in factor_prefixes[1:]:
                product = product * frame[factor_prefix + suffix]
            frame[out_prefix + suffix] = product


# Each group holds the raw-signal pairing and its natural-log counterpart.
pairwise_groups = [
    # ozone/temp
    [('o3_temp', ('e2v03', 'Temp')), ('ln_o3_temp', ('ln_O3', 'ln_temp'))],
    # temp/rh
    [('temp_rh', ('Temp', 'Rh')), ('ln_temp_rh', ('ln_temp', 'ln_rh'))],
    # ozone/rh
    [('o3_rh', ('e2v03', 'Rh')), ('ln_o3_rh', ('ln_O3', 'ln_rh'))],
]

# Column order per group: raw lag, ln lag, raw lead, ln lead.
# Generating the names removes two copy-paste slips in the ln temp/rh lead
# features: 'ln_temp_rh_int_lead_110' was built from the slope columns, and
# 'ln_temp_rh_slope_lead_95' was never created.
for group_variants in pairwise_groups:
    for direction in ('lag', 'lead'):
        for out_prefix, factor_prefixes in group_variants:
            _add_window_interactions(df_P, out_prefix, factor_prefixes, direction, (35, 65, 95, 110))

# ozone/temp/rh (three-way products use the 35, 65 and 95 windows only)
for direction in ('lag', 'lead'):
    _add_window_interactions(df_P, 'o3_temp_rh', ('Temp', 'Rh', 'e2v03'), direction, (35, 65, 95))


In [583]:
# Trim a further 5 rows from each end of the frame, by position, and preview
# the first rows. `.iloc` replaces the removed `.ix` indexer.
df_P = df_P.iloc[5:len(df_P) - 5]
print(df_P.iloc[:5])

                       O3_ppb      UnixTime       e2v03       Temp         Rh  \
DATE (MM/DD/YYYY)_MST                                                           
2014-07-13 04:27:00        24  1.405226e+09  107.000000  25.900000  42.766667   
2014-07-13 04:28:00        26  1.405226e+09  104.500000  25.900000  42.700000   
2014-07-13 04:29:00        27  1.405226e+09  104.000000  25.900000  42.766667   
2014-07-13 04:30:00        26  1.405226e+09  104.000000  25.900000  42.833333   
2014-07-13 04:31:00        26  1.405226e+09  104.666667  25.933333  43.000000   

                       Zenith Angle [degrees]  days from start    O3_sqrt  \
DATE (MM/DD/YYYY)_MST                                                       
2014-07-13 04:27:00                  93.60655                0  10.344080   
2014-07-13 04:28:00                  93.44422                0  10.222524   
2014-07-13 04:29:00                  93.28160                0  10.198039   
2014-07-13 04:30:00                  93.11868  

In [584]:
# Write the engineered feature table to disk.
df_P.to_csv('data/D8_raw_features.csv')