In [None]:
def dataframe_features_sensorANDcow(dataframe_name, label_var, time_back_number, features_sensor_list, features_animal_list):
    """Build an aligned pair of (features, labels) dataframes.

    Inputs:
    - dataframe_name: dataframe that contains the label column, the sensor
      columns named in features_sensor_list and the animal-level columns
      named in features_animal_list. It is not modified.
    - label_var: name of the label (outcome) column, as string. Rows whose
      label is null are discarded before anything else is computed.
    - time_back_number: how many sensor columns to keep. The sensor names are
      put in natural order (embedded numbers compared as integers, so
      't_minus2' comes before 't_minus10') and the first N are used.
    - features_sensor_list: the list of t_minusX features from the sensor.
      The caller's list is left untouched.
    - features_animal_list: the list of variables at the animal-level; they
      are copied through without transformation.

    Output:
    A tuple (df_features, df_label). df_features holds the standardized
    sensor columns followed by the animal columns, restricted to rows where
    every remaining feature is non-null (columns that are entirely null are
    dropped first). df_label holds the label column for exactly the same
    rows, in the same order, so both frames share the same index.
    """
    import re  # local import: only needed for the natural-sort key below

    def _natural_key(column_name):
        # re.split with a capturing group alternates text / digit chunks, so
        # odd positions are always digit runs and can be compared as integers.
        chunks = re.split(r'(\d+)', str(column_name))
        return [int(chunk) if pos % 2 else chunk for pos, chunk in enumerate(chunks)]

    #1. Remove null rows for the label column and turn the 'missing' marker into NaN
    has_label = dataframe_name[label_var].notnull()
    df = dataframe_name.loc[has_label].replace('missing', np.nan)

    #2. Keep the first `time_back_number` sensor columns. sorted() works on a
    #   copy, so the list passed in by the caller keeps its original order.
    n_sensors = len(range(0, time_back_number))
    sensor_columns = sorted(features_sensor_list, key=_natural_key)[:n_sensors]

    #3. Standardization of the sensor data (population std, NaNs ignored).
    #   The mean/std are computed over the label-non-null rows only.
    sensor_values = df[sensor_columns].astype('float')
    df_std = (sensor_values - sensor_values.mean()) / sensor_values.std(ddof=0)

    #4-5. Sensor features first, then the animal-level features, side by side
    df_all_features = pd.concat([df_std, df[list(features_animal_list)]], axis=1)

    #6. Drop the columns where all values are NaN, then keep only the rows
    #   where every remaining feature is present.
    df_all_features = df_all_features.dropna(axis=1, how='all')
    complete_rows = df_all_features.notnull().all(axis=1).values
    df_features = df_all_features.loc[complete_rows]

    #7. Selecting the label: the same positional mask is applied, so features
    #   and labels stay row-aligned even if index labels are repeated or the
    #   index carries a name.
    df_label = dataframe_name.loc[has_label, [label_var]].loc[complete_rows]

    return (df_features, df_label)

In [None]:
def dataframe_features_sensorANDcow(dataframe_name, label_var, time_back_number, features_sensor_list, features_animal_list):
    """Build an aligned pair of (features, labels) dataframes.

    Inputs:
    - dataframe_name: dataframe that contains the label column, the sensor
      columns named in features_sensor_list and the animal-level columns
      named in features_animal_list. It is not modified.
    - label_var: name of the label (outcome) column, as string. Rows whose
      label is null are discarded before anything else is computed.
    - time_back_number: how many sensor columns to keep. The sensor names are
      put in natural order (embedded numbers compared as integers, so
      't_minus2' comes before 't_minus10') and the first N are used.
    - features_sensor_list: the list of t_minusX features from the sensor.
      The caller's list is left untouched.
    - features_animal_list: the list of variables at the animal-level; they
      are copied through without transformation.

    Output:
    A tuple (df_features, df_label). df_features holds the standardized
    sensor columns followed by the animal columns, restricted to rows where
    every remaining feature is non-null (columns that are entirely null are
    dropped first). df_label holds the label column for exactly the same
    rows, in the same order, so both frames share the same index.
    """
    import re  # local import: only needed for the natural-sort key below

    def _natural_key(column_name):
        # re.split with a capturing group alternates text / digit chunks, so
        # odd positions are always digit runs and can be compared as integers.
        chunks = re.split(r'(\d+)', str(column_name))
        return [int(chunk) if pos % 2 else chunk for pos, chunk in enumerate(chunks)]

    #1. Remove null rows for the label column and turn the 'missing' marker into NaN
    has_label = dataframe_name[label_var].notnull()
    df = dataframe_name.loc[has_label].replace('missing', np.nan)

    #2. Keep the first `time_back_number` sensor columns. sorted() works on a
    #   copy, so the list passed in by the caller keeps its original order.
    n_sensors = len(range(0, time_back_number))
    sensor_columns = sorted(features_sensor_list, key=_natural_key)[:n_sensors]

    #3. Standardization of the sensor data (population std, NaNs ignored).
    #   The mean/std are computed over the label-non-null rows only.
    sensor_values = df[sensor_columns].astype('float')
    df_std = (sensor_values - sensor_values.mean()) / sensor_values.std(ddof=0)

    #4-5. Sensor features first, then the animal-level features, side by side
    df_all_features = pd.concat([df_std, df[list(features_animal_list)]], axis=1)

    #6. Drop the columns where all values are NaN, then keep only the rows
    #   where every remaining feature is present.
    df_all_features = df_all_features.dropna(axis=1, how='all')
    complete_rows = df_all_features.notnull().all(axis=1).values
    df_features = df_all_features.loc[complete_rows]

    #7. Selecting the label: the same positional mask is applied, so features
    #   and labels stay row-aligned even if index labels are repeated or the
    #   index carries a name.
    df_label = dataframe_name.loc[has_label, [label_var]].loc[complete_rows]

    return (df_features, df_label)

In [None]:
# Sensor feature names per configuration. Each entry is 't_minus<k>', where k
# is the lag index of the sensor reading. The 24/12/3 lists run from the
# largest lag down to 1; the 6 list runs from 1 up to 56.
features_sensor_list24 = ['t_minus%d' % lag for lag in range(3, 0, -1)]

features_sensor_list12 = ['t_minus%d' % lag for lag in range(6, 0, -1)]

features_sensor_list6 = ['t_minus%d' % lag for lag in range(1, 57)]

features_sensor_list3 = ['t_minus%d' % lag for lag in range(24, 0, -1)]

# Animal-level (non-sensor) feature columns.
features_animal = ['cow_id_x']