# Identify Relevant Features

## Single Feature Set Example

In [1]:
import os 
import pandas as pd
from tsraster.calculate import checkRelevance
import tsraster.prep as tr


# Load the table of extracted features from the example CSV file.
features = pd.read_csv(
    filepath_or_buffer=r"../Data/Examples/3month_features/extracted_features.csv"
)

Read in target data (variable you want to predict). 

In [2]:
#%% Folder holding the target data raster(s), i.e. the variable to predict.
# Given relative to the notebook, like the features CSV path used earlier in
# this notebook, so the example does not depend on one user's local drive.
# NOTE(review): assumes the notebook sits one level below the project root
# that contains Data/ -- confirm.
target_variable = r"../Data/Examples/3month_fire"

# Read the target data from that folder.
target_data = tr.targetData(target_variable)


In [3]:
# Run checkRelevance on the extracted features and the target data, and keep
# the returned table in `relevance`. The output below lists, per feature, a
# type, a p_value and a `relevant` flag.
relevance = checkRelevance(features, target_data)
# Bare last expression so the notebook renders the table.
relevance

Unnamed: 0_level_0,feature,type,p_value,relevant
feature,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
aet-value__mean,aet-value__mean,real,3.816483e-297,True
aet-value__quantile__q_0.95,aet-value__quantile__q_0.95,real,4.082583e-288,True
aet-value__maximum,aet-value__maximum,real,3.366128e-286,True
aet-value__minimum,aet-value__minimum,real,1.02945e-254,True
aet-value__quantile__q_0.15,aet-value__quantile__q_0.15,real,1.9168809999999998e-229,True


## Multiple Feature Sets Example

In this case we have multiple data sets that we need to summarize. Here is an example folder layout for temperature and precipitation rasters, with one subfolder per year:
``` 
temperature
    2005
        tmx-200501.tif 
        tmx-200502.tif
        tmx-200503.tif ...
    2006
        tmx-200601.tif
        tmx-200602.tif
        tmx-200603.tif...
precip
    2005
        ppt-200501.tif 
        ppt-200502.tif
        ppt-200503.tif ...
``` 

In [8]:
# Collect every extracted-feature CSV found under `path` and join them
# column-wise into one dataframe.
path = r'F:/5year/'

# os.walk yields entries in an arbitrary, filesystem-dependent order; sorting
# keeps the column order of the combined dataframe reproducible across runs
# and machines.
all_files = sorted(
    os.path.join(root, name)
    for root, _dirs, files in os.walk(path)
    for name in files
    if name.endswith("features.csv")
)

# Fail with a clear message instead of pd.concat's generic
# "No objects to concatenate" when the folder is missing or holds no matches.
if not all_files:
    raise FileNotFoundError(
        "No '*features.csv' files found under {!r}".format(path)
    )

# A list (not a one-shot generator) so the per-file frames stay inspectable.
df_from_each_file = [pd.read_csv(f) for f in all_files]

# axis=1 places the files side by side; rows are aligned on the default
# integer index, so every file must list its rows in the same order.
# NOTE(review): each file appears to carry its own 'id' column, which would be
# repeated in the result -- confirm this is what checkRelevance expects.
concatenated_df = pd.concat(df_from_each_file, axis=1, ignore_index=False)
concatenated_df.columns


Index(['id',
       'aet-value__agg_linear_trend__f_agg_"max"__chunk_len_6__attr_"slope"',
       'aet-value__agg_linear_trend__f_agg_"min"__chunk_len_6__attr_"slope"',
       'aet-value__count_above_mean', 'aet-value__count_below_mean',
       'aet-value__last_location_of_maximum',
       'aet-value__last_location_of_minimum',
       'aet-value__longest_strike_above_mean',
       'aet-value__longest_strike_below_mean', 'aet-value__maximum',
       ...
       'tmx-value__number_cwt_peaks__n_12', 'tmx-value__number_cwt_peaks__n_6',
       'tmx-value__quantile__q_0.05', 'tmx-value__quantile__q_0.15',
       'tmx-value__quantile__q_0.85', 'tmx-value__quantile__q_0.95',
       'tmx-value__ratio_beyond_r_sigma__r_2',
       'tmx-value__ratio_beyond_r_sigma__r_3', 'tmx-value__skewness',
       'tmx-value__sum_values'],
      dtype='object', length=144)

In [9]:
#%% Folder passed to tr.targetData for the multiple feature sets example.
# Note: this rebinds `target_variable` and `target_data`, which were first
# assigned in the single feature set example earlier in the notebook.
target_variable = "F:/5year/Fires/"

# Read the target data from that folder.
target_data = tr.targetData(target_variable)

In [10]:
# Run the relevance check on the combined feature table. As the last
# expression in the cell, the result is displayed rather than stored.
checkRelevance(concatenated_df, target_data)


Unnamed: 0_level_0,feature,type,p_value,relevant
feature,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
"pet-value__agg_linear_trend__f_agg_""max""__chunk_len_6__attr_""slope""","pet-value__agg_linear_trend__f_agg_""max""__chun...",real,0.000000e+00,True
ppt-value__longest_strike_above_mean,ppt-value__longest_strike_above_mean,real,0.000000e+00,True
ppt-value__last_location_of_minimum,ppt-value__last_location_of_minimum,real,0.000000e+00,True
ppt-value__last_location_of_maximum,ppt-value__last_location_of_maximum,real,0.000000e+00,True
ppt-value__count_below_mean,ppt-value__count_below_mean,real,0.000000e+00,True
ppt-value__count_above_mean,ppt-value__count_above_mean,real,0.000000e+00,True
"ppt-value__agg_linear_trend__f_agg_""min""__chunk_len_6__attr_""slope""","ppt-value__agg_linear_trend__f_agg_""min""__chun...",real,0.000000e+00,True
"ppt-value__agg_linear_trend__f_agg_""max""__chunk_len_6__attr_""slope""","ppt-value__agg_linear_trend__f_agg_""max""__chun...",real,0.000000e+00,True
pet-value__sum_values,pet-value__sum_values,real,0.000000e+00,True
pet-value__skewness,pet-value__skewness,real,0.000000e+00,True
