# Import Packages

In [16]:
import rasterio
import pandas as pd
from os import listdir

# Load Downscaled Future Climate Data

In [17]:
# Building blocks of the raster file names and the folder that holds them
gcm_model_name = "ACCESS-CM2"
ssp_model_names = ["ssp126","ssp585"]
feature_names = ["tmin","tmax","prec","bioc"]
data_path = "../raw_data/wc2021-2040/"

# Collect the downscaled GeoTIFF file names in alphabetical order
data_filenames = sorted(
    name for name in listdir(data_path) if name.endswith("downscaled.tif")
)
# Group the file names by the scenario tag contained in the name
data_filenames_dict = {
    scenario: [name for name in data_filenames if scenario in name]
    for scenario in ssp_model_names
}

# Open every raster and read all of its bands into memory.
# The datasets stay open because later cells query them again.
data_list = [rasterio.open(data_path + name) for name in data_filenames]
data_read_list = [dataset.read() for dataset in data_list]
# file name -> (open dataset, array with all bands), if you need a dictionary
data_dict = {
    name: (dataset, values)
    for name, dataset, values in zip(data_filenames, data_list, data_read_list)
}
shape_list = [values.shape for values in data_read_list]
print("shapes",shape_list)
print(data_filenames)

shapes [(19, 360, 720), (19, 360, 720), (12, 360, 720), (12, 360, 720), (12, 360, 720), (12, 360, 720), (12, 360, 720), (12, 360, 720)]
['wc2.1_10m_bioc_ACCESS-CM2_ssp126_2021-2040_downscaled.tif', 'wc2.1_10m_bioc_ACCESS-CM2_ssp585_2021-2040_downscaled.tif', 'wc2.1_10m_prec_ACCESS-CM2_ssp126_2021-2040_downscaled.tif', 'wc2.1_10m_prec_ACCESS-CM2_ssp585_2021-2040_downscaled.tif', 'wc2.1_10m_tmax_ACCESS-CM2_ssp126_2021-2040_downscaled.tif', 'wc2.1_10m_tmax_ACCESS-CM2_ssp585_2021-2040_downscaled.tif', 'wc2.1_10m_tmin_ACCESS-CM2_ssp126_2021-2040_downscaled.tif', 'wc2.1_10m_tmin_ACCESS-CM2_ssp585_2021-2040_downscaled.tif']


# Plant Coordinates

## Add Future Climate Features

In [18]:
# Getting feature values from spatial data sets
def getvalue(data, lat, lon, band=0):
    """Return the raster value(s) of one band at the given lat/lon position(s).

    Parameters
    ----------
    data : open raster dataset
        Object providing ``read`` and ``index`` (here: a dataset opened with
        ``rasterio.open``). The raster is expected to be on a regular
        lon/lat grid.
    lat, lon : float or sequence of float
        Latitude(s) and longitude(s) to look up. Sequences must have equal
        length; one value is returned per coordinate pair.
    band : int, default 0
        Zero-based position of the band (0 is the first band). Negative
        values count from the last band.

    Returns
    -------
    The value(s) stored in the selected band at the grid cell(s) that
    ``data.index`` reports for the coordinates.
    """
    if band < 0:
        # Negative positions count from the end, exactly like indexing the
        # full band stack, so the whole stack is needed here.
        band_array = data.read()[band]
    else:
        # Dataset band numbers start at 1. Reading only the requested band
        # avoids loading every band of the file on each call.
        band_array = data.read(band + 1)
    # since the raster is in regular lon/lat grid, we can use
    # `data.index()` to identify the index of a given lon/lat pair
    idx = data.index(lon, lat, precision=1E-6)
    return band_array[idx]

In [19]:
# Plant data provided by David: keep only the name and coordinate columns
plant_columns = ['species', 'scientificName',
                 'decimalLatitude', 'decimalLongitude']
plant_data_inkl_bioclim = (
    pd.read_csv("../raw_data/plant_data_inkl_bioclim/plant_data_inkl_bioclim_101.csv")
    [plant_columns]
    # keep=False removes every row that has a duplicate, not just the repeats
    .drop_duplicates(keep=False)
)
# One independent copy per scenario, to be filled with climate features
plant_data_future_df_dict = {
    scenario: plant_data_inkl_bioclim.copy() for scenario in ssp_model_names
}

In [20]:
# Fill each scenario dataframe in "plant_data_future_df_dict" with climate
# features: every band of every .tif file of that scenario becomes one column,
# looked up at the plant coordinates with getvalue().
for scenario in ssp_model_names:
    scenario_df = plant_data_future_df_dict[scenario]
    latitudes = scenario_df["decimalLatitude"]
    longitudes = scenario_df["decimalLongitude"]
    for data_filename in data_filenames_dict[scenario]:
        raster, raster_values = data_dict[data_filename]
        # Characters 10-13 of the file name hold the feature tag (e.g. "tmax")
        feature_prefix = data_filename[10:14]
        band_count = raster_values.shape[0]
        for band in range(band_count):
            # Column names are 1-based: tmax1 ... tmax12
            column_name = feature_prefix + str(band + 1)
            scenario_df[column_name] = getvalue(raster, latitudes, longitudes, band=band)







In [21]:
# First rows of the monthly tmax columns for scenario ssp126
plant_data_future_df_dict["ssp126"].loc[:, "tmax1":"tmax12"].head()

Unnamed: 0,tmax1,tmax2,tmax3,tmax4,tmax5,tmax6,tmax7,tmax8,tmax9,tmax10,tmax11,tmax12
1,,,,,,,,,,,,
3,31.160494,30.885185,30.739506,30.25679,29.132099,28.22716,28.482716,30.603704,31.803703,32.282715,32.341976,31.367901
4,30.311111,30.830864,31.017284,31.037037,30.793827,30.397532,30.751852,33.182716,34.227161,33.425926,32.160492,30.944445
5,31.372839,31.519753,31.91605,31.875309,31.419752,31.383951,32.465431,33.854321,34.932098,34.792591,34.048149,31.892591
6,8.777778,12.206173,17.687654,23.34074,27.432098,31.75679,34.428394,34.219753,31.095062,25.290123,18.223457,12.437037


In [22]:
# First rows of the monthly tmax columns for scenario ssp585
plant_data_future_df_dict["ssp585"].loc[:, "tmax1":"tmax12"].head()

Unnamed: 0,tmax1,tmax2,tmax3,tmax4,tmax5,tmax6,tmax7,tmax8,tmax9,tmax10,tmax11,tmax12
1,,,,,,,,,,,,
3,31.317284,30.959259,30.860493,30.42716,29.17037,28.371605,28.546913,30.744444,32.103703,32.388889,32.467903,31.482716
4,30.465431,31.018518,31.140741,31.145679,30.769136,30.470369,30.701235,33.23827,34.704937,33.697533,32.412346,31.119753
5,31.74321,31.777777,32.185184,32.304939,31.864197,31.550617,32.49506,33.943211,35.124691,35.007408,34.155556,32.390125
6,8.78642,12.664198,18.028395,23.492592,27.806173,31.951851,34.335804,34.164196,31.338272,25.580246,18.182716,12.339506


In [23]:
# Row count of each scenario dataframe
for scenario in ssp_model_names:
    row_count = plant_data_future_df_dict[scenario].shape[0]
    print(row_count)

85438
85438


In [32]:
# Column names of all future climate features, grouped by variable
bclim_feature_list = [f"bioc{band}" for band in range(1, 20)]
prec_feature_list = [f"prec{band}" for band in range(1, 13)]
tmin_feature_list = [f"tmin{band}" for band in range(1, 13)]
tmax_feature_list = [f"tmax{band}" for band in range(1, 13)]
feature_list = [
    *bclim_feature_list,
    *prec_feature_list,
    *tmin_feature_list,
    *tmax_feature_list,
]
# 19 + 3 * 12 = 55 features in total

## Remove NAs for Clustering and Save Data

In [24]:
# Per scenario, keep only the rows that have a value for every climate feature
plant_data_future_df_dict_drop_na = {}
for scenario in ssp_model_names:
    scenario_df = plant_data_future_df_dict[scenario]
    plant_data_future_df_dict_drop_na[scenario] = scenario_df.dropna(subset=feature_list)

In [25]:
# Row count of each scenario dataframe after dropping incomplete rows
for scenario in ssp_model_names:
    row_count = plant_data_future_df_dict_drop_na[scenario].shape[0]
    print(row_count)

55606
55606


In [31]:
# Save the NA-free ssp126 data (rows with missing features were dropped above)
plant_data_future_df_dict_drop_na["ssp126"].to_csv("../raw_data/plant_data_inkl_bioclim/plant_data_future.csv")
# Check the data that was just written. The name `plant_data_future` is never
# assigned in this notebook, so referring to it fails on a fresh kernel.
plant_data_future_df_dict_drop_na["ssp126"].head(2)

Unnamed: 0,species,scientificName,decimalLatitude,decimalLongitude,bioc1,bioc2,bioc3,bioc4,bioc5,bioc6,...,tmin3,tmin4,tmin5,tmin6,tmin7,tmin8,tmin9,tmin10,tmin11,tmin12
3,Desmodium longiarticulatum,Meibomia longiarticulata Rusby,-15.7896,-66.9742,25.164198,11.12716,67.701233,157.433334,32.511112,16.081482,...,21.2679,20.095062,18.569136,16.987654,16.081482,17.151852,18.987654,20.682716,21.001234,21.287655
4,Xenurobrycon coracoralinae,"Xenurobrycon coracoralinae Moreira, 2005",-15.9333,-50.15,26.060493,11.37284,62.476543,143.379013,34.704937,16.49753,...,21.279013,21.019753,19.134567,17.00247,16.49753,18.844444,21.430864,22.475309,22.282717,21.469135
