In [1]:
from pathlib import Path

import datacube
import numpy as np
import pandas as pd
import xarray as xr
from sklearn.preprocessing import OneHotEncoder
# pd.set_option('display.max_rows',None)
# pd.set_option('display.max_columns',None)

Run all the cells, from top to bottom, to create the training set.

In [2]:
# Load the dataset containing the flooding events from the datacube index.

# Example of datacube config file:
# datacube_config_path = "/home/user/datacube.conf"
# NOTE: the value below is a placeholder; replace it with the path to a real config file.
datacube_config_path = "path_to_datacube_config_file"
dc = datacube.Datacube(app = "my_app", config = datacube_config_path)
# NOTE(review): `products` is not referenced again in the visible notebook.
products = dc.list_products()
# Find every dataset indexed under the "flood_extent_year" product and load them together.
datasets = dc.find_datasets(product="flood_extent_year")
flood_data = dc.load(datasets=datasets)
flood_data

In [3]:
# Dataset with the distance from main rivers and basins.
# Read by check_water_distance to test how far the selected flooded and
# non flooded points are from the main water bodies.
# squeeze() removes any length-1 dimension (presumably the single raster band — TODO confirm).
main_water_distance = xr.open_dataset("data/main_water_distance.tif").squeeze()
main_water_distance

In [4]:
# Dataset with the perimeter where all CF (conditioning factor) datasets overlap.
# Read by check_perimeter, which treats a value of 1 as "inside the perimeter".
# squeeze() removes any length-1 dimension (presumably the single raster band — TODO confirm).
cf_perimeter = xr.open_dataset("data/flood_cf_perimeter.tif").squeeze()
cf_perimeter

In [5]:
# Land cover raster (datacube product 'dusaf15').
# Read by check_outside_dusaf_water and later to list the unique land cover codes.
lc_datasets = dc.find_datasets(product='dusaf15')
lc = dc.load(datasets=lc_datasets)

In [6]:
# Select indexes of flooded points filtered by year.
# Cells whose value is not greater than 1999 are masked out (become NaN) by where().
flooded = flood_data.where(flood_data>1999).squeeze()
# One [row, col] pair per flooded cell: element 0 indexes y and element 1 indexes x
# (this is how the check_* functions below read them).
flooded_ind = np.argwhere(np.array(flooded.value>1999))
flooded_ind.shape

(1339179, 2)

In [7]:
def check_water_distance(point_index, min_dist):
    """Tell whether a grid point lies farther than ``min_dist`` from the main water bodies.

    Parameters
    ----------
    point_index : sequence of two ints
        Positional index of the point on the ``flooded`` grid: element 0
        indexes ``y`` and element 1 indexes ``x``.
    min_dist : number
        Threshold compared against the ``band_data`` value of
        ``main_water_distance`` at the point (same units as that raster).

    Returns
    -------
    numpy boolean
        True when the value stored at the point is strictly greater than
        ``min_dist``.
    """
    row, col = point_index[0], point_index[1]
    # Translate the positional index into the coordinate labels of the flood grid.
    point_coords = {
        'y': flooded.y.isel(y=row).values,
        'x': flooded.x.isel(x=col).values,
    }
    # Label-based lookup: the distance raster has to expose these same coordinates.
    distance_to_water = main_water_distance.band_data.sel(**point_coords).values
    return distance_to_water > min_dist

In [8]:
def check_perimeter(point_index):
    """Tell whether a grid point falls inside the common conditioning-factor perimeter.

    Parameters
    ----------
    point_index : sequence of two ints
        Positional index of the point on the ``flooded`` grid: element 0
        indexes ``y`` and element 1 indexes ``x``.

    Returns
    -------
    numpy boolean
        True when the ``band_data`` value of ``cf_perimeter`` at the point
        equals 1.
    """
    row, col = point_index[0], point_index[1]
    # Translate the positional index into the coordinate labels of the flood grid.
    point_coords = {
        'y': flooded.y.isel(y=row).values,
        'x': flooded.x.isel(x=col).values,
    }
    perimeter_value = cf_perimeter.band_data.sel(**point_coords).values
    return perimeter_value == 1

In [9]:
def check_outside_dusaf_water(point_index):
    """Tell whether the land cover code at a grid point is outside the excluded classes.

    The excluded classes are every code whose first digit is '5', plus the
    codes '331' and '3222'.

    Parameters
    ----------
    point_index : sequence of two ints
        Positional index of the point on the ``flooded`` grid: element 0
        indexes ``y`` and element 1 indexes ``x``.

    Returns
    -------
    bool
        True when the ``codice`` value of ``lc`` at the point is not one of
        the excluded classes. False when it is excluded or when the land
        cover value is missing (NaN).
    """
    y = flooded.y.isel(y=point_index[0]).values
    x = flooded.x.isel(x=point_index[1]).values
    dusaf_value = lc.squeeze().codice.sel(y=y,x=x).values
    # int(NaN) raises ValueError. A cell without land cover cannot be classified
    # (and cannot be mapped to a land cover class later on), so reject it.
    if np.isnan(dusaf_value):
        return False
    # Convert once instead of once per comparison.
    code = str(int(dusaf_value))
    return not code.startswith('5') and code not in ('331', '3222')

In [10]:
# Randomly select N flooded points (as [row, col] xarray indexes) that satisfy the criteria:
# farther than FLOODED_MIN_WATER_DISTANCE from the main water bodies, inside the common
# perimeter and outside the excluded land cover classes.
N_FLOODED_SAMPLES = 10000
FLOODED_MIN_WATER_DISTANCE = 20
# Fixed seed so that re-running the notebook from a fresh kernel draws the same points.
flooded_rng = np.random.default_rng(42)

selected_ind = []
# NOTE: the candidate pool `flooded_ind` is consumed by this loop; re-run the cell that
# builds it before re-running this one.
while len(selected_ind) < N_FLOODED_SAMPLES and len(flooded_ind) > 0:
    rand_idx = flooded_rng.integers(len(flooded_ind))
    # Copy before the swap below: plain indexing returns a view that the swap would overwrite.
    rand_point = flooded_ind[rand_idx].copy()
    # Swap-remove the drawn candidate whether or not it is accepted, so rejected points are
    # never tested twice and the loop is guaranteed to stop once the pool is exhausted.
    flooded_ind[rand_idx] = flooded_ind[len(flooded_ind) - 1]
    flooded_ind = flooded_ind[:-1]
    if (check_water_distance(rand_point, FLOODED_MIN_WATER_DISTANCE) and check_perimeter(rand_point) and check_outside_dusaf_water(rand_point)):
        selected_ind.append(rand_point.tolist())
if len(selected_ind) < N_FLOODED_SAMPLES:
    raise RuntimeError(
        'only ' + str(len(selected_ind)) + ' flooded points satisfy the criteria, '
        + str(N_FLOODED_SAMPLES) + ' were requested'
    )
selected_ind = np.array(selected_ind)
selected_ind

array([[ 8125,  4165],
       [ 3012, 12579],
       [ 1950,   127],
       ...,
       [ 6166,  2343],
       [ 2170,   110],
       [ 7055,  3271]])

In [11]:
# Get the y coordinates of the selected flooded indexes
# (column 0 of selected_ind is the positional index along y).
y = flooded.y.isel(y=selected_ind[:,0]).values
y

array([5013672.5, 5039237.5, 5044547.5, ..., 5023467.5, 5043447.5,
       5019022.5])

In [12]:
# Get the x coordinates of the selected flooded indexes
# (column 1 of selected_ind is the positional index along x).
x = flooded.x.isel(x=selected_ind[:,1]).values
x

array([497872.5, 539942.5, 477682.5, ..., 488762.5, 477597.5, 493402.5])

In [13]:
# One (y, x) coordinate pair per selected flooded point: column 0 is y, column 1 is x.
selected_flood_coord = np.column_stack((y, x))
selected_flood_coord

array([[5013672.5,  497872.5],
       [5039237.5,  539942.5],
       [5044547.5,  477682.5],
       ...,
       [5023467.5,  488762.5],
       [5043447.5,  477597.5],
       [5019022.5,  493402.5]])

In [14]:
# Get the indexes of non flooded areas.
# Only cells whose value is exactly 0 are kept; every other cell is masked out (NaN) by where().
non_flooded = flood_data.where(flood_data==0).squeeze()
# One [row, col] pair per non flooded cell: element 0 indexes y and element 1 indexes x.
non_flooded_ind = np.argwhere(np.array(non_flooded.value==0))
non_flooded_ind

array([[    1,  3669],
       [    2,  3664],
       [    2,  3665],
       ...,
       [10660, 12178],
       [10660, 12179],
       [10660, 12180]])

In [15]:
# Randomly select N non flooded points (as [row, col] xarray indexes) that satisfy the criteria:
# farther than NON_FLOODED_MIN_WATER_DISTANCE from the main water bodies, inside the common
# perimeter and outside the excluded land cover classes.
N_NON_FLOODED_SAMPLES = 10000
NON_FLOODED_MIN_WATER_DISTANCE = 200
# Fixed seed so that re-running the notebook from a fresh kernel draws the same points.
non_flooded_rng = np.random.default_rng(42)

selected_non_ind = []
# NOTE: the candidate pool `non_flooded_ind` is consumed by this loop; re-run the cell that
# builds it before re-running this one.
while len(selected_non_ind) < N_NON_FLOODED_SAMPLES and len(non_flooded_ind) > 0:
    rand_idx = non_flooded_rng.integers(len(non_flooded_ind))
    # Copy before the swap below: plain indexing returns a view that the swap would overwrite.
    rand_point = non_flooded_ind[rand_idx].copy()
    # Swap-remove the drawn candidate whether or not it is accepted, so rejected points are
    # never tested twice and the loop is guaranteed to stop once the pool is exhausted.
    non_flooded_ind[rand_idx] = non_flooded_ind[len(non_flooded_ind) - 1]
    non_flooded_ind = non_flooded_ind[:-1]
    if (check_water_distance(rand_point, NON_FLOODED_MIN_WATER_DISTANCE) and check_perimeter(rand_point) and check_outside_dusaf_water(rand_point)):
        selected_non_ind.append(rand_point.tolist())
if len(selected_non_ind) < N_NON_FLOODED_SAMPLES:
    raise RuntimeError(
        'only ' + str(len(selected_non_ind)) + ' non flooded points satisfy the criteria, '
        + str(N_NON_FLOODED_SAMPLES) + ' were requested'
    )
selected_non_ind = np.array(selected_non_ind)
selected_non_ind

array([[ 5956,  2585],
       [ 1760,  2453],
       [ 4157,  9661],
       ...,
       [ 2192, 12079],
       [ 2989,  1695],
       [ 2915, 10470]])

In [16]:
# Get the y coordinates of the selected non flooded indexes
# (column 0 of selected_non_ind is the positional index along y).
y_non = non_flooded.y.isel(y=selected_non_ind[:,0]).values
y_non

array([5024517.5, 5045497.5, 5033512.5, ..., 5043337.5, 5039352.5,
       5039722.5])

In [17]:
# Get the x coordinates of the selected non flooded indexes
# (column 1 of selected_non_ind is the positional index along x).
x_non = non_flooded.x.isel(x=selected_non_ind[:,1]).values
x_non

array([489972.5, 489312.5, 525352.5, ..., 537442.5, 485522.5, 529397.5])

In [18]:
# One (y, x) coordinate pair per selected non flooded point: column 0 is y, column 1 is x.
selected_non_flood_coord = np.column_stack((y_non, x_non))
selected_non_flood_coord

array([[5024517.5,  489972.5],
       [5045497.5,  489312.5],
       [5033512.5,  525352.5],
       ...,
       [5043337.5,  537442.5],
       [5039352.5,  485522.5],
       [5039722.5,  529397.5]])

In [19]:
# Coordinate pairs of both selected flooded and non flooded points.
# The flooded points come first, followed by the non flooded ones.
selected_coord = np.concatenate((selected_flood_coord,selected_non_flood_coord))
selected_coord

array([[5013672.5,  497872.5],
       [5039237.5,  539942.5],
       [5044547.5,  477682.5],
       ...,
       [5043337.5,  537442.5],
       [5039352.5,  485522.5],
       [5039722.5,  529397.5]])

In [20]:
# Extract from the past flood dataset only the cells at the selected coordinates.
# Both indexers share the 'index' dimension, so one value is picked per coordinate pair
# and the resulting dataframe is indexed by 'index'.
flood_sel_y = xr.DataArray(selected_coord[:,0], dims=['index'])
flood_sel_x = xr.DataArray(selected_coord[:,1], dims=['index'])
sel_data = flood_data.squeeze().sel(y=flood_sel_y, x=flood_sel_x)
# The first data variable of the dataset becomes the 'flooded' column.
flood_var_name = list(sel_data.data_vars)[0]
flood_training = (
    sel_data.to_dataframe()
    .rename(columns={flood_var_name: 'flooded'})
    .drop(['time', 'spatial_ref'], axis=1)
)
flood_training

Unnamed: 0_level_0,y,x,flooded
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,5013672.5,497872.5,2000.0
1,5039237.5,539942.5,2002.0
2,5044547.5,477682.5,2000.0
3,5044942.5,477432.5,2000.0
4,5045907.5,540982.5,2002.0
...,...,...,...
19995,5038672.5,506657.5,0.0
19996,5031042.5,507662.5,0.0
19997,5043337.5,537442.5,0.0
19998,5039352.5,485522.5,0.0


In [21]:
# Get the unique lithology values.
geo_datasets = dc.find_datasets(product='geologia')
geo = dc.load(datasets=geo_datasets)
geo_data = geo.codice.squeeze().values
geo_data = geo_data.flatten()
geo_cat = np.unique(geo_data)
# Remove missing values explicitly instead of dropping the last unique value: slicing
# with [:-1] discards a real category when the raster has no NaN, and leaves NaN entries
# behind on numpy versions where np.unique does not collapse them into one.
geo_cat = geo_cat[~np.isnan(geo_cat)].reshape(-1,1)
# Free the flattened copy of the raster.
del geo_data
geo_cat

array([[  8.],
       [ 76.],
       [ 81.],
       [101.],
       [201.],
       [205.],
       [206.],
       [207.],
       [301.]], dtype=float32)

In [22]:
# Get the unique land cover values.
lc_data = lc.codice.squeeze().values
lc_data = lc_data.flatten()
lc_cat = np.unique(lc_data)
# Remove missing values explicitly instead of dropping the last unique value: slicing
# with [:-1] discards a real category when the raster has no NaN, and leaves NaN entries
# behind on numpy versions where np.unique does not collapse them into one.
lc_cat = lc_cat[~np.isnan(lc_cat)].reshape(-1,1)
# Free the flattened copy of the raster.
del lc_data
lc_cat

array([[  122.],
       [  124.],
       [  131.],
       [  132.],
       [  133.],
       [  134.],
       [  213.],
       [  221.],
       [  222.],
       [  223.],
       [  314.],
       [  331.],
       [  411.],
       [  511.],
       [ 1111.],
       [ 1112.],
       [ 1121.],
       [ 1122.],
       [ 1123.],
       [ 1221.],
       [ 1222.],
       [ 1411.],
       [ 1412.],
       [ 1421.],
       [ 1422.],
       [ 1423.],
       [ 1424.],
       [ 2111.],
       [ 2112.],
       [ 2115.],
       [ 2241.],
       [ 2242.],
       [ 2311.],
       [ 2312.],
       [ 3113.],
       [ 3121.],
       [ 3221.],
       [ 3222.],
       [ 3223.],
       [ 3241.],
       [ 3242.],
       [ 5121.],
       [ 5122.],
       [ 5123.],
       [11231.],
       [12111.],
       [12112.],
       [12121.],
       [12122.],
       [12123.],
       [12124.],
       [12125.],
       [12126.],
       [21131.],
       [21132.],
       [21141.],
       [21142.],
       [31111.],
       [31112.

In [23]:
"""
0 - ghiaie, sabbie e limi
1 - ghiaie, sabbie
2 - ghiaie, sabbie e argille ferrettizzate
3 - argille, calcari, conglomerati
4 - ghiaie, limi e argille fortemente ferrettizzati
5 - conglomerati, sabbie, argille
6 - marne
"""
# English translation of the class legend above:
# 0 - gravels, sands and silts
# 1 - gravels, sands
# 2 - gravels, sands and ferrettized clays
# 3 - clays, limestones, conglomerates
# 4 - strongly ferrettized gravels, silts and clays
# 5 - conglomerates, sands, clays
# 6 - marls

# Map the unique lithology codes to the lithology classes listed above.
# Keys are the raw codes found in the raster, values are the class numbers.

geo_cat_dict = {
    101 : 0,
    201 : 0,
    301 : 0,
    205 : 1,
    206 : 2,
    76  : 3,
    207 : 4,
    8   : 5,
    81  : 6
}

# Look every raw code up in the dictionary, then keep each resulting class once,
# as a column vector (the shape the OneHotEncoder is fitted on later).
mapped_geo_cat = np.vectorize(geo_cat_dict.get)(geo_cat.flatten())
mapped_geo_cat = np.unique(mapped_geo_cat).reshape(-1,1)
mapped_geo_cat

array([[0],
       [1],
       [2],
       [3],
       [4],
       [5],
       [6]])

In [24]:
def map_land_cat(value):
    """Map a land cover code to its reduced land cover class.

    The reduced class is the integer formed by the first two digits of the
    code; class 13 is merged into class 12.

    Parameters
    ----------
    value : number
        Land cover code; it is truncated to an integer before its digits are read.

    Returns
    -------
    int
        The reduced land cover class.
    """
    prefix = str(int(value))[:2]
    return 12 if prefix == '13' else int(prefix)

In [25]:
# Reduce every unique land cover code with map_land_cat, then keep each reduced class once,
# as a column vector (the shape the OneHotEncoder is fitted on later).
mapped_lc_cat = np.vectorize(map_land_cat)(lc_cat.flatten())
mapped_lc_cat = np.unique(mapped_lc_cat).reshape(-1,1)
mapped_lc_cat

array([[11],
       [12],
       [14],
       [21],
       [22],
       [23],
       [31],
       [32],
       [33],
       [41],
       [51]])

In [26]:
# Fit the lithology encoder on the full list of lithology classes, so the encoded columns
# do not depend on which classes happen to appear in the sampled points.
geo_enc = OneHotEncoder()
geo_enc.fit(mapped_geo_cat)
geo_enc.categories_


[array([0, 1, 2, 3, 4, 5, 6])]

In [27]:
# Fit the land cover encoder on the reduced classes derived from `lc` (the 'dusaf15' product).
# NOTE(review): values encoded later may come from other land cover products; with the
# default OneHotEncoder settings a class not seen here raises at transform time — confirm
# that every product reduces to these same classes.
lc_enc = OneHotEncoder()
lc_enc.fit(mapped_lc_cat)
lc_enc.categories_

[array([11, 12, 14, 21, 22, 23, 31, 32, 33, 41, 51])]

In [28]:
# Create a dataframe with the values of all the selected conditioning factors at the selected coordinates.
# Each name in cf_list is a datacube product; the first data variable of the product is
# sampled at the selected coordinates and added to flood_training as a column named after
# the product.
cf_list = ['dtm_milan','aspect','dusaf','dusaf15','dusaf99','geologia','hillshade','ndvi_2019','ndvi_2014','ndvi_2002','ndvi_2000','plan_curvature','profile_curvature',
           'water_distance','slope','spi','tri','twi']
for cf in cf_list:
    datasets = dc.find_datasets(product=cf)
    cf_data = dc.load(datasets=datasets)
    # Both indexers share the 'index' dimension, so one value is picked per coordinate pair.
    cf_sel = cf_data.squeeze().sel(y=xr.DataArray(selected_coord[:,0], dims = ['index']), x = xr.DataArray(selected_coord[:,1], dims=['index']))
    cf_var_name = list(cf_data.data_vars.keys())[0]
    # Release the full raster before building the dataframe.
    del cf_data
    cf_df = cf_sel.to_dataframe()
    cf_df.rename(columns={cf_var_name:cf},inplace=True)
    cf_df.drop(['time','spatial_ref'],axis=1,inplace=True)
    # Join on the coordinate pair.
    # NOTE: flood_training is rebound here, so re-running this cell without re-creating
    # flood_training merges the same products a second time.
    flood_training = cf_df.merge(flood_training,on=['y','x'])
    print(cf + ' done')
flood_training

dtm_milan done
aspect done
dusaf done
dusaf15 done
dusaf99 done
geologia done
hillshade done
ndvi_2019 done
ndvi_2014 done
ndvi_2002 done
ndvi_2000 done
plan_curvature done
profile_curvature done
water_distance done
slope done
spi done
tri done
twi done


Unnamed: 0,y,x,twi,tri,spi,slope,water_distance,profile_curvature,plan_curvature,ndvi_2000,...,ndvi_2014,ndvi_2019,hillshade,geologia,dusaf99,dusaf15,dusaf,aspect,dtm_milan,flooded
0,5013672.5,497872.5,10.121225,0.271741,1.796980,0.921623,74.330345,0.274237,-0.129694,0.374187,...,0.347476,0.810118,181.0,101.0,31111.0,31111.0,31111.0,30.754047,71.105003,2000.0
1,5039237.5,539942.5,6.480947,0.596306,-10.170015,2.193870,22.360680,0.290859,-0.842747,0.324124,...,0.380291,0.705645,187.0,101.0,3113.0,3113.0,3113.0,317.719330,107.760887,2002.0
2,5044547.5,477682.5,10.975347,0.275029,0.666227,0.343284,28.284271,0.298350,-0.093618,0.402057,...,0.377064,0.863158,180.0,201.0,31111.0,31111.0,31111.0,144.860947,141.871002,2000.0
3,5044942.5,477432.5,5.851149,1.874699,-9.540217,4.113334,20.000000,-4.963612,2.188366,0.382736,...,0.419479,0.805548,191.0,201.0,31111.0,31111.0,31111.0,286.229279,140.513000,2000.0
4,5045907.5,540982.5,6.548541,0.468189,-10.237609,2.050606,10.000000,-0.424163,0.419770,0.357586,...,0.263703,0.798993,175.0,101.0,2111.0,2111.0,2111.0,167.089020,128.951996,2002.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19995,5038672.5,506657.5,4.716925,2.704538,-8.405999,12.602149,395.379578,-0.547879,0.292087,0.287325,...,0.366211,0.635875,186.0,205.0,2115.0,1411.0,1411.0,30.427158,143.399994,0.0
19996,5031042.5,507662.5,12.361214,0.206014,3.574872,0.711363,143.178223,0.424424,-0.139602,0.131484,...,0.107608,0.103639,179.0,205.0,12111.0,12111.0,12111.0,160.105316,116.323997,0.0
19997,5043337.5,537442.5,9.499601,0.533417,1.820767,1.286905,20.615528,1.159952,-1.036093,0.177463,...,0.181434,0.228143,177.0,205.0,1112.0,1112.0,1112.0,167.138977,137.824997,0.0
19998,5039352.5,485522.5,8.602279,0.113127,-1.471528,0.526209,323.109894,-0.048511,0.011975,0.147619,...,0.105345,0.156583,179.0,205.0,1111.0,1111.0,1111.0,136.804199,157.609100,0.0


In [29]:
# Check that there are no nulls in the dataframe.
# The only null values that can happen at the moment are in dusaf15 and dusaf99, due to errors in the original shapefiles.
# NOTE: this only prints the number of nulls per conditioning factor; nothing stops the
# notebook when a count is not zero.
for cf in cf_list:
    print(cf + '  ' + str(flood_training[cf].isna().sum()))

dtm_milan  0
aspect  0
dusaf  0
dusaf15  0
dusaf99  0
geologia  0
hillshade  0
ndvi_2019  0
ndvi_2014  0
ndvi_2002  0
ndvi_2000  0
plan_curvature  0
profile_curvature  0
water_distance  0
slope  0
spi  0
tri  0
twi  0


In [30]:
def select_ndvi(ndvi2000,ndvi2002,ndvi2014,ndvi2019,flood_year):
    """Return the NDVI value that matches the year of the flood event.

    Parameters
    ----------
    ndvi2000, ndvi2002, ndvi2014, ndvi2019
        NDVI values of the point for each available year.
    flood_year
        Year of the event, compared with ``==`` against 2000, 2002 and 2014.

    Returns
    -------
    The NDVI value of the matching year; ``ndvi2019`` for any other
    ``flood_year``.
    """
    ndvi_by_year = ((2000, ndvi2000), (2002, ndvi2002), (2014, ndvi2014))
    for year, ndvi_value in ndvi_by_year:
        if flood_year == year:
            return ndvi_value
    # Every other value falls back to the 2019 NDVI.
    return ndvi2019

In [31]:
def select_dusaf(dusaf99,dusaf15,dusaf19,flood_year):
    """Return the land cover value that matches the year of the flood event.

    Parameters
    ----------
    dusaf99, dusaf15, dusaf19
        Land cover values of the point from the three available land cover layers.
    flood_year
        Year of the event.

    Returns
    -------
    ``dusaf99`` when ``flood_year`` is 2000 or 2002, ``dusaf15`` when it is
    2014, and ``dusaf19`` for any other value.
    """
    if flood_year in (2000, 2002):
        return dusaf99
    return dusaf15 if flood_year == 2014 else dusaf19

In [32]:
# Select the correct land cover value.
# np.vectorize calls select_dusaf once per row, pairing the three land cover columns of the
# row with its 'flooded' value.
dusaf = np.vectorize(select_dusaf)(flood_training['dusaf99'],flood_training['dusaf15'],flood_training['dusaf'],flood_training['flooded'])
dusaf

array([31111.,  3113., 31111., ...,  1112.,  1111., 12112.], dtype=float32)

In [33]:
# Select the correct ndvi value.
# np.vectorize calls select_ndvi once per row, pairing the four ndvi columns of the row
# with its 'flooded' value.
ndvi = np.vectorize(select_ndvi)(flood_training['ndvi_2000'],flood_training['ndvi_2002'],flood_training['ndvi_2014'],flood_training['ndvi_2019'],flood_training['flooded'])
ndvi

array([0.37418708, 0.3392357 , 0.4020566 , ..., 0.22814275, 0.1565828 ,
       0.59719175], dtype=float32)

In [34]:
# Store the year-matched land cover value as a new column.
flood_training['dusaf_year'] = dusaf

In [35]:
# Store the year-matched ndvi value as a new column.
flood_training['ndvi_year'] = ndvi

In [36]:
# Map land cover and lithology values to the new specified classes.
# NOTE: both columns are overwritten, so this cell must run only once after the columns
# are created: on a second run the already mapped lithology classes (0-6) are not keys of
# geo_cat_dict and .get returns None for them.
flood_training['geologia'] = np.vectorize(geo_cat_dict.get)(flood_training['geologia'])
flood_training['dusaf_year'] = np.vectorize(map_land_cat)(flood_training['dusaf_year'])

In [37]:
# Display the dataframe after the class mapping.
flood_training

Unnamed: 0,y,x,twi,tri,spi,slope,water_distance,profile_curvature,plan_curvature,ndvi_2000,...,hillshade,geologia,dusaf99,dusaf15,dusaf,aspect,dtm_milan,flooded,dusaf_year,ndvi_year
0,5013672.5,497872.5,10.121225,0.271741,1.796980,0.921623,74.330345,0.274237,-0.129694,0.374187,...,181.0,0,31111.0,31111.0,31111.0,30.754047,71.105003,2000.0,31,0.374187
1,5039237.5,539942.5,6.480947,0.596306,-10.170015,2.193870,22.360680,0.290859,-0.842747,0.324124,...,187.0,0,3113.0,3113.0,3113.0,317.719330,107.760887,2002.0,31,0.339236
2,5044547.5,477682.5,10.975347,0.275029,0.666227,0.343284,28.284271,0.298350,-0.093618,0.402057,...,180.0,0,31111.0,31111.0,31111.0,144.860947,141.871002,2000.0,31,0.402057
3,5044942.5,477432.5,5.851149,1.874699,-9.540217,4.113334,20.000000,-4.963612,2.188366,0.382736,...,191.0,0,31111.0,31111.0,31111.0,286.229279,140.513000,2000.0,31,0.382736
4,5045907.5,540982.5,6.548541,0.468189,-10.237609,2.050606,10.000000,-0.424163,0.419770,0.357586,...,175.0,0,2111.0,2111.0,2111.0,167.089020,128.951996,2002.0,21,0.212436
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19995,5038672.5,506657.5,4.716925,2.704538,-8.405999,12.602149,395.379578,-0.547879,0.292087,0.287325,...,186.0,1,2115.0,1411.0,1411.0,30.427158,143.399994,0.0,14,0.635875
19996,5031042.5,507662.5,12.361214,0.206014,3.574872,0.711363,143.178223,0.424424,-0.139602,0.131484,...,179.0,1,12111.0,12111.0,12111.0,160.105316,116.323997,0.0,12,0.103639
19997,5043337.5,537442.5,9.499601,0.533417,1.820767,1.286905,20.615528,1.159952,-1.036093,0.177463,...,177.0,1,1112.0,1112.0,1112.0,167.138977,137.824997,0.0,11,0.228143
19998,5039352.5,485522.5,8.602279,0.113127,-1.471528,0.526209,323.109894,-0.048511,0.011975,0.147619,...,179.0,1,1111.0,1111.0,1111.0,136.804199,157.609100,0.0,11,0.156583


In [38]:
# One hot encoding of lithology.
# transform() receives the column as an (n, 1) array; its result is converted to a dense
# array with toarray(), and the column names are built from the 'geo' prefix.
encoded = geo_enc.transform(flood_training['geologia'].to_numpy().reshape(-1,1))
encoded_df = pd.DataFrame(encoded.toarray(),columns=geo_enc.get_feature_names_out(['geo']))
encoded_df

Unnamed: 0,geo_0,geo_1,geo_2,geo_3,geo_4,geo_5,geo_6
0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
1,1.0,0.0,0.0,0.0,0.0,0.0,0.0
2,1.0,0.0,0.0,0.0,0.0,0.0,0.0
3,1.0,0.0,0.0,0.0,0.0,0.0,0.0
4,1.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...
19995,0.0,1.0,0.0,0.0,0.0,0.0,0.0
19996,0.0,1.0,0.0,0.0,0.0,0.0,0.0
19997,0.0,1.0,0.0,0.0,0.0,0.0,0.0
19998,0.0,1.0,0.0,0.0,0.0,0.0,0.0


In [39]:
# One hot encoding of land cover.
# transform() receives the column as an (n, 1) array; its result is converted to a dense
# array with toarray(), and the column names are built from the 'lc' prefix.
encoded_lc = lc_enc.transform(flood_training['dusaf_year'].to_numpy().reshape(-1,1))
encoded_lc_df = pd.DataFrame(encoded_lc.toarray(),columns=lc_enc.get_feature_names_out(['lc']))
encoded_lc_df

Unnamed: 0,lc_11,lc_12,lc_14,lc_21,lc_22,lc_23,lc_31,lc_32,lc_33,lc_41,lc_51
0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...
19995,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
19996,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
19997,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
19998,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [40]:
# Add the one hot encoded columns.
# Both merges align the frames on their index labels (left_index/right_index).
final_df = flood_training.merge(encoded_df,left_index=True,right_index=True)
final_df = final_df.merge(encoded_lc_df,left_index=True,right_index=True)
# Move the 'flooded' column to the last position.
flood_col = final_df.pop('flooded') 
final_df.insert(final_df.shape[1], 'flooded', flood_col)
final_df

Unnamed: 0,y,x,twi,tri,spi,slope,water_distance,profile_curvature,plan_curvature,ndvi_2000,...,lc_14,lc_21,lc_22,lc_23,lc_31,lc_32,lc_33,lc_41,lc_51,flooded
0,5013672.5,497872.5,10.121225,0.271741,1.796980,0.921623,74.330345,0.274237,-0.129694,0.374187,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,2000.0
1,5039237.5,539942.5,6.480947,0.596306,-10.170015,2.193870,22.360680,0.290859,-0.842747,0.324124,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,2002.0
2,5044547.5,477682.5,10.975347,0.275029,0.666227,0.343284,28.284271,0.298350,-0.093618,0.402057,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,2000.0
3,5044942.5,477432.5,5.851149,1.874699,-9.540217,4.113334,20.000000,-4.963612,2.188366,0.382736,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,2000.0
4,5045907.5,540982.5,6.548541,0.468189,-10.237609,2.050606,10.000000,-0.424163,0.419770,0.357586,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2002.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19995,5038672.5,506657.5,4.716925,2.704538,-8.405999,12.602149,395.379578,-0.547879,0.292087,0.287325,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
19996,5031042.5,507662.5,12.361214,0.206014,3.574872,0.711363,143.178223,0.424424,-0.139602,0.131484,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
19997,5043337.5,537442.5,9.499601,0.533417,1.820767,1.286905,20.615528,1.159952,-1.036093,0.177463,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
19998,5039352.5,485522.5,8.602279,0.113127,-1.471528,0.526209,323.109894,-0.048511,0.011975,0.147619,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [41]:
# Write the final training set to CSV, without the dataframe index.
training_csv_path = Path('Training sets/flood_training_cf_year_no_river_bed.csv')
# Create the output folder when it is missing, so the write does not fail at the very end
# of the notebook.
training_csv_path.parent.mkdir(parents=True, exist_ok=True)
final_df.to_csv(training_csv_path, index=False)