# Earth Lab Capstone Project: Where can soil moisture improve rainfall-triggered landslide predictability?

## Author: Jacquelyn Witte

## This Notebook exports data from SMAP and ESA CCI soil moisture, GPM daily and IMERG 30min precipitation, and POLARIS soil properties co-located to Landslides in the US

- Based on 2015-2020 Landslide events from the NASA Global Landslide Catalog (GLC)
- Using Landslide locations over Colorado as a workflow example
- Workflow can be applied to any USA state defined in the GLC

In [1]:
import os
from glob import glob

import datetime as dt
import earthpy as et
import h5py
import numpy as np
import pandas as pd
import re
import rioxarray as rxr
import xarray as xr

import warnings
warnings.filterwarnings('ignore')

## Begin Functions

In [2]:
def read_smap(filepath, index):
    """
    Reads SMAP data and returns the variable of interest.
    
    Parameters
    ----------
    filepath: Str
        File path of a SMAP L3 HDF5 file
        
    group_id: String
        Groups within the file to access
        
    index: int
        Index associated with the variable to retrieve
    
    Returns
    -------
    data: 2D numpy.ndarray (lat, lon)
    date: Date String yyyymmdd
    """
    # Read the file
    group_id = 'Soil_Moisture_Retrieval_Data_PM'
    with h5py.File(filepath, 'r') as f:
        # Extract data info
        data_id = list(f[group_id].keys())[index]
        data = f[group_id][data_id][:,:]
        data[data == f[group_id][data_id].attrs['_FillValue']] = np.nan
        
        filename = os.path.basename(filepath)
        yyyymmdd= filename.split('_')[5]
        yyyy = int(yyyymmdd[0:4])
        mm = int(yyyymmdd[4:6])
        dd = int(yyyymmdd[6:8])
        date=dt.datetime(yyyy,mm,dd)
    return data, date

In [3]:
def findfile(input_files, input_date):
    """
    Returns a single file from a list of files.
    
    Parameters
    ----------
    input_files: List of strings
        List of full path to the file
        
    input_date: String
        YYYYMMDD format
        
    Returns
    -------
    file: Str
    """
    file = [x for x in input_files if re.findall(input_date, x)]
    if not file:
        raise ValueError('File does not exist for '+input_date)
    return file

In [4]:
def nearestneighbor_ncdf(input_file, parameter, loc):
    """
    Extracts nearest neighbor value based on location and desired parameter. 
    
    Parameters
    ----------   
    input_file: Str - full path to a single file
    
    parameter: Str 
    
    loc: tuple (degree longtitude, degree latitude)
    
    Returns
    -------
    float
    """
    # read the netcdf file
    try:
        data_xr = xr.open_dataset(input_file).squeeze()
    except IOError:
        print("This file is not accessible: "+input_file)
    finally:
        data_xr.close()
    
    # subset the file
    res = data_xr[parameter].sel(indexers={
            'lon': loc[0],
            'lat': loc[1]},
            method="nearest")
    
    return float(res.values)

In [5]:
def get_imerg_hires(imerg_files, glc_df):
    """
    Reads all IMERG 30min CSV file into a dataFrame.
    
    Ref: https://www.geeksforgeeks.org/ways-to-filter-pandas-dataframe-by-column-values/

    Parameters
    ----------
    imerg_files: List of strings
        List of full path to the file
    
    glc_df: dataFrame
        Global Landslide Catalog 
    
    Returns
    -------
    imerg: dataFrame
        Contains datetime, landslide ID, precipitation

    """
    id_list = glc_df['event_id'].values.tolist()
    
    list = []
    for f in imerg_files:
        print(f)
        temp_df = pd.read_csv(f)
        # filter for landslide id
        list.append(temp_df[temp_df['id'].isin(id_list)])

    imerg = pd.concat(list)
    # convert datetime to pd datetime because some dates are not in the right format
    imerg['datetime'] = pd.to_datetime(imerg['datetime'])
    # Create a simple date string to compare with the GLC data
    imerg['yyyymmdd'] = pd.to_datetime(imerg['datetime']).dt.strftime('%Y%m%d')
    #imerg.index = pd.to_datetime(imerg.index)
    imerg = imerg.reset_index().set_index('datetime')
    return imerg

In [6]:
def get_polaris_ksat(lat_in, lon_in):
    """
    Reads POLARIS Ksat value.
    
    About POLARIS soil variable
        - 30 m Ksat (Saturated Hydraulic Conductivity of Soil) 
        - 5-15 cm mode in log10(cm/hr)

    Parameters
    ----------
    lat_in, lon_in: float latitude, longitude in degrees
    
    Returns
    -------
    ksat_value: float
        Uses nearest neighbor method to find closest latitude, longitude coord
    """
    polaris_url = 'http://hydrology.cee.duke.edu/POLARIS/PROPERTIES/v1.0/ksat/mode/5_15/'

    # assemble polaris filename
    lat_str = str(int(lat_in)) + str(int(lat_in+1))
    lon_str = str(int(lon_in-1)) + str(int(lon_in))
    filen = 'lat'+lat_str+'_lon'+lon_str+'.tif'
    # read the polaris file
    polaris_xr = rxr.open_rasterio(polaris_url+filen, masked=True).squeeze()
    # subset the polaris file to the nearest lat, lon
    ksat_value = polaris_xr.sel(indexers={
        'x': lon_in,
        'y': lat_in},
        method="nearest").values
    return float(ksat_value)

## Start of the main program

### Choose the US state 

In [7]:
westernUS = ['Colorado', 'Utah', 'Idaho',
             'California', 'Oregon', 'Washington']
state = westernUS[0]

### Read and subset to Landslides >= year 2015 (SMAP data starts in 2015)
- Based on the state chosen

In [8]:
# Read the GLC file
glc_file = os.path.join(et.io.HOME,
                        'earth-analytics',
                        'data', 'capstone', 
                        'landslide', 'nasa_global_landslide_catalog_point.csv')

glc = pd.read_csv(glc_file)

glc_state = glc[(glc['country_code'] == 'US') 
             & (glc['admin_division_name'] == state) 
             & (glc['landslide_trigger'] != 'freeze_thaw') 
             & (glc['landslide_trigger'] != 'snowfall_snowmelt') 
             & (glc['landslide_trigger'] != 'earthquake') 
             & (glc['landslide_trigger'] != 'leaking_pipe') 
             & (glc['landslide_trigger'] != 'no_apparent_trigger') 
             & (glc['landslide_trigger'] != 'other')              
             & (glc['landslide_trigger'] != 'unknown')
            ]
# convert to pandas datetime
glc_state['date'] = pd.to_datetime(glc_state['event_date'])
glc_state = glc_state.set_index('date').sort_index()
glc_state_gt2015 = glc_state[glc_state.index > '2015-04-01']

print(glc_state_gt2015.shape)
print(np.unique(glc_state_gt2015['landslide_trigger']))
print(np.unique(glc_state_gt2015['landslide_category']))
print(glc_state_gt2015.columns)

# Dropping last dataframe if state=Utah - SMAP data does not exist
if state == 'Utah':
    glc_state_gt2015 = glc_state_gt2015.drop(pd.to_datetime(
                                                        '2019-06-26 04:00:00'))

glc_state_gt2015.tail()

(124, 31)
['continuous_rain' 'downpour' 'flooding' 'rain']
['creep' 'debris_flow' 'landslide' 'mudslide' 'riverbank_collapse'
 'rock_fall' 'unknown']
Index(['OBJECTID', 'Shape', 'source_name', 'source_link', 'event_id',
       'event_date', 'event_time', 'event_title', 'event_description',
       'location_description', 'location_accuracy', 'landslide_category',
       'landslide_trigger', 'landslide_size', 'landslide_setting',
       'fatality_count', 'injury_count', 'storm_name', 'photo_link',
       'comments', 'event_import_source', 'event_import_id', 'latitude',
       'longitude', 'country_name', 'country_code', 'admin_division_name',
       'gazetteer_closest_point', 'gazetteer_distance', 'submitted_date',
       'last_edited_date'],
      dtype='object')


Unnamed: 0_level_0,OBJECTID,Shape,source_name,source_link,event_id,event_date,event_time,event_title,event_description,location_description,...,event_import_id,latitude,longitude,country_name,country_code,admin_division_name,gazetteer_closest_point,gazetteer_distance,submitted_date,last_edited_date
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2018-01-27 00:00:00,7965456,"(-122.281470636, 47.723066000000074)",KIRO Channel 7,http://www.kiro7.com/news/local/mudslide-trees...,11230,2018-01-27 00:00:00,unknown,Burke-Gilman Trail Slide including 2 Downed Trees,Trail closed between 42nd PL NE and NE 125th d...,"Burke-Gilman Trl, Seattle, Washington, 98125",...,,47.723066,-122.281471,United States,US,Washington,Seattle,15.19,2018-01-30 15:03:38,2022-04-23 06:28:10.000001
2018-01-27 04:00:00,7995024,"(-123.13163354599999, 47.417838896000035)",KOMO News,http://komonews.com/news/local/mudslide-closes...,11246,2018-01-27 04:00:00,04:00,"US 101 Mudslide at Hoodsport, Milepost 330",Mudslide near Cedardale Lane north of Hoodspor...,"milepost 330, US-101, Hoodsport, Washington, 9...",...,,47.417839,-123.131634,United States,US,Washington,Cushman Lake,16.8,2018-01-30 15:03:38,2022-04-23 06:23:59
2018-01-30 00:00:00,7989505,"(-122.36491937299996, 47.475680503000035)",B-Town Blog,http://b-townblog.com/2018/01/30/small-landsli...,11252,2018-01-30 00:00:00,unknown,Seahurst Park Landslide south of Sea Star Shelter,A small landslide has occurred south of Sea St...,"south of Sea Star Shelter, Seahurst Park",...,,47.475681,-122.364919,United States,US,Washington,Seattle Tacoma International,5.4,2018-02-08 16:16:12,2022-04-23 06:44:28
2018-04-12 00:00:00,7977485,"(-123.29357847499995, 46.26644980300006)",The Wahkiakum County Eagle,http://www.waheagle.com/story/2018/05/03/news/...,11455,2018-04-12 00:00:00,,Elochoman Valley Road Slow-Moving Landslide,A slow-moving landslide at milepost 7.47 cause...,"Milepost 7.47, Elochoman Valley Rd, Cathlamet,...",...,,46.26645,-123.293578,United States,US,Washington,Cathlamet,9.41,2018-06-04 14:58:45,2022-04-23 06:36:29
2018-06-02 00:00:00,7972437,"(-120.97715071999994, 47.16860440900007)",Yakima Herald,http://www.yakimaherald.com/news/local/landsli...,11457,2018-06-02 00:00:00,,Mohar Road sinking from landslide,The road is closed at the intersection of Moha...,"2657-3299 Mohar Rd, Cle Elum, Washington, 98922",...,,47.168604,-120.977151,United States,US,Washington,Cle Elum,4.8,2018-06-04 01:17:49,2022-04-23 06:33:06.000001


### Get all SMAP, ESA, GPM and IMERG data files, sorted
 - ESA = Percent of Saturation Soil Moisture
 - SMAP = Volumetric soil moisture in cm3/cm3
 - GPM = Daily precipitation in mm
 - IMERG = 30 min precipitation in mm

In [9]:
data_dir = os.path.join(et.io.HOME,
                        'earth-analytics',
                        'data', 'capstone')

# SMAP 9km files
smap_files = sorted(glob(os.path.join(data_dir, 'smap_9km', '*.h5')))

# GPM daily files
gpm_files = sorted(glob(os.path.join(data_dir, 'gpm_westernUS', '*nc4')))

# GPM 30 min files
gpm_hires_files = sorted(glob(os.path.join(data_dir,
                                           'precip_imerg',
                                           'imerge',
                                           'glc', 'imerge*.csv')))

# ESA soil volume
esa_files = sorted(glob(os.path.join(data_dir, 'esa_soil_moisture',
                                     '*ACTIVE*nc')))

# Print a sample as a sanity check
print(os.path.exists(smap_files[0]))
print(os.path.exists(gpm_files[0]))
print(os.path.exists(gpm_hires_files[0]))
print(os.path.exists(esa_files[0]))

True
True
True
True


### Load the EASE2 grid lon and lat datasets to subset SMAP data. 
- These can be found on the NSIDC website: https://nsidc.org/data/ease/tools#geo_data_files

> Brodzik, M. J., B. Billingsley, T. Haran, B. Raup, M. H. Savoie. 2012. EASE-Grid 2.0: Incremental but Significant Improvements for Earth-Gridded Data Sets. ISPRS International Journal of Geo-Information, 1(1):32-45, doi:10.3390/ijgi1010032. http://www.mdpi.com/2220-9964/1/1/32.

#### These are SMAP variables that can provide key information in characterizing landslides over Colorado

| Variable Name | Index | Units |
| ----------- | ----------- | ----------- |
| soil_moisture | 24 | cm<sup>3</sup>/cm<sup>3</sup> |
| radar_water_body_fraction | 15 | N/A | 
| vegetation_opacity | 46 | N/A |
| vegetation_water_content | 50 | kg/m<sup>2</sup> |

In [10]:
# Reshape to dimensions of the SMAP data above
sm_data, date = read_smap(smap_files[1], 24)

lats = np.fromfile(os.path.join(data_dir, 'smap_9km',
                                'EASE2_M09km.lats.3856x1624x1.double'),
                   dtype=np.float64).reshape(sm_data.shape)
lons = np.fromfile(os.path.join(data_dir, 'smap_9km',
                                'EASE2_M09km.lons.3856x1624x1.double'),
                   dtype=np.float64).reshape(sm_data.shape)
sm_data.shape

(1624, 3856)

### Read the SMAP, ESA CCI and GPM data 
- Two dataframes are generated
    1. Colocated to all the Landslide events
    2. Precipitation measurements going back 7 day from each landslide event and indexed to Landslide ID
        - GPM daily resolution precipitation

In [11]:
precip7d_date = []
precip7d = []
precip_accum = []
precip_max = []
smap_sm = []
smap_sm_7d = []
smap_wc = []
esa_sm = []
esa_sm_7d = []
ksat_list = []
landslide_date = []
landslide_id = []
landslide7d_id = []
landslide_cat = []
landslide_trig = []
landslide_sz = []
lat = []
lon = []
periods = 7
count_down = []

for i, ls_date in enumerate(glc_state_gt2015.index):

    # Get -7 days from the event
    glc_date = pd.date_range(ls_date, periods=periods,
                             freq='-1D').strftime('%Y%m%d')

    # Append landslide metadata
    landslide_date.append(ls_date)
    lat.append(glc_state_gt2015.latitude[i])
    lon.append(glc_state_gt2015.longitude[i])
    landslide_id.append(glc_state_gt2015.event_id[i])
    landslide_cat.append(glc_state_gt2015.landslide_category[i])
    landslide_trig.append(glc_state_gt2015.landslide_trigger[i])
    landslide_sz.append(glc_state_gt2015.landslide_size[i])

    # Append Ksat value
    ksat_list.append(get_polaris_ksat(glc_state_gt2015.latitude[i], 
                     glc_state_gt2015.longitude[i])
                    )
    
    # Take the +/- 0.3 deg mean around the Landslide event for SMAP calculation
    N_lat = glc_state_gt2015.latitude[i]+0.15
    S_lat = glc_state_gt2015.latitude[i]-0.15
    W_lon = glc_state_gt2015.longitude[i]-0.15
    E_lon = glc_state_gt2015.longitude[i]+0.15
    subset = (lats < N_lat) & (lats > S_lat) & (lons > W_lon) & (lons < E_lon)

    # Initialize data
    sm_max = []
    vegwc_max = []
    esa_mean = []
    precip = []
    countd = periods

    # loop over the 7 days
    for yyyymmdd in glc_date:

        # Find the SMAP file
        filesm = findfile(smap_files, yyyymmdd)
        # Retrieve the SMAP variables
        sm, time_t = read_smap(filesm[0], 24)
        vegwc, t = read_smap(filesm[0], 50)
        # Calculate the SMAP max
        sm_max.append(np.nanmax(sm[subset]))
        vegwc_max.append(np.nanmax(vegwc[subset]))

        # Get the Landslide location
        loc = (glc_state_gt2015.longitude[i], glc_state_gt2015.latitude[i])

        # Find the ESA soil moisture file
        file_esa = findfile(esa_files, yyyymmdd)
        # Get the nearest neighbor value of % soil moisture
        res_esa = nearestneighbor_ncdf(file_esa[0], 'sm', loc)
        # Replace negative values with NaN
        if res_esa < 0.0:
            esa_mean.append(np.nan)
        else:
            esa_mean.append(res_esa)

        # find the GPM file
        file_gpm = findfile(gpm_files, yyyymmdd)

        precip7d.append(nearestneighbor_ncdf(file_gpm[0],
                                             'precipitationCal', loc))
        landslide7d_id.append(glc_state_gt2015.event_id[i])
        # Append the date
        precip7d_date.append(yyyymmdd)

        precip.append(nearestneighbor_ncdf(file_gpm[0],
                                           'precipitationCal', loc))

        # Append countdown
        count_down.append(countd)
        countd -= 1

    # Append the summary values for the 7 day period
    smap_sm.append(np.nanmax(sm_max))
    smap_wc.append(np.nanmax(vegwc_max))
    esa_sm.append(np.nanmax(esa_mean))
    smap_sm_7d.extend(sm_max)
    esa_sm_7d.extend(esa_mean)

    # Filter for low or too high precipitation values
    if np.nansum(precip) < 0.4:
        precip_accum.append(np.nan)
        precip_max.append(np.nan)
    else:
        precip_accum.append(np.nansum(precip))
        precip_max.append(np.nanmax(precip))

# Create a soils and precip dataFrame
landslide_df = pd.DataFrame(smap_sm,
                            index=pd.to_datetime(landslide_date),
                            columns=['smap_sm'])
landslide_df['veg_water_content'] = smap_wc
landslide_df['esa_sm_percent'] = esa_sm
landslide_df['gpm_7day_accum_mm'] = precip_accum
landslide_df['gpm_7day_max_mm'] = precip_max
landslide_df['ksat_log10cm/hr'] = ksat_list

# Add the Landslide metadata
landslide_df['glc_lat'] = lat
landslide_df['glc_lon'] = lon
landslide_df['landslide_id'] = landslide_id
landslide_df['landslide_category'] = landslide_cat
landslide_df['landslide_trigger'] = landslide_trig
landslide_df['landslide_size'] = landslide_sz

# Create the 7day precipitation dataFrame
landslide_precip7d_df = pd.DataFrame(precip7d,
                                     index=pd.to_datetime(precip7d_date),
                                     columns=['gpm_precip_mm'])
landslide_precip7d_df['landslide_id'] = landslide7d_id
landslide_precip7d_df['smap_sm'] = smap_sm_7d
landslide_precip7d_df['esa_vol'] = esa_sm_7d


# Add the cumulative precipitation
accum = []
for i, data in landslide_precip7d_df.groupby("landslide_id"):
    res = data['gpm_precip_mm'].cumsum().values
    if res.max() > 0.4:
        accum.extend(np.flip(data['gpm_precip_mm'].cumsum().values))
    else:
        accum.extend([np.nan]*periods)

landslide_precip7d_df['gpm_7day_accum_mm'] = accum
landslide_precip7d_df['days-to-landslide'] = count_down
landslide_precip7d_df.index.names = ['date']

# Add the normalized precipitation
accum_norm = []
for i, data in landslide_precip7d_df.groupby("landslide_id"):
    res = data['gpm_7day_accum_mm']/data['gpm_7day_accum_mm'].mean()
    accum_norm.append(res.max())

landslide_df['gpm_7day_accum_norm'] = accum_norm

In [12]:
landslide_precip7d_df.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 868 entries, 2015-05-28 to 2018-05-27
Data columns (total 6 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   gpm_precip_mm      868 non-null    float64
 1   landslide_id       868 non-null    int64  
 2   smap_sm            468 non-null    float32
 3   esa_vol            509 non-null    float64
 4   gpm_7day_accum_mm  847 non-null    float64
 5   days-to-landslide  868 non-null    int64  
dtypes: float32(1), float64(3), int64(2)
memory usage: 44.1 KB


In [13]:
landslide_df.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 124 entries, 2015-05-28 17:30:00 to 2018-06-02 00:00:00
Data columns (total 13 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   smap_sm              121 non-null    float32
 1   veg_water_content    124 non-null    float32
 2   esa_sm_percent       76 non-null     float64
 3   gpm_7day_accum_mm    121 non-null    float64
 4   gpm_7day_max_mm      121 non-null    float64
 5   ksat_log10cm/hr      123 non-null    float64
 6   glc_lat              124 non-null    float64
 7   glc_lon              124 non-null    float64
 8   landslide_id         124 non-null    int64  
 9   landslide_category   124 non-null    object 
 10  landslide_trigger    124 non-null    object 
 11  landslide_size       124 non-null    object 
 12  gpm_7day_accum_norm  121 non-null    float64
dtypes: float32(2), float64(7), int64(1), object(3)
memory usage: 12.6+ KB


### Add the IMERGE 30min resolution precipitation to the landslide_df

#### First, read the IMERGE 30min data

In [14]:
imerg_30min = get_imerg_hires(gpm_hires_files, glc_state_gt2015)

/Users/jwitte/earth-analytics/data/capstone/precip_imerg/imerge/glc/imerge.2015.csv
/Users/jwitte/earth-analytics/data/capstone/precip_imerg/imerge/glc/imerge.2016.csv
/Users/jwitte/earth-analytics/data/capstone/precip_imerg/imerge/glc/imerge.2017.csv
/Users/jwitte/earth-analytics/data/capstone/precip_imerg/imerge/glc/imerge.2018.csv
/Users/jwitte/earth-analytics/data/capstone/precip_imerg/imerge/glc/imerge.2019.csv
/Users/jwitte/earth-analytics/data/capstone/precip_imerg/imerge/glc/imerge.2020.csv


#### Loop over the newly created landslide_df
- Add the sum and the max

In [15]:
imerg_7day_sum = []
imerg_7day_max = []
imerg_7day_accum = []
imerg_7day_date = []
imerg_7day_id = []

for i, ls_date in enumerate(landslide_df.index):

    glc_date = pd.date_range(ls_date, periods=7,
                             freq='-1D').strftime('%Y%m%d').to_list()

    # selecting rows based on condition
    rslt_df = imerg_30min[
                    (imerg_30min['id'] == landslide_df['landslide_id'][i]) &
                    imerg_30min['yyyymmdd'].isin(glc_date)
                    ]
    # first calculate the 7 day precipitation stats
    if rslt_df.shape[0] > 0:
        imerg_7day_sum.append(rslt_df['precipitation'].sum())
        imerg_7day_max.append(rslt_df['precipitation'].max())
        # append the 7 day daily values
        imerg_7day_accum.extend(rslt_df.precipitation.resample('D').sum())
        imerg_7day_date.extend(glc_date)
        imerg_7day_id.extend([landslide_df['landslide_id'][i]]*7)
    else:
        imerg_7day_sum.append(np.nan)
        imerg_7day_max.append(np.nan)
        imerg_7day_accum.extend([np.nan]*7)
        imerg_7day_date.extend(glc_date)
        imerg_7day_id.extend([landslide_df['landslide_id'][i]]*7)

landslide_df['imerg_7day_accum_mm'] = imerg_7day_sum
landslide_df['imerg_7day_max_mm'] = imerg_7day_max
landslide_df.index.name = 'date'

landslide_precip7d_df['imerg_precip_mm'] = imerg_7day_accum

In [16]:
landslide_precip7d_df

Unnamed: 0_level_0,gpm_precip_mm,landslide_id,smap_sm,esa_vol,gpm_7day_accum_mm,days-to-landslide,imerg_precip_mm
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2015-05-28,0.130090,6996,,34.492840,8.421008,7,
2015-05-27,1.841735,6996,0.238765,49.121403,8.374829,6,
2015-05-26,1.864561,6996,,29.023668,8.374829,5,
2015-05-25,4.526540,6996,0.198632,27.058155,8.362926,4,
2015-05-24,0.011903,6996,0.237414,45.544178,3.836387,3,
...,...,...,...,...,...,...,...
2018-05-31,0.000000,11457,,18.645435,15.254440,5,
2018-05-30,0.000000,11457,0.280904,26.691477,14.833284,4,
2018-05-29,0.034117,11457,,30.477190,10.944297,3,
2018-05-28,0.000000,11457,0.327707,45.889702,10.386200,2,


### Export the dataFrame to a csv file

In [17]:
# converting to CSV file
output_file = os.path.join('data','glc_smap_esa_gpm_2015-2020_'+state+'.csv')
landslide_df.to_csv(output_file)

output_7d_file = os.path.join('data',
                              'glc_smap_esa_gpm_2015-2020_7day_'+state+'.csv')
landslide_precip7d_df.to_csv(output_7d_file)