# Creating the Input Dataset

## Layer Data
The purpose of this notebook is to gather the details about each layer for each snow pit with no tree canopy in the Grand Mesa. 

### Parameters
* Depth 
    * Each depth for each measurement layer is rounded to the nearest multiple of 10 centimeters
* Density
    * Density value of each layer, matched to the layer depth rounded to the nearest 10 centimeters
* Temperature 
    * Temperature taken at each layer, assuming the temperature is the same for the entire layer, given a single measurement
* Height
    * Maximum depth among the density layers of each snow pit
* Grain Size 
    * Conversions of data made:
        * "< 1 mm" = 0.5    
        * "1-2 mm" = 1.5
        * "2-4 mm" = 3
        * "4-6 mm " = 5
        * "> 6 mm" = 6.5
        * "[]" = NULL
    * If there are multiple grain sizes in a single layer, we are taking the median

In [2]:
# SQL text executed later through pd.read_sql.
# It builds four CTEs from public.layers:
#   Height  - max(depth) of the 'density' rows per pit_id/date/type
#   Density - density value per layer, depth rounded with ROUND(..., -1)
#   Temp    - temperature value per layer, depth rounded the same way
#   Grain   - grain-size class label mapped to a number (labels not listed in
#             the CASE become NULL); the original label is kept alongside
# The final SELECT joins public.sites to public.layers and LEFT JOINs each CTE
# on pit_id, date and the rounded layer depth, keeping only rows with
# site_name 'Grand Mesa' and tree_canopy 'No Trees'.
# NOTE(review): the returned depth column is T.depth (from the Temp CTE), so a
# layer row with no temperature at that rounded depth comes back with a NULL
# depth - confirm that is intended.
qry = '''
;With Height as (
    --snow pit height
    SELECT DISTINCT 
        pit_id
        , date
        , type
        , max(depth) as height
    FROM public.layers 
    WHERE type = 'density'
    GROUP BY 
        pit_id
        , date
        , type
), 
Density as (
    --snow pit density
    SELECT DISTINCT 
        pit_id
        , date
        , type
        , ROUND(CAST(depth as numeric), -1) as depth
        , bottom_depth
        , value as density
    FROM public.layers 
    WHERE type = 'density'
), 
Temp as (
    --snow pit temp
    SELECT pit_id
        , date
        , type
        , ROUND(CAST(depth as numeric), -1) as depth
        , value as temperature
    FROM public.layers 
    WHERE type = 'temperature'
), 
Grain as (
    --snow pit grain size
    SELECT pit_id
        , date
        , type
        , ROUND(CAST(depth as numeric), -1) as depth
        , CASE 
            WHEN value = '< 1 mm' then 0.5  
            WHEN value = '1-2 mm' then 1.5
            WHEN value = '2-4 mm' then 3
            WHEN value = '2-4mm' then 3
            WHEN value = '4-6 mm' then 5
            WHEN value = '4-6 mm ' then 5
            WHEN value = '> 6 mm' then 6.5
            else NULL 
        END as grain_size
        , value as original_grain_size
    FROM public.layers 
    WHERE type = 'grain_size'
)
SELECT DISTINCT 
    S.site_name
    , L.pit_id
    , S.latitude
    , S.longitude
    , L.date
    , H.height
    , T.depth
    , D.density
    , T.temperature
    , G.grain_size
    , G.original_grain_size
FROM public.sites S 
    INNER JOIN public.layers L on L.pit_id = S.pit_id and L.date = S.date
    LEFT JOIN Height H on H.pit_id = L.pit_id and H.date = L.date
    LEFT JOIN Density D on D.pit_id = L.pit_id and D.date = L.date and D.depth = ROUND(CAST(L.depth as numeric), -1)
    LEFT JOIN Temp T on T.pit_id = L.pit_id and T.date = L.date and T.depth =  ROUND(CAST(L.depth as numeric), -1)
    LEFT JOIN Grain G on G.pit_id = L.pit_id and G.date = L.date and G.depth = ROUND(CAST(L.depth as numeric), -1)
WHERE S.site_name = 'Grand Mesa' and S.tree_canopy = 'No Trees' 
ORDER BY L.pit_id, T.depth
'''

In [3]:
# Import the connection function from the snowexsql library
from snowexsql.db import get_db

# Connection string for the SnowEx database used throughout hackweek
db_name = 'snow:hackweek@db.snowexdata.org/snowex'

# get_db returns two handles, unpacked here as `engine` (passed to
# pd.read_sql below) and `session` (used for the ORM-style queries later on)
engine, session = get_db(db_name)


In [4]:
import pandas as pd

# Run the layer query against the database and load the rows into a DataFrame
df = pd.read_sql(qry, engine)
# Bare expression: displays the frame as the cell output
df

Unnamed: 0,site_name,pit_id,latitude,longitude,date,height,depth,density,temperature,grain_size,original_grain_size
0,Grand Mesa,COGM1C14_20200131,39.047216,-108.198415,2020-01-31,81.0,0.0,,-1.1,,
1,Grand Mesa,COGM1C14_20200131,39.047216,-108.198415,2020-01-31,81.0,10.0,,-2.2,,
2,Grand Mesa,COGM1C14_20200131,39.047216,-108.198415,2020-01-31,81.0,20.0,284.0,-3.3,3.0,2-4 mm
3,Grand Mesa,COGM1C14_20200131,39.047216,-108.198415,2020-01-31,81.0,30.0,315.5,-4.3,1.5,1-2 mm
4,Grand Mesa,COGM1C14_20200131,39.047216,-108.198415,2020-01-31,81.0,40.0,271.0,-5.5,,
...,...,...,...,...,...,...,...,...,...,...,...
1736,Grand Mesa,COGMWO_20200409_1615,39.033980,-108.213900,2020-04-09,113.0,90.0,413.3333333333333,-0.2,,
1737,Grand Mesa,COGMWO_20200409_1615,39.033980,-108.213900,2020-04-09,113.0,100.0,358.0,-0.2,0.5,< 1 mm
1738,Grand Mesa,COGMWO_20200409_1615,39.033980,-108.213900,2020-04-09,113.0,100.0,358.0,-0.2,,
1739,Grand Mesa,COGMWO_20200409_1615,39.033980,-108.213900,2020-04-09,113.0,110.0,332.5,-0.1,0.5,< 1 mm


In [5]:
# Preview: median grain size for every (pit, depth) pair, as a one-column frame
df.groupby(['pit_id', 'depth'])[['grain_size']].median().head()

Unnamed: 0_level_0,Unnamed: 1_level_0,grain_size
pit_id,depth,Unnamed: 2_level_1
COGM1C14_20200131,0.0,
COGM1C14_20200131,10.0,
COGM1C14_20200131,20.0,3.0
COGM1C14_20200131,30.0,1.5
COGM1C14_20200131,40.0,


In [6]:
 # Per-(pit, depth) median of grain_size; selecting the column before
 # aggregating gives a Series rather than a one-column DataFrame
 df.groupby(['pit_id', 'depth'])['grain_size'].median().head()

pit_id             depth
COGM1C14_20200131  0.0      NaN
                   10.0     NaN
                   20.0     3.0
                   30.0     1.5
                   40.0     NaN
Name: grain_size, dtype: float64

In [7]:
# Median grain size per (pit, depth); as_index=False keeps the two group keys
# as ordinary columns so the result can be merged back onto the layer rows
layer_keys = ['pit_id', 'depth']
df_medians = df.groupby(layer_keys, as_index=False)['grain_size'].median()
df_medians.head()

Unnamed: 0,pit_id,depth,grain_size
0,COGM1C14_20200131,0.0,
1,COGM1C14_20200131,10.0,
2,COGM1C14_20200131,20.0,3.0
3,COGM1C14_20200131,30.0,1.5
4,COGM1C14_20200131,40.0,


In [9]:
# Attach the per-layer median grain size to the remaining layer attributes.
# The two raw grain columns are dropped by name rather than by position, so
# the step no longer depends on them being the last two columns of the query
# result; the median 'grain_size' from df_medians is then the only grain column.
layer_attrs = df.drop(columns=['grain_size', 'original_grain_size'])
df_all = df_medians.merge(layer_attrs, on=['pit_id', 'depth'], how='left')

# Several raw rows can share one (pit, depth) key; collapse exact duplicates
df_all = df_all.drop_duplicates()

# The index is written too, so the file starts with an unnamed index column
df_all.to_csv('layer_data.csv')

## Taking Average of all the columns for a single pit
---
The following code is written in R. It takes the output extracted from the database, computes the average of the input parameters across all layers in a pit, and creates a single averaged value for each snow pit of the Grand Mesa site.

title: "Converting Multi-layer data to single layer data"
author: "Shrusti Ghela"
date: "7/13/2022"
---
```{r}
# Load the per-layer table (presumably the layer_data.csv written by the
# Python step above).
# NOTE(review): the path is hard-coded to a Desktop folder - adjust locally.
data <- read.csv("~/Desktop/layer_data.csv")
```

```{r}
# read.csv() already returns a data.frame, so this conversion is a no-op kept
# as a safeguard
data <- as.data.frame(data)
```

```{r}
library(dplyr)
library(tidyr)

# Fill missing grain_size and density values with the mean of the non-missing
# values from the same pit. A pit with no observed value for a column gets NaN
# from mean(), so its entries remain missing.
# Both columns are filled in a single grouped mutate, and the grouping is
# removed afterwards so later steps start from an ungrouped table.
data <- data %>%
  group_by(pit_id) %>%
  mutate(
    grain_size = replace_na(grain_size, mean(grain_size, na.rm = TRUE)),
    density = replace_na(density, mean(density, na.rm = TRUE))
  ) %>%
  ungroup()
```

```{r}
# Preview the first rows of the table after the missing-value fill
head(data)
```

```{r}
# Collapse the layers of each pit into one row holding the mean grain size,
# density and temperature across that pit's layers. The remaining grouping
# columns are carried through as identifiers.
# across() replaces the superseded summarise_at(), and .groups = "drop"
# returns an ungrouped table.
# NOTE(review): temperature is averaged without na.rm, so a pit with any
# missing temperature gets NA - confirm that is intended.
layer_merge <- data %>%
  group_by(pit_id, height, site_name, latitude, longitude, date) %>%
  summarise(
    across(all_of(c("grain_size", "density", "temperature")), mean),
    .groups = "drop"
  )
```

```{r}
# Add two derived per-pit columns computed from the averaged layer values:
# `b` is linear in grain size and density, `zeta` is a power law in grain size.
# The coefficients are hard-coded.
layer_merge <- layer_merge %>%
  mutate(
    b = 0.3120 + 0.2773 * grain_size + 0.0040223 * density,
    zeta = 15225 * grain_size^(-0.978)
  )
```

```{r}
# Save the one-row-per-pit table; write.csv also writes the row names as the
# first column by default
write.csv(layer_merge, "~/Desktop/layer_merge.csv")
```

# Creating the Output Dataset

In [10]:
from snowexsql.db import get_db
from snowexsql.data import ImageData, SiteData, LayerData
from rasterio.plot import show
from sqlalchemy.sql import func
import geoalchemy2.functions as gfunc
from geoalchemy2.types import Raster
from geoalchemy2.shape import to_shape
import geopandas as gpd
from snowexsql.conversions import raster_to_rasterio
from snowexsql.conversions import points_to_geopandas, query_to_geopandas, query_to_pandas
import matplotlib.pyplot as plt
import numpy as np
from datetime import date
import pandas as pd

In [11]:
def get_bufferedPit(pit_id, buffer_dist=50):
    """
    Build a buffer geometry around the location of a snow pit.

    Uses the module-level `session` to look up the pit and to run ST_Buffer.

    Args:
        pit_id: pit identifier matched against LayerData.pit_id
        buffer_dist: distance handed to ST_Buffer (default 50). The original
            code describes it as meters; that presumes the stored geometry
            uses a metric coordinate system - TODO confirm.

    Returns:
        The ST_Buffer result for the pit geometry, as returned by the database.

    Raises:
        IndexError: if no layer rows exist for pit_id (same exception type the
            previous `sites[0]` lookup raised, now with a readable message).
    """
    # The pit location is taken from a single layer row, so only one is fetched
    layer = session.query(LayerData).filter(LayerData.pit_id==pit_id).first()
    if layer is None:
        raise IndexError("no layers found for pit_id {!r}".format(pit_id))

    # Let the database compute the buffer around the pit geometry
    q = session.query(gfunc.ST_Buffer(layer.geom, buffer_dist))
    buffered_pit = q.all()[0][0]
    return buffered_pit

In [12]:
def average_amplitudes(raster):
    """
    Average the amplitudes of a 2 dimensional raster band.

    input: 2 dimensional array (or list of lists) of amplitudes, one
           sequence of values per row
    output: mean of the per-row means; for rows of equal length this equals
            the mean over the whole band

    The values are read from the `raster` argument itself. The previous
    version ignored it and re-read the band from a global `dataset` on every
    iteration.

    Raises ZeroDivisionError when `raster` (or one of its rows) is empty.
    """
    # mean of each row of the band
    row_means = [sum(row) / len(row) for row in raster]

    # average of the row means
    avg_amp = sum(row_means) / len(row_means)

    return avg_amp

In [13]:
def average_polarizations(csv1, csv2):
    '''
    Average the amplitudes stored in two csv files, pit by pit.

    Input: two csv paths; in each file column '0' holds the pit_id and
           column '1' holds the amplitude

    Output: Pandas dataframe with columns 'pit_id' and 'avg_amp', the mean of
            the two amplitudes. Pits are taken from csv1; a pit that is
            missing from csv2 gets NaN.

    '''
    def _load(path, amp_name):
        # keep only the id/amplitude columns, dropping the unnamed index column
        table = pd.read_csv(path)
        return table[['0', '1']].rename(columns={'0': 'pit_id', '1': amp_name})

    first = _load(csv1, 'c1_amp')
    second = _load(csv2, 'c2_amp')

    # left join keeps every pit listed in the first file
    merged = pd.merge(first, second, on=['pit_id'], how='left')
    merged['avg_amp'] = (merged['c1_amp'] + merged['c2_amp']) / 2

    return merged.loc[:, ['pit_id', 'avg_amp']]

In [24]:
# Pit Site Identifier of interest
site_name = 'Grand Mesa'

# Connect to the SnowEx database
db_name = 'snow:hackweek@db.snowexdata.org/snowex'
engine, session = get_db(db_name)

datasets = []
amplitudes = []

# Site ids of treeless Grand Mesa sites dated 2020-01-28 to 2020-03-01 inclusive
#q = session.query(SiteData).filter(SiteData.site_id==site_id)
q = session.query(SiteData.site_id).filter(SiteData.site_name==site_name)
q = q.filter(SiteData.tree_canopy=="No Trees")
q = q.filter(SiteData.date>=date(2020,1,28))
q = q.filter(SiteData.date<=date(2020,3,1))
sites = [row[0] for row in q.all()]

# Distinct pit ids that have layer data at those sites
q_layer = session.query(LayerData.pit_id).filter(LayerData.site_id.in_(sites))
q_layer = q_layer.distinct()
df = query_to_pandas(q_layer, engine)
# sort_values returns a new frame; keep it so the pits are processed in order
df = df.sort_values(by=['pit_id'],ascending=True)

# Grab the rasters, union them and convert them as tiff when done
q_raster = session.query(func.ST_AsTiff(func.ST_Union(ImageData.raster, type_=Raster)))

# Only grab the UAVSAR InSAR amplitude rasters over Grand Mesa for one
# overpass/polarization (selected through the description text)
q_raster = q_raster.filter(ImageData.type == 'insar amplitude').filter(ImageData.observers=='UAVSAR team, JPL').filter(ImageData.site_name == "Grand Mesa")
q_raster = q_raster.filter(ImageData.description.in_
                           (["Overpass Duration: 2020-02-12 16:47:20 - 2020-02-12 16:49:45 (UTC), DEM used = Intermap Elevation Model DTM, Polarization = HH"]))

#q_raster = q_raster.filter(ImageData.description.in_
#                       (["Overpass Duration: 2020-02-01 02:13:16 - 2020-02-01 02:15:58 (UTC), DEM used = Intermap Elevation Model DTM, Polarization = "+i,
#                       "Overpass Duration: 2020-02-12 16:47:20 - 2020-02-12 16:49:45 (UTC), DEM used = Intermap Elevation Model DTM, Polarization = "+i]))

# `pit_id` is used as the loop variable so the builtin `id` is not shadowed
for pit_id in df.pit_id:
    try:
        # Restrict the raster union to tiles intersecting the buffered pit
        buffered_pit = get_bufferedPit(pit_id)
        q_raster_new = q_raster.filter(gfunc.ST_Intersects(ImageData.raster, buffered_pit))
        rasters = q_raster_new.all()
        dataset = raster_to_rasterio(session, rasters)[0]
        avg_amplitudes = average_amplitudes(dataset.read(1))
        amplitudes.append((pit_id,avg_amplitudes))
    except TypeError as err:
        # Best effort: a pit that raises TypeError (presumably because no
        # raster intersects it - TODO confirm) is skipped, but reported
        # instead of being dropped silently
        print('Skipping pit {}: {}'.format(pit_id, err))
        continue

# Columns default to 0 and 1; average_polarizations reads them back as '0'/'1'
amp = pd.DataFrame(amplitudes)
amp.to_csv('amplitudes_HH2.csv')
    

## Merging Amplitudes

In [27]:
# Per-polarization amplitude files: one csv per overpass
HH1, HH2 = 'amplitudes_HH.csv', 'amplitudes_HH2.csv'
HV1, HV2 = 'amplitudes_HV.csv', 'amplitudes_HV2.csv'
VV1, VV2 = 'amplitudes_VV.csv', 'amplitudes_VV2.csv'
VH1, VH2 = 'amplitudes_VH.csv', 'amplitudes_VH2.csv'

# Average the two overpasses of each polarization, pit by pit
HH_avg = average_polarizations(HH1, HH2)
HV_avg = average_polarizations(HV1, HV2)
VV_avg = average_polarizations(VV1, VV2)
VH_avg = average_polarizations(VH1, VH2)

# Only the last expression of a cell is displayed, so the intermediate
# .head() calls were dropped; this previews the VH result as before
VH_avg.head()

Unnamed: 0,pit_id,avg_amp
0,COGMSO_20200321_1006,0.054984
1,COGM2S37_20200201,0.034575
2,COGM5S31_20200130,0.034575
3,COGMCO_20200318_0825,0.047769
4,COGMSO_20200328_1630,0.054984


In [28]:
# Label the amplitude column of each averaged table with its polarization
for frame, polarization in ((HH_avg, 'HH'), (HV_avg, 'HV'), (VV_avg, 'VV'), (VH_avg, 'VH')):
    frame.columns = ['pit_id', polarization]

# Start from HH and left-join the other polarizations on pit_id, in the
# column order HH, HV, VH, VV
Amplitudes = HH_avg
for other in (HV_avg, VH_avg, VV_avg):
    Amplitudes = Amplitudes.merge(other, on='pit_id', how='left')
Amplitudes

Unnamed: 0,pit_id,HH,HV,VH,VV
0,COGMSO_20200321_1006,0.151810,0.054633,0.054984,0.123060
1,COGM2S37_20200201,0.090272,0.033710,0.034575,0.071622
2,COGM5S31_20200130,0.090272,0.033710,0.034575,0.071622
3,COGMCO_20200318_0825,0.149241,0.049581,0.047769,0.103564
4,COGMSO_20200328_1630,0.151810,0.054633,0.054984,0.123060
...,...,...,...,...,...
123,COGM2S3_20200129,0.069852,0.027647,0.032221,0.069523
124,COGMSO_20191219_1600,0.151810,0.054633,0.054984,0.123060
125,COGM1N20_20200205,0.094391,0.034148,0.036019,0.078598
126,COGM2S35_20200130,0.090272,0.033710,0.034575,0.071622


In [29]:
# Load the per-pit layer averages.
# NOTE(review): nothing in this notebook writes 'final_data.csv' (the R step
# writes layer_merge.csv) - presumably that file renamed; confirm.
data = pd.read_csv('final_data.csv')

In [30]:
# Keep only the pit id and the four per-pit snow parameters
pits = data[['pit_id', 'height', 'grain_size', 'density', 'temperature']]
pits.head()

Unnamed: 0,pit_id,height,grain_size,density,temperature
0,COGM1C1_20200131,83,1.375,249.388889,-5.71
1,COGM1C1_20200208,77,1.571429,289.1875,-4.822222
2,COGM1C14_20200131,81,2.15,263.142857,-5.488889
3,COGM1C5_20200212,75,1.416667,302.833333,-6.577778
4,COGM1C7_20200131,96,1.75,277.833333,-5.990909


In [31]:
# Left join on pit_id: every row of Amplitudes is kept, and pits without a
# match in `pits` get NaN for the snow parameters
final_data = Amplitudes.merge(pits, how = 'left', on = 'pit_id')

In [32]:
# Write the combined table under a new name; the DataFrame index is written
# as the first column
final_data.to_csv('final_data(2).csv')