In [1]:
from pathlib import Path
import pandas as pd
import numpy as np

In [2]:
# Defining directories
# All paths are resolved relative to the parent of the current working directory.
dir_path = Path.cwd().parent
prep_folder = dir_path / "data" / "prep"
s1_path = prep_folder / "sent1.parquet"

processed_folder = dir_path / "data" / "processed"
# exist_ok=True makes the creation idempotent and removes the race between the
# existence check and the mkdir call.
processed_folder.mkdir(parents=True, exist_ok=True)

In [3]:
# Read the Sentinel-1 parquet file located at `s1_path` and display the frame.
df = pd.read_parquet(path=s1_path)
df

Unnamed: 0,date,long,lat,species_names,vv_Descending,vh_Descending,vv_Ascending,vh_Ascending
0,2019-08-31,75.66669580000007,32.25107380000003,bambusa bamboo,244.0,178.0,,
1,2019-08-31,75.66676390000003,32.25073910000003,bambusa bamboo,324.0,129.0,,
2,2019-08-31,75.66686810000004,32.25026770000005,bambusa bamboo,168.0,109.0,,
3,2019-08-31,75.66719240000003,32.249825100000066,bambusa bamboo,257.0,123.0,,
4,2019-08-31,75.66729090000007,32.24962370000003,bambusa bamboo,224.0,87.0,,
...,...,...,...,...,...,...,...,...
75411374,2021-11-19,76.47167330000008,33.02108330000004,himalayan birch,80.0,52.0,,
75411375,2021-11-19,76.77044000000006,32.84087500000004,himalayan birch,70.0,50.0,,
75411376,2021-11-19,76.85772540000005,32.87289300000003,himalayan birch,100.0,70.0,,
75411377,2021-11-19,76.85831560000008,32.87155080000008,himalayan birch,76.0,75.0,,


In [4]:
# Count missing (True) vs. present (False) values for each band column.
band_names = ("vh_Ascending", "vv_Ascending", "vh_Descending", "vv_Descending")
print(*(df[name].isna().value_counts() for name in band_names))

vh_Ascending
False    40802798
True     34608581
Name: count, dtype: int64 vv_Ascending
False    40802808
True     34608571
Name: count, dtype: int64 vh_Descending
True     40842130
False    34569249
Name: count, dtype: int64 vv_Descending
True     40802821
False    34608558
Name: count, dtype: int64


In [5]:
# Scaling for Sentinel-1: convert every band to decibels (10 * log10).
for band in ["vh_Ascending", "vv_Ascending", "vh_Descending", "vv_Descending"]:
    # log10 is undefined for values <= 0 (numpy would return -inf / NaN and emit
    # a RuntimeWarning). Mask such values to NaN first so they are treated as
    # missing instead of leaking -inf into later ratios and medians.
    df[band] = 10 * np.log10(df[band].where(df[band] > 0))

# Extracting month and year; the date column is parsed only once and reused.
dates = pd.to_datetime(df["date"])
df["year"] = dates.dt.year
df["month"] = dates.dt.month
del dates  # release the temporary datetime column
df = df.drop(columns="date")


In [6]:
# Re-count missing (True) vs. present (False) values for each band column.
band_names = ("vh_Ascending", "vv_Ascending", "vh_Descending", "vv_Descending")
print(*(df[name].isna().value_counts() for name in band_names))

vh_Ascending
False    40802798
True     34608581
Name: count, dtype: int64 vv_Ascending
False    40802808
True     34608571
Name: count, dtype: int64 vh_Descending
True     40842130
False    34569249
Name: count, dtype: int64 vv_Descending
True     40802821
False    34608558
Name: count, dtype: int64


In [7]:
def make_s1_indices(df):
    """Add Sentinel-1 band-combination columns to ``df`` and return it.

    Every index is computed twice: with suffix ``_A`` from the
    ``vv_Ascending`` / ``vh_Ascending`` columns and with suffix ``_D`` from the
    ``vv_Descending`` / ``vh_Descending`` columns.

    Columns written (per suffix), in this order:

    * ``VV_VH``    -- vv / vh
    * ``VH_VV``    -- vh / vv
    * ``SAR_NDVI`` -- (vh - vv) / (vh + vv)
    * ``DVI``      -- vh - vv
    * ``SVI``      -- vh + vv
    * ``RDVI``     -- VH_VV - VV_VH
    * ``NRDVI``    -- RDVI / (VH_VV + VV_VH)
    * ``SSDVI``    -- vh ** 2 - vv ** 2

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the four band columns named above.

    Returns
    -------
    pandas.DataFrame
        The very same object that was passed in; the new columns are written
        onto it, so the caller's frame is modified.
    """
    # (column-name fragment in the band columns, suffix used for the outputs)
    orbits = (("Ascending", "A"), ("Descending", "D"))

    # Indices that depend only on the vv / vh pair of a single orbit.
    pair_formulas = (
        ("VV_VH", lambda vv, vh: vv / vh),
        ("VH_VV", lambda vv, vh: vh / vv),
        ("SAR_NDVI", lambda vv, vh: (vh - vv) / (vh + vv)),
        ("DVI", lambda vv, vh: vh - vv),
        ("SVI", lambda vv, vh: vh + vv),
    )
    # Index-major / orbit-minor iteration keeps the _A, _D column interleaving.
    for label, formula in pair_formulas:
        for orbit, tag in orbits:
            df[f"{label}_{tag}"] = formula(df[f"vv_{orbit}"], df[f"vh_{orbit}"])

    # Relative difference of the two ratio columns computed above.
    for _, tag in orbits:
        df[f"RDVI_{tag}"] = df[f"VH_VV_{tag}"] - df[f"VV_VH_{tag}"]

    # RDVI normalised by the sum of the two ratio columns.
    for _, tag in orbits:
        ratio_sum = df[f"VH_VV_{tag}"] + df[f"VV_VH_{tag}"]
        df[f"NRDVI_{tag}"] = df[f"RDVI_{tag}"] / ratio_sum

    # Difference of the squared bands.
    for orbit, tag in orbits:
        df[f"SSDVI_{tag}"] = df[f"vh_{orbit}"].pow(2) - df[f"vv_{orbit}"].pow(2)

    return df

# Compute the band-combination indices. make_s1_indices writes the new columns
# onto the frame it receives and returns that same object, so `df_indices` and
# `df` refer to the same DataFrame after this call.
df_indices = make_s1_indices(df)
df_indices

Unnamed: 0,long,lat,species_names,vv_Descending,vh_Descending,vv_Ascending,vh_Ascending,year,month,VV_VH_A,...,DVI_A,DVI_D,SVI_A,SVI_D,RDVI_A,RDVI_D,NRDVI_A,NRDVI_D,SSDVI_A,SSDVI_D
0,75.66669580000007,32.25107380000003,bambusa bamboo,23.873898,22.504200,,,2019,8,,...,,-1.369698,,46.378098,,-0.118236,,-0.059015,,-63.524000
1,75.66676390000003,32.25073910000003,bambusa bamboo,25.105450,21.105897,,,2019,8,,...,,-3.999553,,46.211347,,-0.348809,,-0.171811,,-184.824732
2,75.66686810000004,32.25026770000005,bambusa bamboo,22.253093,20.374265,,,2019,8,,...,,-1.878828,,42.627358,,-0.176646,,-0.087980,,-80.089466
3,75.66719240000003,32.249825100000066,bambusa bamboo,24.099331,20.899051,,,2019,8,,...,,-3.200280,,44.998382,,-0.285926,,-0.141524,,-144.007428
4,75.66729090000007,32.24962370000003,bambusa bamboo,23.502480,19.395193,,,2019,8,,...,,-4.107288,,42.897673,,-0.386528,,-0.189753,,-176.193082
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
75411374,76.47167330000008,33.02108330000004,himalayan birch,19.030900,17.160033,,,2021,11,,...,,-1.870866,,36.190933,,-0.207331,,-0.103113,,-67.708402
75411375,76.77044000000006,32.84087500000004,himalayan birch,18.450980,16.989700,,,2021,11,,...,,-1.461280,,35.440680,,-0.165208,,-0.082323,,-51.788770
75411376,76.85772540000005,32.87289300000003,himalayan birch,20.000000,18.450980,,,2021,11,,...,,-1.549020,,38.450980,,-0.161404,,-0.080441,,-59.561322
75411377,76.85831560000008,32.87155080000008,himalayan birch,18.808136,18.750613,,,2021,11,,...,,-0.057523,,37.558749,,-0.006126,,-0.003063,,-2.160503


### Making Monthly Medians

In [10]:
# Making monthly medians: one row per (long, lat, year, month, species_names).
index_cols = ["long", "lat", "year", "month", "species_names"]
monthly_groups = df_indices.groupby(index_cols)
df_month_medians = monthly_groups.median().reset_index()

df_month_medians

Unnamed: 0,long,lat,year,month,species_names,vv_Descending,vh_Descending,vv_Ascending,vh_Ascending,VV_VH_A,...,DVI_A,DVI_D,SVI_A,SVI_D,RDVI_A,RDVI_D,NRDVI_A,NRDVI_D,SSDVI_A,SSDVI_D
0,75.66669580000007,32.25107380000003,2018,1,bambusa bamboo,23.174552,19.014211,22.405492,19.777236,1.104054,...,-2.111655,-4.160341,42.699330,42.188762,-0.198301,-0.398812,-0.098667,-0.195303,-90.166242,-175.427345
1,75.66669580000007,32.25107380000003,2018,2,bambusa bamboo,23.242825,19.956352,22.166288,21.461723,1.033557,...,-0.704565,-3.694342,43.628012,43.371197,-0.065693,-0.344880,-0.032812,-0.169932,-30.259764,-156.206808
2,75.66669580000007,32.25107380000003,2018,3,bambusa bamboo,21.281183,18.450980,23.676237,20.093502,1.179084,...,-3.582735,-2.830202,43.769740,39.732163,-0.330660,-0.291276,-0.163029,-0.143803,-156.466199,-111.066850
3,75.66669580000007,32.25107380000003,2018,4,bambusa bamboo,22.552725,19.867717,23.384565,19.956352,1.212000,...,-4.090376,-2.337480,44.595282,41.528996,-0.386918,-0.218054,-0.189937,-0.108385,-174.572138,-100.525548
4,75.66669580000007,32.25107380000003,2018,5,bambusa bamboo,22.848040,19.700091,22.878017,19.956352,1.116375,...,-2.467820,-3.147949,41.451342,42.548131,-0.220619,-0.303061,-0.109644,-0.149384,-110.754026,-132.122432
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12129091,78.12624330000006,31.258713300000068,2024,8,rhododendron,23.092960,19.633255,22.068678,21.522453,1.025432,...,-0.546225,-2.763381,43.591130,43.201884,-0.049914,-0.255674,-0.024937,-0.126749,-23.920265,-119.981582
12129092,78.12624330000006,31.258713300000068,2024,9,rhododendron,19.493900,19.030900,25.024271,21.105897,1.156163,...,-3.446241,-2.238640,44.801507,36.569602,-0.291233,-0.250365,-0.144097,-0.124213,-163.981665,-78.146481
12129093,78.12624330000006,31.258713300000068,2024,10,rhododendron,17.960884,17.548700,22.818354,20.740081,1.100403,...,-2.078273,-0.230907,43.558436,35.348660,-0.191551,-0.024033,-0.095325,-0.012014,-90.421479,-8.881958
12129094,78.12624330000006,31.258713300000068,2024,11,rhododendron,18.920946,16.720979,22.174839,19.344985,1.129331,...,-2.461291,-2.199967,41.519824,35.641925,-0.243852,-0.247841,-0.121029,-0.122980,-105.804204,-78.411074


### Making Season Medians 

In [12]:
# Creating seasons: months 3-5 are labelled "summer", months 10-12 "winter";
# every other month gets the placeholder "NA" and is excluded below.
season_index_cols = ["long", "lat", "season", "species_names"]

summer_months = [3, 4, 5]
winter_months = [10, 11, 12]
conds = [
    df_month_medians["month"].isin(summer_months),
    df_month_medians["month"].isin(winter_months),
]
opts = ["summer", "winter"]

# Note: this adds the `season` column to `df_month_medians` itself.
df_month_medians["season"] = np.select(conds, opts, default="NA")

# Median of the monthly medians per location, species and season.
in_a_season = df_month_medians["season"] != "NA"
df_season_medians = (
    df_month_medians[in_a_season]
    .drop(["year", "month"], axis=1)
    .groupby(season_index_cols)
    .median()
    .reset_index()
)
df_season_medians

Unnamed: 0,long,lat,season,species_names,vv_Descending,vh_Descending,vv_Ascending,vh_Ascending,VV_VH_A,VV_VH_D,...,DVI_A,DVI_D,SVI_A,SVI_D,RDVI_A,RDVI_D,NRDVI_A,NRDVI_D,SSDVI_A,SSDVI_D
0,75.66669580000007,32.25107380000003,summer,bambusa bamboo,23.268281,20.063343,22.649717,20.293838,1.120731,1.163929,...,-2.467820,-3.147949,43.769740,43.768323,-0.228456,-0.303061,-0.113490,-0.149384,-107.109181,-135.341979
1,75.66669580000007,32.25107380000003,winter,bambusa bamboo,23.263359,20.024829,23.829795,20.665937,1.155927,1.161241,...,-3.213192,-3.037884,44.380509,43.458832,-0.290821,-0.299319,-0.143897,-0.147816,-141.958396,-126.133744
2,75.66676390000003,32.25073910000003,summer,bambusa bamboo,22.741578,19.294189,20.606978,17.558749,1.169895,1.188880,...,-2.753680,-3.424227,37.769916,42.265484,-0.315117,-0.347752,-0.155639,-0.171306,-108.224494,-139.602754
3,75.66676390000003,32.25073910000003,winter,bambusa bamboo,22.861569,19.822712,21.430148,17.856214,1.163627,1.154994,...,-2.977274,-2.909095,39.494368,42.814092,-0.304246,-0.289188,-0.150393,-0.143106,-117.209761,-126.443378
4,75.66686810000004,32.25026770000005,summer,bambusa bamboo,23.820170,21.103939,22.041200,20.128372,1.112035,1.142857,...,-2.189438,-2.959898,42.169572,45.017848,-0.212783,-0.267857,-0.105794,-0.132743,-90.367477,-131.123319
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
288783,78.12614830000007,31.258660000000077,winter,rhododendron,20.729399,18.222193,23.053514,20.899051,1.103303,1.104609,...,-2.215715,-1.956642,43.576490,38.820666,-0.190022,-0.191686,-0.093523,-0.094224,-104.175656,-82.358827
288784,78.12618260000005,31.25882800000005,summer,rhododendron,20.170333,18.388491,23.283796,20.569049,1.106720,1.113421,...,-2.270036,-2.041200,43.710179,38.424844,-0.203148,-0.210680,-0.101054,-0.104669,-98.844784,-80.605957
288785,78.12618260000005,31.25882800000005,winter,rhododendron,20.934217,18.750613,22.487620,20.334238,1.099983,1.128706,...,-2.227037,-2.386038,42.244813,40.311660,-0.187279,-0.242736,-0.092686,-0.120484,-98.986267,-93.814746
288786,78.12624330000006,31.258713300000068,summer,rhododendron,20.211893,18.195439,23.856063,21.488256,1.092579,1.100190,...,-2.200201,-2.190940,44.755114,37.654700,-0.177313,-0.191256,-0.088310,-0.095194,-109.419844,-82.660233


There are no NaNs in the resulting frame because it is created from monthly medians.

### Widening the frame

In [14]:
# One row per (long, lat, species_names); each value column is split into a
# "summer" and a "winter" column.
df_wide = (
    df_season_medians
    .pivot(index=["long", "lat", "species_names"], columns=["season"])
    .reset_index()
)
# Flatten the two-level column labels by concatenating the levels without a
# separator (e.g. "vv_Descending" + "summer" -> "vv_Descendingsummer").
new_cols = list(map("".join, df_wide.columns))
df_wide.columns = new_cols
df_wide

Unnamed: 0,long,lat,species_names,vv_Descendingsummer,vv_Descendingwinter,vh_Descendingsummer,vh_Descendingwinter,vv_Ascendingsummer,vv_Ascendingwinter,vh_Ascendingsummer,...,RDVI_Dsummer,RDVI_Dwinter,NRDVI_Asummer,NRDVI_Awinter,NRDVI_Dsummer,NRDVI_Dwinter,SSDVI_Asummer,SSDVI_Awinter,SSDVI_Dsummer,SSDVI_Dwinter
0,75.66669580000007,32.25107380000003,bambusa bamboo,23.268281,23.263359,20.063343,20.024829,22.649717,23.829795,20.293838,...,-0.303061,-0.299319,-0.113490,-0.143897,-0.149384,-0.147816,-107.109181,-141.958396,-135.341979,-126.133744
1,75.66676390000003,32.25073910000003,bambusa bamboo,22.741578,22.861569,19.294189,19.822712,20.606978,21.430148,17.558749,...,-0.347752,-0.289188,-0.155639,-0.150393,-0.171306,-0.143106,-108.224494,-117.209761,-139.602754,-126.443378
2,75.66686810000004,32.25026770000005,bambusa bamboo,23.820170,23.820170,21.103939,21.139434,22.041200,22.741578,20.128372,...,-0.267857,-0.239757,-0.105794,-0.118650,-0.132743,-0.119027,-90.367477,-108.608012,-131.123319,-119.597271
3,75.66719240000003,32.249825100000066,bambusa bamboo,22.982985,23.463530,20.969100,20.902206,23.172638,23.221306,21.072100,...,-0.214604,-0.256731,-0.093882,-0.131304,-0.106613,-0.125988,-92.344290,-131.663300,-100.125021,-135.078548
4,75.66729090000007,32.24962370000003,bambusa bamboo,22.893769,23.415235,20.631469,20.681859,23.302525,23.009431,20.681859,...,-0.272134,-0.222875,-0.121364,-0.118761,-0.134806,-0.110752,-111.210932,-119.285919,-122.545141,-113.844266
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
144389,78.12497830000007,31.25865170000003,rhododendron,19.558451,19.129077,17.003553,17.132556,22.647922,22.478554,20.413927,...,-0.249542,-0.214223,-0.120071,-0.110047,-0.123811,-0.106502,-111.376078,-105.284091,-90.506164,-63.260309
144390,78.12522000000007,31.258880000000033,rhododendron,18.325089,18.310009,16.627578,17.392111,23.201463,23.304138,20.718820,...,-0.214484,-0.239068,-0.114863,-0.125795,-0.106631,-0.118615,-118.141058,-112.136237,-71.033458,-78.242384
144391,78.12614830000007,31.258660000000077,rhododendron,19.411484,20.729399,17.558749,18.222193,24.116197,23.053514,21.303338,...,-0.235199,-0.191686,-0.110914,-0.093523,-0.116795,-0.094224,-124.162556,-104.175656,-89.842285,-82.358827
144392,78.12618260000005,31.25882800000005,rhododendron,20.170333,20.934217,18.388491,18.750613,23.283796,22.487620,20.569049,...,-0.210680,-0.242736,-0.101054,-0.092686,-0.104669,-0.120484,-98.844784,-98.986267,-80.605957,-93.814746


### Exporting Frames

In [15]:
# Output locations inside the processed-data folder.
sent_1_month_medians_path = processed_folder / "sent_1_month_medians.parquet"
sent_1_season_medians_path = processed_folder / "sent_1_season_medians.parquet"
sent1_season_medians_widened_path = processed_folder / "sent1_season_medians_widened.csv"

In [16]:
# Write the monthly and seasonal median frames to parquet without the index.
for frame, target in (
    (df_month_medians, sent_1_month_medians_path),
    (df_season_medians, sent_1_season_medians_path),
):
    frame.to_parquet(target, index=False)

In [17]:
# Write the widened seasonal frame as CSV without the index column.
df_wide.to_csv(path_or_buf=sent1_season_medians_widened_path, index=False)