Merge pull request #7 from esowc/find-efas-cw-overlap

Find efas cw overlap
ECMWFCode4Earth · Aug 14, 2022 · 49f3311 · 49f3311
2 parents f8dd3cc + db2264a
commit 49f3311
Show file tree

Hide file tree

Showing 7 changed files with 372 additions and 647 deletions.
diff --git a/1_nb_read_efas_glofas.ipynb b/1_nb_read_efas_glofas.ipynb
diff --git a/2_nb_eda_cw.ipynb b/2_nb_eda_cw.ipynb
diff --git a/3_s_find_efas_id_cw.py b/3_s_find_efas_id_cw.py
@@ -0,0 +1,55 @@
+from cw.io import read_cw_data, read_efas_data
+from cw.cfg import DATA_DIR
+import matplotlib.pylab as plt
+import numpy as np
+from haversine import haversine, Unit
+
+# Match every CrowdWater station to its nearest valid EFAS grid cell and
+# record the match in DATA_DIR/station_ind.tsv.
+
+# Load the CrowdWater observations and keep only the columns used below.
+cw_data = read_cw_data()
+
+keep = ['ROOT_ID', 'LATITUDE', 'LONGITUDE', 'WATER_LEVEL', 'SPOTTED_AT']
+cw_data = cw_data.filter(keep)
+
+# Drop rows that carry no usable water-level reading.
+cw_data = cw_data[cw_data.WATER_LEVEL.notna()]
+cw_data = cw_data[cw_data.WATER_LEVEL != "false"]
+
+# Keep only stations (ROOT_ID) that have more than one reading.  Boolean
+# indexing also works when no rows are left, where unpacking a zip of the
+# sorted counts would raise.
+station_ids, reading_counts = np.unique(cw_data.ROOT_ID, return_counts=True)
+station_id = station_ids[reading_counts > 1]
+cw_data = cw_data[cw_data.ROOT_ID.isin(station_id)]
+
+efas = read_efas_data()
+
+# ROOT_ID -> (distance in km, lat index, lon index) of the closest cell found.
+distance_dict = dict()
+# Mean of dis06 over its first axis; cells where the mean is NaN are skipped.
+mask = efas.dis06.mean(axis=0).values
+n_station = 0
+
+# Loop invariants: materialise the coordinate grids once instead of once per
+# grid cell per station.
+lat_grid = efas.latitude.values
+lon_grid = efas.longitude.values
+
+# (lat_index, lon_index) pairs of all non-NaN cells, in row-major order so
+# the scan order matches a nested lat/lon loop.
+# NOTE(review): the scan window is hard-coded to 900 x 1000 cells; confirm
+# that it matches the shape of the EFAS grid.
+valid_cells = np.argwhere(~np.isnan(mask[:900, :1000])).tolist()
+
+# The output file is opened once for the whole run instead of being
+# re-opened in append mode for every grid cell.
+with open(DATA_DIR + 'station_ind.tsv', "w") as f:
+    f.write("ROOT_ID\tMinimum Distance (KM)\tLat Index\tLon Index\n")
+
+    for _, row in cw_data.iterrows():
+        # NOTE(review): a station with no cell closer than 2.5 km is never
+        # added to distance_dict, so it is rescanned for each of its rows
+        # and counted again in n_station.
+        if row.ROOT_ID in distance_dict:
+            continue
+
+        n_station += 1
+        station_pos = (float(row.LATITUDE), float(row.LONGITUDE))
+        min_d = 2.5  # search radius in km; only closer cells are recorded
+
+        for lat_index, lon_index in valid_cells:
+            lat = lat_grid[lat_index][lon_index]
+            lon = lon_grid[lat_index][lon_index]
+            dist = haversine(station_pos, (lat, lon), unit=Unit.KILOMETERS)
+
+            if dist < min_d:
+                # One row is written per improvement, so the last row of a
+                # ROOT_ID in the file is its nearest cell.
+                min_d = dist
+                f.write(f"{row.ROOT_ID}\t{min_d:.3f}\t{lat_index}\t{lon_index}\n")
+                distance_dict[row.ROOT_ID] = (min_d, lat_index, lon_index)
+
+        print(f"{n_station} station discovered")
diff --git a/cw/io.py b/cw/io.py
@@ -12,7 +12,7 @@
 # ##########################################################
 
 from datetime import datetime
-from cw.cfg import GLOFAS_DIR, EFAS_DIR, CW_DIR
+from cw.cfg import GLOFAS_DIR, EFAS_DIR, CW_DIR, DATA_DIR
 import xarray as xr
 import datetime
 import pandas as pd
@@ -27,14 +27,13 @@ def read_glofas_data(year:int=2021, day:int=1, month:int=1):
 
     return glofas_dataset.get("dis24").sel(time=datetime.datetime(year, month, day))
 
-def read_efas_data(year:int=2021, day:int=1, month:int=1):
+def read_efas_data():
     """
     Opens the EFAS GRIB file 'efas_2017_2021.grib' located in EFAS_DIR.

     Returns the dataset produced by xr.open_dataset as-is; no variable or
     time selection is applied here.
     """
-    file_name = EFAS_DIR + 'efas_' + str(year) + '.grib'
-    print(file_name)
+    file_name = EFAS_DIR + 'efas_2017_2021.grib'
     efas_dataset = xr.open_dataset(file_name)
-    return efas_dataset.get("dis24").sel(time=datetime.datetime(year, month, day, hour=6))
+    return efas_dataset
 
 def read_cw_data():
     """
@@ -48,3 +47,9 @@ def read_cw_data():
     return data
 
 
+def read_cw_eu_station():
+    """
+    Reads the tab-separated station index file 'station_ind.tsv' from
+    DATA_DIR and returns it as parsed by pandas.
+    """
+    return pd.read_csv(DATA_DIR + 'station_ind.tsv', sep="\t")
diff --git a/cw/misc.py b/cw/misc.py
@@ -0,0 +1,10 @@
+# Miscellaneous helpers for the CrowdWater project
+
+def str_to_num(x):
+    """
+    Converts a label of the form "minus N" or "plus N" (N in 0..6) to the
+    corresponding signed integer.
+
+    Returns None when x matches none of these labels.
+    """
+    for magnitude in range(7):
+        # "minus" is tested before "plus" for each magnitude.
+        for sign, word in ((-1, "minus"), (1, "plus")):
+            if x == f"{word} {magnitude}":
+                return sign * magnitude
+    return None
diff --git a/nb_1_read_efas_glofas.ipynb b/nb_1_read_efas_glofas.ipynb