# Proof of concept:

## 1. PD.DataFrame containing timeseries for each Sensor

|time|EID1|EID2|...|EIDX|
|----|----|----|---|----|
|2024-020-05 10:00|0|0|...|1|
|...|...|...|...|...|

## 2. PD.DataFrame containing location for each sensor

|EID|lon|lat|
|---|---|---|
|E123X|10|15|
|EA2B3|1|10|
|...|...|...

## 3. xarray containing radar Data with longitude, latitude and timestamp

* no table since its a 3d array

In [17]:
import xarray as xr
import numpy as np
import pandas as pd
from datetime import datetime as dt, timedelta as td
import geopandas as gpd





In [18]:


#Reading the radar grid of the study area
grid_coord=gpd.read_file(r"Data\\grid_geojson_4326_small.geojson")
#Converting to crs UTM EPSG:25832
grid_coord=grid_coord.to_crs("EPSG:25832")
#Extracting the lon max, lon min, lat max and lat min for the first and last pixel
lon_min, lat_max = round(grid_coord.centroid[0].x), round(grid_coord.centroid[0].y)
lon_max, lat_min = round(grid_coord.centroid[len(grid_coord)-1].x), round(grid_coord.centroid[len(grid_coord)-1].y)
#Creating a list to store the longitudes from each pixel
lon_values=[]
for i in range(lon_min,lon_max+1000,1000):   #Take into account that the distance Between 2 centroid is 1000 meters
    lon_values.append(i)

lat_values = []
for i in range(lat_min, lat_max +1000, 1000):
    lat_values.append(i)

In [23]:
# Define dimensions
ID = 46
lat = len(lat_values)
lon = len(lon_values)
id_sensors= [
    "E1E54", "E223A", "E3C10", "E434E", "E436B", "E45DB", "E45EC", "E45ED",
    "E45F8", "E4692", "E46BE", "E4A7A", "E4AC1", "E4DB5", "E4E8B", "E4EB2",
    "E5217", "E525C", "E5268", "E52A7", "E53B4", "E557C", "E570D", "E572D",
    "E5744", "E57C3", "E57D3", "E5B98", "E5B9D", "E5BA7", "E5FC8", "E603E",
    "E6040", "E6069", "E611E", "E6120", "E6A24", "E7052", "E7217", "E72F2",
    "E73A3", "E75A1", "E761F", "E7A34", "E7B54", "E7DE6"
]

# Create coordinates for each dimension
time_values = pd.date_range(start=dt(2024, 5, 5, 14, 0, 0),
                            end=dt(2024,5,5,15,0,0),
                            freq="5min")
time = len(time_values)



# create DataFrame containing all Senosr-coordinates
Sensor_coords = pd.DataFrame(
    {"ID": id_sensors,
     "lat": np.random.randint(lat, size=len(id_sensors)),
     "lon": np.random.randint(lon, size=len(id_sensors))},
).set_index("ID")

# create Dataframe with Timeseries for each sensor
sensor_ts = pd.DataFrame(np.random.randint(7, size=(time, len(id_sensors))),
                         columns=id_sensors)

sensor_ts["time"] = time_values
sensor_ts = sensor_ts.set_index("time")


radar_data = np.random.randint(7, size=(time, len(lat_values), len(lon_values)))  # No ID dimension
# Create the xarray Dataset with two variables: sensor and radar measurements
ds = xr.Dataset(
    {
        #"sensor": (("ID",), Sensor_data),
        "radar": (("time", "lat", "lon"), radar_data),  # No ID dimension here
        
        
    },
    coords={
        "time": time_values,
        "lat": lat_values,
        "lon": lon_values,
        "ID": id_sensors,
    }
)

In [None]:
files_list = []

In [22]:
ds

In [5]:
Sensor_coords.loc["E1E54", "lat"]

np.int32(48)

In [6]:
sensor = {}
sensor["name"] = "E1E54"
sensor["timeseries"] = sensor_ts[sensor["name"]]
sensor["lat"] = Sensor_coords.loc["E1E54", "lat"]
sensor["lon"] = Sensor_coords.loc["E1E54", "lon"]

sensor

{'name': 'E1E54',
 'timeseries': time
 2024-05-05 14:00:00    5
 2024-05-05 14:05:00    3
 2024-05-05 14:10:00    5
 2024-05-05 14:15:00    4
 2024-05-05 14:20:00    6
 2024-05-05 14:25:00    2
 2024-05-05 14:30:00    3
 2024-05-05 14:35:00    1
 2024-05-05 14:40:00    5
 2024-05-05 14:45:00    4
 2024-05-05 14:50:00    5
 2024-05-05 14:55:00    3
 2024-05-05 15:00:00    0
 Name: E1E54, dtype: int32,
 'lat': np.int32(48),
 'lon': np.int32(22)}

In [7]:
sensor["radar"] = ds.sel(lat=sensor["lat"], lon=sensor["lon"]).radar

sensor["rad+1"] = ds.sel(lat=sensor["lat"]+1, lon=sensor["lon"]).radar

print(sensor["radar"])
print(sensor["rad+1"])


<xarray.DataArray 'radar' (time: 13)> Size: 52B
array([6, 5, 6, 6, 3, 3, 2, 0, 4, 3, 0, 5, 6], dtype=int32)
Coordinates:
  * time     (time) datetime64[ns] 104B 2024-05-05T14:00:00 ... 2024-05-05T15...
    lat      int64 8B 48
    lon      int64 8B 22
<xarray.DataArray 'radar' (time: 13)> Size: 52B
array([6, 3, 2, 1, 3, 4, 4, 0, 3, 0, 4, 1, 4], dtype=int32)
Coordinates:
  * time     (time) datetime64[ns] 104B 2024-05-05T14:00:00 ... 2024-05-05T15...
    lat      int64 8B 49
    lon      int64 8B 22


In [8]:
Sensor_data = pd.DataFrame(
    {"ID": sensor_codes,
     "lat": np.random.randint(lat, size=len(sensor_codes)),
     "lon": np.random.randint(lon, size=len(sensor_codes))}
)
Sensor_data

ds2 = xr.Dataset.from_dataframe(Sensor_data.set_index(["ID",]))
ds2

NameError: name 'sensor_codes' is not defined

In [9]:
ds

In [16]:
lat_values

[5960019,
 5961019,
 5962019,
 5963019,
 5964019,
 5965019,
 5966019,
 5967019,
 5968019,
 5969019,
 5970019,
 5971019,
 5972019,
 5973019,
 5974019,
 5975019,
 5976019,
 5977019,
 5978019,
 5979019,
 5980019,
 5981019,
 5982019,
 5983019,
 5984019,
 5985019,
 5986019,
 5987019,
 5988019,
 5989019,
 5990019,
 5991019,
 5992019,
 5993019]

In [24]:
import xarray as xr
import numpy as np

# Example: Create a simple 2D xarray DataArray (with shape (lat, lon))
latitudes = np.linspace(-90, 90, 10)  # 10 latitudes
longitudes = np.linspace(-180, 180, 20)  # 20 longitudes
data = np.random.rand(10, 20)  # Random data values for each (lat, lon) point

# Create a DataArray
grid_data = xr.DataArray(data, coords=[latitudes, longitudes], dims=["latitude", "longitude"])


In [25]:
# Sensor locations
sensor_latitudes = np.array([34.05, 34.05, 40.71, 40.71])  # latitudes for sensors
sensor_longitudes = np.array([-118.25, -118.25, -74.01, -74.01])  # longitudes for sensors
sensor_ids = np.array([1, 2, 3, 4])  # Unique sensor IDs


In [26]:
# Add new coordinate 'sensor' to the grid data
grid_data.coords["sensor"] = ("sensor", sensor_ids)
grid_data.coords["sensor_latitude"] = ("sensor", sensor_latitudes)
grid_data.coords["sensor_longitude"] = ("sensor", sensor_longitudes)


ValueError: cannot add coordinates with new dimensions to a DataArray

In [None]:
# Display the xarray object with sensor location data
print(grid_data)

# Access the sensor latitudes and longitudes
print("Sensor Latitudes:", grid_data.coords["sensor_latitude"].values)
print("Sensor Longitudes:", grid_data.coords["sensor_longitude"].values)

# Example of retrieving data from a specific sensor (sensor ID = 1)
sensor_1_lat = grid_data.coords["sensor_latitude"].sel(sensor=1).values
sensor_1_lon = grid_data.coords["sensor_longitude"].sel(sensor=1).values
print(f"Sensor 1 Location: Latitude={sensor_1_lat}, Longitude={sensor_1_lon}")


In [None]:
# Example Dataset with sensor data
sensor_data = np.random.rand(4)  # Simulated sensor readings

# Create a dataset
ds = xr.Dataset(
    {
        "sensor_reading": ("sensor", sensor_data)
    },
    coords={
        "sensor": ("sensor", sensor_ids),
        "sensor_latitude": ("sensor", sensor_latitudes),
        "sensor_longitude": ("sensor", sensor_longitudes)
    }
)
