In [2]:
#Import the packages - these are already contained in the pre-built env.
import xarray as xr
import pandas as pd
import os
import matplotlib.pyplot as plt
import datetime as dt



# Load the three input tables:
#   - sensor catalogue (sensor id, symbol, unit, long name, number of decimals)
#   - station table (station number, station name, latitude, longitude)
#   - the raw measurements in long format
sensor_mapping_df = pd.read_csv('../data/table_Sensors.csv')
metgeo = pd.read_csv('../data/Location_File.csv')
meas = pd.read_csv('../data/my_measurements.csv').assign(
    # Parse the textual 'dt' column into pandas timestamps, stored in a new 'time' column.
    # NOTE: no timezone is requested here, so the result is timezone-naive unless the
    # strings themselves carry a UTC offset.
    time=lambda frame: pd.to_datetime(frame['dt'])
)

In [8]:
## Show the parsed 'time' column: its values are now pandas datetimes instead of
## the raw strings that were read from the 'dt' column
print(meas['time'])

0        2015-01-01 00:00:00
1        2015-01-01 00:00:00
2        2015-01-01 00:00:00
3        2015-01-01 00:00:00
4        2015-01-01 00:00:00
                 ...        
819797   2015-02-03 06:00:00
819798   2015-02-03 06:00:00
819799   2015-02-03 06:00:00
819800   2015-02-03 06:00:00
819801   2015-02-03 06:00:00
Name: time, Length: 819802, dtype: datetime64[ns]


In [2]:
# Create a dictionary mapping sensorID to sensor names
names = dict(zip(sensor_mapping_df['SensorID'], sensor_mapping_df['symbol']))

# Translate every sensor id into its symbol.
# .map() does one dictionary lookup per row (much cheaper than .replace() on a
# large column) and yields NaN for ids that are missing from the sensor table,
# which lets us detect them instead of silently keeping the raw integer id.
sensor_names = meas['sensorid'].map(names)

# An id without a symbol would otherwise end up as a nameless/integer column
# after pivoting and break the metadata lookups (units, long name, ...) later on,
# so fail early with a message that names the offending ids.
unmapped = meas.loc[sensor_names.isna(), 'sensorid'].unique()
if len(unmapped) > 0:
    raise ValueError(
        "No symbol found in the sensor table for sensor id(s): {}".format(list(unmapped))
    )

meas['name'] = sensor_names

In [9]:
# Reshape the measurements from long to wide format: one row per timestamp,
# one column per sensor name, each cell taken from the 'value' column.
meas_pivoted = pd.pivot(meas, index='time', columns='name', values='value')

print(meas_pivoted)

name                 RH (av)  RH (max)  RH (min)  TAir (av)  TAir (max)  \
time                                                                      
2015-01-01 00:00:00     71.9      72.4      71.5       15.7        15.8   
2015-01-01 01:00:00     71.0      71.2      70.7       15.9        15.9   
2015-01-01 02:00:00     72.2      72.4      72.0       15.8        15.8   
2015-01-01 03:00:00     73.9      74.1      73.6       15.8        15.8   
2015-01-01 04:00:00     71.6      72.8      70.5       16.3        16.4   
...                      ...       ...       ...        ...         ...   
2022-07-27 10:00:00      0.5       0.5       0.4      -39.6       -39.5   
2022-07-27 11:00:00      0.5       0.5       0.4      -39.5       -39.5   
2022-08-12 07:00:00      0.2       0.3       0.0      -39.9       -39.8   
2022-09-26 08:00:00      0.2       0.3       0.2      -39.8       -39.7   
2022-09-26 09:00:00      0.2       0.3       0.2      -39.8       -39.7   

name                 TAi

In [10]:
# Convert the wide table to an xarray Dataset: the 'time' index becomes the
# coordinate/dimension and every column becomes a data variable.
ds = xr.Dataset.from_dataframe(meas_pivoted)
print(ds)

<xarray.Dataset> Size: 7MB
Dimensions:         (time: 61225)
Coordinates:
  * time            (time) datetime64[ns] 490kB 2015-01-01 ... 2022-09-26T09:...
Data variables: (12/14)
    RH (av)         (time) float64 490kB 71.9 71.0 72.2 73.9 ... 0.5 0.2 0.2 0.2
    RH (max)        (time) float64 490kB 72.4 71.2 72.4 74.1 ... 0.5 0.3 0.3 0.3
    RH (min)        (time) float64 490kB 71.5 70.7 72.0 73.6 ... 0.4 0.0 0.2 0.2
    TAir (av)       (time) float64 490kB 15.7 15.9 15.8 ... -39.9 -39.8 -39.8
    TAir (max)      (time) float64 490kB 15.8 15.9 15.8 ... -39.8 -39.7 -39.7
    TAir (min)      (time) float64 490kB 15.6 15.9 15.8 ... -40.0 -39.8 -39.8
    ...              ...
    WS10 (av)       (time) float64 490kB 1.5 1.5 1.5 1.2 1.4 ... 0.0 0.0 0.0 0.0
    WS10 (max)      (time) float64 490kB 3.2 3.2 3.3 2.5 3.5 ... 0.3 0.2 0.2 0.2
    WS10 (min)      (time) float64 490kB 0.4 0.3 0.3 0.2 0.2 ... nan nan nan nan
    WS2 (av)        (time) float64 490kB 0.4 0.4 0.4 0.3 0.4 ... 0.1 0.0 0.1

In [11]:
# Neither the dataset nor the variables it contains carry any metadata yet.
# Start by building lookup dictionaries that map each variable to the metadata we need.

# For the sensor table the key is always "symbol" (i.e. the quantity name); the value
# is, in turn, the unit, the full English name, and the number of decimals.
mapUnits, mapLongName, mapDecimals = (
    dict(zip(sensor_mapping_df['symbol'], sensor_mapping_df[column]))
    for column in ('unit', 'en_name', 'decimal_num')
)

# Station number -> station name
loc = dict(zip(metgeo['stNo'], metgeo['station_nm']))

In [2]:
# Station number -> latitude and station number -> longitude lookups
_lat, _lon = (
    dict(zip(metgeo['stNo'], metgeo[column]))
    for column in ('latt', 'long')
)

In [2]:
# Attach the per-variable metadata: each attribute name is paired with the
# lookup dictionary (keyed by variable name) that provides its value.
attribute_sources = (
    ('units', mapUnits),
    ('long_name', mapLongName),
    ('decimals', mapDecimals),
)
for varname in ds:
    var_attrs = ds[varname].attrs
    for attr_name, lookup in attribute_sources:
        var_attrs[attr_name] = lookup[varname]

In [2]:
# Attach the dataset-level metadata (i.e. the information about the station).
# All station lookups are keyed by the station number, so resolve it once and make
# sure the measurements really belong to a single station: silently taking the
# first id would label data from several stations with one station's metadata.
station_ids = meas['stno'].unique()
if len(station_ids) != 1:
    raise ValueError(
        "Expected measurements from exactly one station, found {}: {}".format(
            len(station_ids), list(station_ids)
        )
    )
station_id = station_ids[0]

ds.attrs['station_name'] = loc[station_id]
ds.attrs['lat'] = _lat[station_id]
ds.attrs['lon'] = _lon[station_id]

# Timezone-aware UTC timestamp in ISO 8601 format, so the creation time is
# unambiguous regardless of the machine the notebook ran on.
ds.attrs['date_created'] = dt.datetime.now(dt.timezone.utc).isoformat()

# os.getlogin() raises OSError when the process has no controlling terminal
# (common for notebook kernels and containers). Catch only that error and fall
# back to the usual environment variables; the attribute stays best-effort and
# is simply omitted when no user name can be determined.
try:
    creator = os.getlogin()
except OSError:
    creator = os.environ.get('USER') or os.environ.get('USERNAME')
if creator:
    # os.name identifies the OS family, e.g. 'posix'
    # (POSIX stands for "Portable Operating System Interface for Unix").
    ds.attrs['created by'] = "{}, on {}".format(creator, os.name)

ds.attrs['xarray version'] = xr.__version__

In [2]:
# Print the dataset again to inspect it now that the metadata attributes have been set
print(ds)

In [2]:
# Save to netCDF.
# Create the output folder first so that a fresh checkout (where '../output'
# does not exist yet) does not fail on write.
os.makedirs('../output', exist_ok=True)
# 'time' is written as an unlimited dimension so records can be appended later;
# unlimited_dims is documented as an iterable of dimension names, hence the list.
ds.to_netcdf('../output/my_measurements.nc', unlimited_dims=['time'])

<xarray.Dataset> Size: 7MB
Dimensions:         (time: 61225)
Coordinates:
  * time            (time) datetime64[ns] 490kB 2015-01-01 ... 2022-09-26T09:...
Data variables: (12/14)
    RH (av)         (time) float64 490kB 71.9 71.0 72.2 73.9 ... 0.5 0.2 0.2 0.2
    RH (max)        (time) float64 490kB 72.4 71.2 72.4 74.1 ... 0.5 0.3 0.3 0.3
    RH (min)        (time) float64 490kB 71.5 70.7 72.0 73.6 ... 0.4 0.0 0.2 0.2
    TAir (av)       (time) float64 490kB 15.7 15.9 15.8 ... -39.9 -39.8 -39.8
    TAir (max)      (time) float64 490kB 15.8 15.9 15.8 ... -39.8 -39.7 -39.7
    TAir (min)      (time) float64 490kB 15.6 15.9 15.8 ... -40.0 -39.8 -39.8
    ...              ...
    WS10 (av)       (time) float64 490kB 1.5 1.5 1.5 1.2 1.4 ... 0.0 0.0 0.0 0.0
    WS10 (max)      (time) float64 490kB 3.2 3.2 3.3 2.5 3.5 ... 0.3 0.2 0.2 0.2
    WS10 (min)      (time) float64 490kB 0.4 0.3 0.3 0.2 0.2 ... nan nan nan nan
    WS2 (av)        (time) float64 490kB 0.4 0.4 0.4 0.3 0.4 ... 0.1 0.0 0.1