In [19]:
from pathlib import Path

from src.oxrivers_api.DataCache import DataCache
from src.oxrivers_api.loader import Loader
from src.oxrivers_api.client import OxfordRiversClient
from src.oxrivers_api.sites_discovery import SitesDiscovery
from src.oxrivers_api.determinands_discovery import DeterminandsDiscovery

# choose where to store json
data_dir: Path = Path("../data")

# set up Oxford Rivers Client and Pandas loader
client = OxfordRiversClient(data_dir)
loader = Loader(client)

In [20]:
# Determinands discovery helps you figure out what datasets have the information you are looking for
determinands_discovery = DeterminandsDiscovery(loader)

# see what determinands there are
determinands_discovery.get_determinands_info()

# Escherichia coli (EC) is in the list. Let's see what datasets track E. coli.
ecoli_id = "Escherichia coli (EC)"
datasets_with_ecoli = determinands_discovery.get_datasets_with_determinand(ecoli_id)

INFO:                                                name  \
0              Alkalinity to pH 4.5 as CaCO3 (mg/l)   
1                    Ammonia un-ionised as N (mg/l)   
2                                   Ammonium (mg/L)   
3                   Ammoniacal Nitrogen as N (mg/l)   
4                            Blue-Green Algae (RFU)   
5   Carbon, Organic, Dissolved as C :- {DOC} (mg/l)   
6                                   Chloride (mg/l)   
7                                Chlorophyll (µg/L)   
8                      Conductivity at 25 C (µS/cm)   
9                              Conductivity (µS/cm)   
10                  Dissolved organic carbon (mg/l)   
11                            Escherichia coli (EC)   
12       Fluorescent Dissolved Organic Matter (RFU)   
13                      Intestinal enterococci (IE)   
14                              Nitrate as N (mg/l)   
15                              Nitrite as N (mg/l)   
16                      Nitrogen, Total as N (mg/l)   
17  

In [21]:
# Sites discovery helps you see where the data you are interested in is collected
# for a particular dataset
sites_discoveries = [SitesDiscovery(loader, datasetID) for datasetID in datasets_with_ecoli["dataset"]]

# Let's look at the swimming areas
ea_bathing_sites_with_ecoli = sites_discoveries[0].get_sites_info()
ea_bathing_sites_columns = sites_discoveries[0].get_columns()
wolvercote_bathing_coordinates = sites_discoveries[0].sites["geometry_coordinates"].loc[0]


INFO:   properties_id         properties_name
0         11946  Wolvercote Mill Stream
1         11947       Wallingford Beach
INFO: Index(['geometry_coordinates', 'properties_id', 'properties_name',
       'properties_threshold', 'properties_popserved', 'lon', 'lat'],
      dtype='object')
INFO: ['geometry_coordinates', 'properties_id', 'properties_name', 'properties_threshold', 'properties_popserved', 'lon', 'lat']


In [22]:
# Let's find other sites near Wolvercote in the other E. coli dataset
other_ecoli_sites_near_wolvercote = sites_discoveries[1].get_closest_sites_to(wolvercote_bathing_coordinates)
# And print the names of the ones within 10 km
within_10km_wolvercote = other_ecoli_sites_near_wolvercote[other_ecoli_sites_near_wolvercote["distance_from_ref"] < 10000]
print(within_10km_wolvercote["properties_name"])


INFO: Index(['geometry_coordinates', 'properties_id', 'properties_name',
       'properties_threshold', 'properties_popserved', 'lon', 'lat'],
      dtype='object')
INFO: ['geometry_coordinates', 'properties_id', 'properties_name', 'properties_threshold', 'properties_popserved', 'lon', 'lat']


15    Tumbling Bay Bathing Area, Oxford (Laboratory ...
18                     Folly Bridge, Oxford (Petrifilm)
16                      Longbridges, Oxford (Petrifilm)
Name: properties_name, dtype: object


In [23]:
# Now let's plot the time series of Wolvercote bathing area E. coli levels and the next closest E. coli site on the same graph
# Save the datasetID and siteID for each
wolvercote_bathing_sites = sites_discoveries[0].sites.loc[0]
wolvercote_timeseries_info = (datasets_with_ecoli.loc[0, "dataset"],
                              sites_discoveries[0].sites.loc[0, "properties_id"],
                              datasets_with_ecoli.loc[0, "id"]) # datasetID, siteID, determinand
print(wolvercote_timeseries_info)

row_nearest = within_10km_wolvercote.head(1).index
nearest_siteID = sites_discoveries[1].sites.loc[row_nearest[0], "properties_id"]
print(nearest_siteID)
nearest_timeseries_info = (datasets_with_ecoli.loc[1, "dataset"],
                              nearest_siteID,
                              datasets_with_ecoli.loc[1, "id"]) # datasetID, siteID, determinand
print(nearest_timeseries_info)

# get the timeseries for each
# wolvercote_ecoli_timeseries = loader.load_timeseries_determinand(wolvercote_timeseries_info[0], wolvercote_timeseries_info[1], wolvercote_timeseries_info[2])
# nearest_ecoli_timeseries = loader.load_timeseries_determinand(nearest_timeseries_info[0], nearest_timeseries_info[1], nearest_timeseries_info[2])
#
# loader.load_timeseries_determinand('wtrt', '1743499042454x452904883018006500', "EC")


('ea_bathing_water', '11946', 'EC')
1743499042454x452904883018006500
('wtrt', '1743499042454x452904883018006500', 'EC')


In [24]:
from src.oxrivers_api.request_models import TimeseriesInfo, DataForDateInfo

# Example usage to get and store time series data for fft in Oxford:
cache = DataCache(loader)
cache.get_timeseries(TimeseriesInfo("fft", "Oxford"))
cache.get_data_for_date(DataForDateInfo("rainfall", "2022-05-05"))

# list contents in cache
print(cache.contents())
print(cache.get_by_key("fft:Oxford:None").head())
print(cache.get_by_key("rainfall_2022-05-05").head())

Oxford
{'fft:Oxford:None': TimeseriesInfo(datasetID='fft', siteID='Oxford', determinand=None), 'rainfall_2022-05-05': DataForDateInfo(datasetID='rainfall', date='2022-05-05')}
             datetime        value qualifier   id  siteID       endPoint  \
0 2023-01-01 00:00:00  1010.151184      None  fft  Oxford  getTimeseries   
1 2023-01-01 00:15:00  1009.032837      None  fft  Oxford  getTimeseries   
2 2023-01-01 00:30:00  1013.774902      None  fft  Oxford  getTimeseries   
3 2023-01-01 00:45:00  1006.010498      None  fft  Oxford  getTimeseries   
4 2023-01-01 01:00:00  1002.681274      None  fft  Oxford  getTimeseries   

  determinand determinand_label determinand_unit  
0        None              None             None  
1        None              None             None  
2        None              None             None  
3        None              None             None  
4        None              None             None  
     datetime  value     id
0  2022-05-05   0.01  39001
1  20