## Using the EWXPWSDB Collector Class

# <span style="color:red">clear all output before saving: db output contains passwords! </span>

This notebook walks through the process of

- creating a temporary DB
- using the collector class for existing station records to
    - get past data 
    - pull data for short period, e.g. from a scheduler
    - get all recent data, aka catch up data from last record


In [None]:
%load_ext autoreload
%autoreload 2

## Constants/Config

In [None]:

# Station list file; passed to init_db() together with the engine further down
station_file = '../data/test_stations.tsv'
# Fallback filter for get_one_station(): only used when no station code is given
station_type = 'DAVIS'
# Code of the station record looked up and handed to the Collector
station_code = 'EWXDAVIS01' 

In [None]:

from ewxpwsdb.db.database import Session, init_db, get_db_url, get_engine
from ewxpwsdb.db.models import WeatherStation, Reading, StationType, APIResponse
from ewxpwsdb.db.importdata import import_station_file
from ewxpwsdb.collector import Collector
from ewxpwsdb.time_intervals import UTCInterval
from sqlmodel import select

## Create an engine for a temporary database

In [None]:
from ewxpwsdb.db.database import temp_pg_engine
# NOTE(review): presumably creates a throwaway Postgres database on localhost
# and returns an engine bound to it -- confirm against temp_pg_engine
engine = temp_pg_engine(host='localhost')

temp_db_url = engine.url
# print only the database name rather than the whole URL
print(temp_db_url.database)


In [None]:
init_db(engine,station_file)


In [None]:
def get_one_station(station_type, station_code = None, engine=engine):
    """Look up a single WeatherStation record.

    If station_code is truthy the lookup filters on the station code and
    station_type is ignored; otherwise it filters on station_type.  The
    first result of the query is returned.  The engine argument defaults
    to the notebook's global engine.
    """
    # decide on the filter once, then build a single query from it
    if station_code:
        criterion = WeatherStation.station_code == station_code
    else:
        criterion = WeatherStation.station_type == station_type
    query = select(WeatherStation).where(criterion)

    # the session only lives for the duration of this one query
    with Session(engine) as db_session:
        return db_session.exec(query).first()

In [None]:
station = get_one_station(station_type, station_code, engine)
print(station.station_code)


In [None]:
collector = Collector(station, engine)
collector.weather_api

In [None]:
collector._session.commit()

In [None]:
from datetime import timedelta
from ewxpwsdb.time_intervals import UTCInterval
duration_min = 70
# NOTE(review): presumably the most recent interval spanning `duration_min` minutes -- confirm against UTCInterval.previous_interval
viable_interval = UTCInterval.previous_interval(delta_mins=duration_min)
# same window as viable_interval, shifted back exactly one day at both ends
yesterday = UTCInterval(start=viable_interval.start - timedelta(days = 1), 
                           end = viable_interval.end - timedelta(days = 1)
                           )
# last expression of the cell, so the notebook displays the interval
yesterday

In [None]:
from datetime import timedelta
from ewxpwsdb.time_intervals import previous_fourteen_minute_interval

interval = previous_fourteen_minute_interval()

# move the start back one hour and the end back half an hour: the result is
# an earlier window that is 30 minutes longer than the one returned above
interval.start = interval.start - timedelta(hours = 1)
interval.end = interval.end - timedelta(hours = 0.5)
# last expression of the cell, so the notebook displays the adjusted interval
interval

In [None]:
today_interval = UTCInterval.one_day_interval()  # this defaults to getting the time range from midnight to now
two_day_interval = UTCInterval(start = (today_interval.start - timedelta(days = 1)), end = today_interval.end)
two_day_interval

In [None]:
from datetime import datetime, timezone
# current calendar date in UTC
today_utc = datetime.now(timezone.utc).date()

# request and store data for the interval built from yesterday's UTC date;
# `timedelta` comes from an import in an earlier cell
# NOTE(review): presumably one_day_interval(d=...) covers that whole day -- confirm
collector.request_and_store_weather_data_utc(UTCInterval.one_day_interval(d = today_utc- timedelta(days = 1)))

In [None]:
# keep the result: the next cell displays `readings`, which would otherwise
# be undefined when the notebook is run from top to bottom
readings = collector.get_readings(1)

In [None]:
readings

In [None]:
# call the method to see if it's working
somerex = collector.request_and_store_weather_data_utc(interval)
somerex

In [None]:
# are api response ids being saved in the object?

collector.current_api_response_record_ids

In [None]:
# to re-fill this sqlalchemy record cache, just ask for some piece of the data
print(collector.current_api_response.id)
# now the object cache is refilled and should be present
collector.current_api_response

In [None]:
# show the transformed readings, if any
collector.current_readings

In [None]:
# again, demonstrate that the readings were stored in the database by checking the ID field
collector.current_readings[0].id


## Test/Demo getting a full day of readings

In [None]:
viable_interval = UTCInterval.previous_fifteen_minutes()
collector.request_and_store_weather_data_utc(viable_interval)


In [None]:
# the same window as viable_interval, shifted back exactly one day at both ends
yesterday = UTCInterval(start=viable_interval.start - timedelta(days = 1),
                             end = viable_interval.end - timedelta(days = 1)
                               )
print(yesterday)
# NOTE(review): presumably discards uncommitted session state before the next request -- confirm why this is needed
collector._session.rollback()

# the call's return value is printed as the response ids; the reading ids are
# read back from the collector object afterwards
response_ids = collector.request_and_store_weather_data_utc(yesterday)
print("response ids:")
print(response_ids)
print("reading ids:")
print(collector.current_reading_ids)

In [None]:
## there _should_ be readings from the same interval in there now     

readings = collector.get_readings_by_date(yesterday)

In [None]:
print(readings)

## Test/demo of restricting data inserts

Saving readings with the same timestamp and station used to throw an exception. 
However, even though there is a unique constraint on these columns, the collector code checks for existing records and simply 
updates them.  This is known as an 'upsert', but it doesn't use the PostgreSQL + SQLAlchemy upsert; it uses custom code instead. 

In [None]:

from sqlalchemy.exc import IntegrityError

# Save readings again from the collector's current API response.  If the
# database raises an IntegrityError, the session is rolled back and a
# message is printed instead of letting the error propagate.
try:
    something = collector.save_readings_from_responses(api_responses = collector.current_api_response)
except IntegrityError as e:
    collector._session.rollback()
    print("integrity error prevented duplicate records from being inserted")

# what happens to the current readings? 
collector.current_readings

In [None]:
# ensure the rollback worked

# repeat the same save; an IntegrityError is again rolled back and reported
# NOTE(review): the message below prints when an IntegrityError IS caught -- confirm the wording is intended
try:
    something = collector.save_readings_from_responses(api_responses = collector.current_api_response)
except IntegrityError as e:
    collector._session.rollback()
    print("didn't get the rollback error")

    

In [None]:
# do we still have readings after a rollback?
readings = collector.get_readings(n=5)
readings


### Clean up

remove test database

In [None]:

from ewxpwsdb.db.database import drop_temp_pg_engine, list_pg_databases
from sqlalchemy.orm import close_all_sessions

# close the collector's own session and dispose of its engine first
if collector:
    collector._session.close()
    collector._engine.dispose()

# then close any remaining SQLAlchemy sessions
close_all_sessions()




print(f"attempting to drop db {engine.url.database}")
# NOTE(review): presumably drops the temporary database behind `engine` -- confirm against drop_temp_pg_engine
result = drop_temp_pg_engine(engine)
print(result)
engine.dispose()
# last expression of the cell: displays whatever list_pg_databases returns for localhost
list_pg_databases(host='localhost')

