In [60]:
import requests
from datetime import date, datetime, timedelta
import pandas as pd
from os import listdir
from io import StringIO
import sys
sys.path.append('../')
from database import connect_to_db_upload


In [1]:
# Function to format CVS
def format_csv(csv_path):
    # CSV file into a dataframe and format to have datetime and numeric columns
    df = pd.read_csv(csv_path, names=['date','counts'])[1:] # Remove extra first row
    df["date"] = pd.to_datetime(df['date'])
    df['date'] = df['date'].dt.tz_convert('UTC') # convert time zone to UTC
    df = df.set_index('date')
    df["counts"] = df["counts"].apply(pd.to_numeric)
    return df

In [19]:
def fetch_weather(my_station, enddt):
    """Main loop."""
    print('Entered fecth_weather function')
    # Step 1: Fetch global METAR geojson metadata
    # https://mesonet.agron.iastate.edu/sites/networks.php
    req = requests.get(
        "http://mesonet.agron.iastate.edu/geojson/network/AZOS.geojson",
        timeout=60,
    )
    geojson = req.json()
    for feature in geojson["features"]:
        station_id = feature["id"]
        if station_id == my_station:
            
            props = feature["properties"]
            # We want stations with data to today (archive_end is null)
            if props["archive_end"] is None:
                print('archive_end is null = data to today')

            print(f'Fetching data for station {station_id}')
            uri = (
                "http://mesonet.agron.iastate.edu/cgi-bin/request/asos.py?"
                f"station={station_id}&data=tmpc&year1=1928&month1=1&day1=1&"
                f"year2={enddt.year}&month2={enddt.month}&day2={enddt.day}&"
                "tz=Etc%2FUTC&format=onlycomma&latlon=no&elev=no&missing=M&trace=T&"
                "direct=yes&report_type=3"
            )
            print('uri: ', uri)

            res = requests.get(uri, timeout=300)
            print('received response type: ', type(res))
            return res

In [68]:
monitors = pd.read_csv('../detector_info_settings/detector_locations.csv')
detector_name = monitors.loc[monitors['weather_station'] == 'CQT', 'name'].to_list()
detector_name

['Chara_Muon002']

In [62]:
def download_and_save_weather_table():

    # Home directory
    homedir = '../data/'
    # Get station ids from detector settings
    monitors = pd.read_csv('../detector_info_settings/detector_locations.csv')
    ids = monitors['weather_station'].to_list()
    
    for id in ids:
        if ids.count(id) > 1:
            names = monitors.loc[monitors['weather_station'] == id, 'name'].to_list()

        else:
            detector_name = monitors.loc[monitors['weather_station'] == id, 'name'].item()
            detector_csv = f'{homedir}{detector_name}/{detector_name}_all_logs.csv'
            df = pd.read_csv(detector_csv)
            df['date'] = pd.to_datetime(df['date'])
            df = df.set_index('date')
            
            # get start and end dates of df
            df.sort_index(inplace=True)
            oldest = df.head(1).index.values[0]
            earliest = df.tail(1).index.values[0]
            print(f'dates: ', oldest, earliest)

    # list of files within selected monitor
    all_files = listdir(homedir + detector_name.lower())
    # filter based on ending being 'all_logs.csv'
    detector_csv = [f for f in all_files if f.endswith('all_logs.csv')][0]
    if len(detector_csv) == 0 and version != None:
        more_files = listdir(homedir+detector_name.lower()+'/'+version.lower())
        detector_csv = [f for f in more_files if f.endswith('all_logs.csv')][0]

    # Get df and format
    if version:
        filesubpath = f'{homedir}{detector_name.lower()+'/'+version.lower()+'/'}'
        df = pd.read_csv(homedir+detector_name.lower()+'/'+version.lower()+'/'+detector_csv)
    else:
        filesubpath = f'{homedir}{detector_name.lower()+'/'}'
        df = pd.read_csv(homedir+detector_name+'/'+detector_csv)

    df['date'] = pd.to_datetime(df['date'])
    df = df.set_index('date')
    
    # get start and end dates of df
    df.sort_index(inplace=True)
    oldest = df.head(1).index.values[0]
    earliest = df.tail(1).index.values[0]
    print(f'dates: ', oldest, earliest)

    # fetch
    # print(f'Entering function {my_station}')
    weatherjson = fetch_weather(my_station, date.today())
    # print('Done fetching')

    # Store as csv and df
    filename = f"{filesubpath}{my_station}.csv"
    wdf = pd.read_csv(StringIO(weatherjson.text), sep=',')

    # Slice only for needed information based on dates - consider if temperature in farenheit
    print('Columns: ', wdf.columns.to_list())
    if 'tmpc' in wdf.columns.to_list():
        wdf['tmpf'] = (wdf['tmpc'] * 9/5) + 32
    if 'mslp' in wdf.columns.to_list():
        wdf = wdf.rename(columns={'valid':'date', 'tmpf':'temp_in_f', 'mslp':'sea_l_pressure_millibar'})
    else:
        wdf = wdf.rename(columns={'valid':'date', 'tmpf':'temp_in_f'})
    wdf = wdf.drop(columns=['station'])

    wdf = wdf.set_index('date')
    wdf.sort_index(inplace=True)
    wdf = wdf.loc[oldest:]

    engine, conn = connect_to_db_upload()

    wdf.to_sql(
        con=engine, name=f'{my_station.lower()}', if_exists='replace', index_label='date')
    print(f'Table {my_station.lower()} sent to DB successfully')

    # Make primary key for table via PSYCOPG2
    cur = conn.cursor()
    cur.execute(f"""ALTER TABLE {my_station.lower()} ADD PRIMARY KEY (date)""")
    conn.commit()
    cur.close()
    print('Query for primary key sent successfully')


In [42]:
download_and_save_weather_table()

Downloading weather data for detector Abuja
dates:  2024-05-01T15:00:00.000000000 2024-08-21T03:00:00.000000000
Entering function DNAA
Entered fecth_weather function
archive_end is null = data to today
Fetching data for station DNAA
uri:  http://mesonet.agron.iastate.edu/cgi-bin/request/asos.py?station=DNAA&data=tmpc&year1=1928&month1=1&day1=1&year2=2024&month2=9&day2=3&tz=Etc%2FUTC&format=onlycomma&latlon=no&elev=no&missing=M&trace=T&direct=yes&report_type=3
received response type:  <class 'requests.models.Response'>
Done fetching


Unnamed: 0,"station,valid,tmpc"
0,"DNAA,2003-07-01 11:00,27.00"
1,"DNAA,2003-07-02 08:00,25.00"
2,"DNAA,2003-07-04 07:00,23.00"


In [44]:
abuja_wdf.tail(3)

Unnamed: 0,"station,valid,tmpc"
68420,"DNAA,2024-09-02 20:00,26.00"
68421,"DNAA,2024-09-02 21:00,25.00"
68422,"DNAA,2024-09-02 22:00,25.00"


In [52]:
abuja_wdf.columns.to_list()

['station,valid,tmpc']

In [53]:
abuja_wdf = abuja_wdf.drop('station')
abuja_wdf

KeyError: "['station'] not found in axis"