In [3]:
import glob
import logging
import os
from urllib.parse import quote

from dotenv import load_dotenv
from sqlalchemy import create_engine
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import webdataset as wds
import matplotlib.pyplot as plt

import net.net as net
import net.io as io
import gfs.fetch
import utils

In [4]:
# Enable IPython's autoreload extension in mode 2, which reloads imported
# modules before each cell executes so edits to local .py files are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [5]:
# Read database settings from the environment; load_dotenv() first merges any
# values found in a local .env file.
load_dotenv()

db_settings = {
    name: os.getenv(name)
    for name in ('DB_USER', 'DB_PASSWORD', 'DB_HOST', 'DB_PORT', 'DB_NAME')
}

# Fail fast: an unset variable would otherwise be formatted into the URL as the
# literal string "None" and only surface later as a confusing connection error.
missing_settings = [name for name, value in db_settings.items() if value is None]
if missing_settings:
    raise RuntimeError(
        f"Missing database environment variables: {', '.join(missing_settings)}"
    )

# Percent-encode the credentials so characters such as '@', ':', '/' or '#'
# in a user name or password cannot be mistaken for URL delimiters.
# safe='' also encodes '/', and spaces become %20 rather than '+'.
connection_string = "postgresql://{user}:{password}@{host}:{port}/{db}".format(
    user=quote(db_settings['DB_USER'], safe=''),
    password=quote(db_settings['DB_PASSWORD'], safe=''),
    host=db_settings['DB_HOST'],
    port=db_settings['DB_PORT'],
    db=db_settings['DB_NAME']
)
engine = create_engine(connection_string)

In [6]:
# Base column names in the order reported by gfs.fetch.
col_names = gfs.fetch.get_col_order()

# Same names with the "_12" suffix appended, order preserved.
col_names_full = [f'{base_name}_12' for base_name in col_names]

In [7]:
# Scaling statistics are computed inside the database, over rows where
# is_validation is FALSE (presumably the training split -- confirm against the
# table definition).
FEATURES_TABLE = 'glideator_fs.features_with_target'
SITE_COLS = ['latitude', 'longitude', 'altitude']


def build_stats_query(columns):
    """Return SQL that computes AVG and STDDEV for every column in ``columns``.

    The query yields a single row with one ``mean_<col>`` and one ``std_<col>``
    column per input column, restricted to rows of ``FEATURES_TABLE`` where
    ``is_validation = FALSE``.

    Column names are interpolated verbatim into the SQL text, so they must come
    from trusted code (never from user input).
    """
    aggregates = ',\n    '.join(
        f'AVG({col}) AS mean_{col}, STDDEV({col}) AS std_{col}' for col in columns
    )
    return f"""
SELECT
    {aggregates}
FROM {FEATURES_TABLE}
WHERE is_validation = FALSE
"""


def extract_scaling_params(stats_df, columns):
    """Turn the one-row statistics frame into ``{col: {'mean': m, 'std': s}}``.

    Args:
        stats_df: DataFrame produced by running ``build_stats_query(columns)``.
        columns: the column names the statistics were computed for.

    Returns:
        dict mapping each column name to its mean and std as Python floats.

    Raises:
        ValueError: if a mean or std is NULL/NaN. The database aggregates
            return NULL when there are no (or, for STDDEV, fewer than two)
            non-null input rows, and a NaN statistic would make the scaler
            emit NaN for that feature.
    """
    # An aggregate query without GROUP BY always returns exactly one row.
    stats_row = stats_df.iloc[0]
    params = {}
    for col in columns:
        mean = stats_row[f'mean_{col}']
        std = stats_row[f'std_{col}']
        if pd.isna(mean) or pd.isna(std):
            raise ValueError(
                f"Cannot compute scaling parameters for '{col}': "
                f"mean={mean!r}, std={std!r}"
            )
        params[col] = {'mean': float(mean), 'std': float(std)}
    return params


# Scaling parameters for weather features
query = build_stats_query(col_names_full)
stats_df = pd.read_sql(query, engine)
scaling_params = extract_scaling_params(stats_df, col_names_full)

# Scaling parameters for site features
site_query = build_stats_query(SITE_COLS)
site_stats_df = pd.read_sql(site_query, engine)
site_scaling_params = extract_scaling_params(site_stats_df, SITE_COLS)

# Initialize the layers
weather_scaler = net.StandardScalerLayer(scaling_params)
site_scaler = net.StandardScalerLayer(site_scaling_params)

In [8]:
# Persist both scaler layers; each destination path maps to the layer saved there.
scaler_checkpoints = {
    'training/models/weather_scaler.pth': weather_scaler,
    'training/models/site_scaler.pth': site_scaler,
}
for checkpoint_path, scaler_layer in scaler_checkpoints.items():
    io.save_net(scaler_layer, checkpoint_path)