# Imports

In [None]:
import warnings
warnings.filterwarnings('ignore')

import os
import pickle
import pandas as pd
import geopandas as gpd

from pathlib import Path
from tqdm.auto import tqdm
from datetime import datetime
from datetime import timedelta

from src.data import hrrr_week_download, modis_downloader
from src.features import hrrr_week_features, hrrr_winter_features, modis_features

# Config

In [None]:
# Forecast dates: one every 7 days from 2021-12-02 through 2022-06-30
# (31 dates), kept as 'YYYY-MM-DD' strings.
dates = (
    pd.date_range(start='2021-12-02', end='2022-06-30', freq='7D')
    .strftime('%Y-%m-%d')
    .tolist()
)


In [None]:
# --- Inputs -----------------------------------------------------------------
# Grid-cell definitions. Only PATH_GRID2 is read in the cells visible here.
PATH_GRID = 'data/input/grid_cells.geojson'
PATH_GRID2 = 'data/input/grid_cell_stage2.geojson'

# --- Downloaded (external) data ---------------------------------------------
PATH_HRRR_DATA = 'data/external/hhhr_data/'
PATH_MODIS = 'data/external/modis/'

# --- Derived (processed) data -----------------------------------------------
# NOTE(review): the spellings below ("feastures", "timeserias", "hhhr") are
# runtime paths and are kept verbatim; renaming them would point the pipeline
# at different directories.
PATH_TIMESERIES = 'data/processed/hrrr_timeserias/'
PATH_WEEK_FEATURES = 'data/processed/hrrr_week_feastures/'
PATH_WINTER_FEATURES = 'data/processed/hrrr_winter_feastures/'
PATH_MODIS_FEATURES = 'data/processed/modis_features/'

In [None]:
# Columns kept, in this exact order, when the merged inference frame is
# reduced with `df = df[cols]`. The list holds the two key columns
# (cell_id, valid_time) followed by the feature columns.
# NOTE(review): the order presumably has to match what the pickled models were
# trained on -- confirm before adding, removing or reordering entries.
cols = ['cell_id','valid_time','temp_mean','temp_sum','temp_sum_cold',
        'temp_sum_warm','temp_sum_cold_hours','temp_sum_warm_hours',
        'tp_mean','tp_sum','tp_sum_liquid','tp_sum_solid','rain_enrg',
        'thaw_count','dswrf_mean','dswrf_sum','si10_mean','si10_sum',
        'sdwe_mean','sdwe_sum','sdwe_range','sdwe_last','sdwe_first',
        'si10','dswrf','t2m','tp','tp_pls','tp_mns','t2m_pls','t2m_mns',
        'rain_nrg','si10_cumsum','si10_mean_sws','dswrf_cumsum','dswrf_mean_sws',
        't2m_cumsum','t2m_mean_sws','tp_cumsum','tp_mean_sws','tp_pls_cumsum',
        'tp_pls_mean_sws','tp_mns_cumsum','tp_mns_mean_sws','t2m_pls_cumsum',
        't2m_pls_mean_sws','t2m_mns_cumsum','t2m_mns_mean_sws','rain_nrg_cumsum',
        'rain_nrg_mean_sws','si10_m7','dswrf_m7','t2m_m7','tp_m7','tp_pls_m7',
        'tp_mns_m7','t2m_pls_m7','t2m_mns_m7','rain_nrg_m7','si10_cumsum_m7',
        'si10_mean_sws_m7','dswrf_cumsum_m7','dswrf_mean_sws_m7','t2m_cumsum_m7',
        't2m_mean_sws_m7','tp_cumsum_m7','tp_mean_sws_m7','tp_pls_cumsum_m7',
        'tp_pls_mean_sws_m7','tp_mns_cumsum_m7','tp_mns_mean_sws_m7','t2m_pls_cumsum_m7',
        't2m_pls_mean_sws_m7','t2m_mns_cumsum_m7','t2m_mns_mean_sws_m7','rain_nrg_cumsum_m7',
        'rain_nrg_mean_sws_m7','sc','ndsi1','sa1','lon','lat','alt','alt_min_200',
        'alt_max_200','alt_mean_200','slope','slope_mean_200','slope_median_200',
        'aspect','aspect_mean_200','aspect_median_200','curv_prof','curv_prof_mean_200',
        'curv_prof_median_200','curv','curv_mean_200','curv_median_200','curv_plan',
        'curv_plan_mean_200','curv_plan_median_200','tri','tri_mean_200','tri_median_200',
        'alt_min_500','alt_max_500','alt_mean_500','alt_median_500','slope_mean_500',
        'slope_median_500','aspect_mean_500','aspect_median_500','curv_prof_mean_500',
        'curv_prof_median_500','curv_plan_mean_500','curv_plan_median_500','curv_mean_500',
        'curv_median_500','tri_mean_500','tri_median_500','dayofyear','year']

In [None]:
# Load the grid-cell geometries (PATH_GRID2) shared by the feature steps below.
grid_path = Path(PATH_GRID2)
grid_cells = gpd.read_file(grid_path)

# Data Processing
## Download Data
### Download Weekly Data (HRRR)

In [None]:
%%time
# Download the HRRR meteorological data for every date in `dates`.
# The target folder comes from PATH_HRRR_DATA (same value as the literal that
# used to be hard-coded here), so the download location and the folder the
# weekly feature step reads from cannot drift apart.
hrrr_week_download.download_data(dates, output_path=PATH_HRRR_DATA)

### Download MODIS Data

In [None]:
%%time
# Download MODIS data into PATH_MODIS.
# NOTE(review): how='new' presumably fetches only files that are not on disk
# yet -- confirm against modis_downloader.download.
modis_downloader.download(PATH_MODIS, how='new')

## Feature engineering
### Weekly HRRR Features

In [None]:
%%time
# Build the weekly HRRR features, one call per forecast date.
# The output locations do not change between iterations, so resolve them once.
week_features_dir = str(Path(PATH_WEEK_FEATURES))
timeseries_dir = str(Path(PATH_TIMESERIES))

for date in tqdm(dates, desc='timestamp'):
    # Dates that are not yet in the past are skipped.
    if not (pd.to_datetime(date) < datetime.now()):
        continue
    hrrr_week_features.features_for_timestamp(
        folder=str(Path(PATH_HRRR_DATA) / date),
        grid_cells=grid_cells,
        features_save_path=week_features_dir,
        save_path=timeseries_dir,
    )



### Winter HRRR Features

In [None]:
%%time
# Winter HRRR features, computed from the time series under PATH_TIMESERIES
# and written to PATH_WINTER_FEATURES.
# Per the original note: set last_year=False to calculate all historical data.
hrrr_winter_features.winter_features(
    path_timeseries=PATH_TIMESERIES,
    features_save_path=PATH_WINTER_FEATURES,
    last_year=True,
)

### MODIS features

In [None]:
%%time
# Build MODIS features for every sub-folder of PATH_MODIS.
# os.listdir() returns entries in arbitrary order and also lists plain files
# (e.g. OS metadata files), so iterate in sorted order for reproducible runs
# and skip anything that is not a directory.
for year in sorted(os.listdir(PATH_MODIS)):
    path = os.path.join(PATH_MODIS, year)
    if not os.path.isdir(path):
        continue
    modis_features.modis_features(path, grid_cells, all_files=True,
                                  last_files=0, output_path=PATH_MODIS_FEATURES)

In [None]:
# Collect every MODIS feature CSV (recursively) and assemble the frame for `dates`.
modis_feature_dir = Path(PATH_MODIS_FEATURES)
paths_modis = [*modis_feature_dir.rglob('*.csv')]
df_modis = modis_features.get_modis_df(paths_modis, dates)

# Reduce valid_time to its calendar date, stored as a string.
modis_dates = df_modis['valid_time'].dt.date
df_modis['valid_time'] = modis_dates.astype(str)

# Build Inference Dataset

In [None]:
# Reshape the wide submission template (the 'Unnamed: 0' column plus all other
# columns) into one row per melted pair, fill missing values with 0, and index
# the result by (valid_time, cell_id).
sub = (
    pd.read_csv('data/input/submission_format_2022.csv')
    .melt('Unnamed: 0')
    .fillna(0)
    .set_axis(['cell_id', 'valid_time', 'swe'], axis=1)
    .set_index(['valid_time', 'cell_id'])
)

In [None]:
def _stack_csvs(csv_paths):
    """Read each CSV in `csv_paths` and concatenate the frames row-wise."""
    return pd.concat([pd.read_csv(csv_path) for csv_path in csv_paths])


# Weekly and winter HRRR features: every CSV found (recursively) in each folder.
paths_week_f = list(Path(PATH_WEEK_FEATURES).rglob('*.csv'))
paths_winter_f = list(Path(PATH_WINTER_FEATURES).rglob('*.csv'))
df_week = _stack_csvs(paths_week_f)
df_winter = _stack_csvs(paths_winter_f)

# DEM features (joined on cell_id further down).
df_dem = pd.read_csv('data/raw/dem_features.csv')

# Centroid of every grid cell, exposed as plain lon / lat columns.
grid_centroids = gpd.GeoDataFrame(grid_cells.copy(), geometry=grid_cells.centroid)
grid_centroids['lon'] = grid_centroids.geometry.x
grid_centroids['lat'] = grid_centroids.geometry.y
df_grid = grid_centroids[['cell_id', 'lon', 'lat']]

In [None]:
# Start from the submission rows and attach every feature source.
# NOTE(review): the last merge (MODIS) uses the default inner join, unlike the
# left joins before it, so rows without MODIS data are dropped here -- confirm
# this is intended.
key_cols = ['cell_id', 'valid_time']
df = (
    sub.reset_index().copy()
    .merge(df_week, on=key_cols, how='left')
    .merge(df_winter, on=key_cols, how='left')
    .merge(df_dem, on=['cell_id'], how='left')
    .merge(df_grid, on=['cell_id'], how='left')
    .merge(df_modis, on=key_cols)
)

# Calendar features derived from the valid_time string.
valid_dt = pd.to_datetime(df['valid_time'], format='%Y-%m-%d')
df['dayofyear'] = valid_dt.dt.dayofyear
df['year'] = valid_dt.dt.year

# Keep only the configured columns, in the configured order.
df = df[cols]

# Run Model

## Read pickle

In [None]:
def _iter_pickled_objects(stream):
    """Yield objects pickled back-to-back in `stream` until EOF is reached."""
    while True:
        try:
            yield pickle.load(stream)
        except EOFError:
            return


# The model file holds several objects pickled one after another; read them all.
# SECURITY: pickle.load can execute arbitrary code -- only load a models file
# that comes from a trusted source.
with open('models/'+'models_final.pkl', 'rb') as f:
    models_readed = list(_iter_pickled_objects(f))

## Models

In [None]:
# The pickle stream is unpacked positionally: one `rf` model, three base
# models, then the meta model.
rf, xgb_1, lgb_1, ctb_1, clf_meta = models_readed

# Base models and their matching column names (same order in both lists).
zoo = [xgb_1, lgb_1, ctb_1]
zoo_names = ['xgb_1', 'lgb_1', 'ctb_1']

# The rf prediction is added to df as an extra column before Z is built.
rf_inputs = df[['lat', 'lon', 'alt', 'year', 'dayofyear']]
df['rf_org_value_v2'] = rf.predict(rf_inputs)

# Model input matrix: everything except the key columns and `year`.
Z = df.drop(columns=['cell_id', 'valid_time', 'year'])

## Predict and make submission

In [None]:
%%time
# First-level predictions: one column per base model, aligned to Z's index.
Z_meta_f = pd.DataFrame(0, index=Z.index, columns=zoo_names)
for model_name, base_model in zip(zoo_names, zoo):
    Z_meta_f[model_name] = base_model.predict(Z)

# Any row in which at least one base model predicts a negative value is set
# to 0 in ALL columns.
# NOTE(review): this zeroes whole rows, not only the negative cells. It looks
# unintended, but the meta model may have been fitted on the same
# transformation, so the behaviour is kept -- confirm against the training code.
has_negative = (Z_meta_f < 0).any(axis=1)
Z_meta_f[has_negative] = 0

# Second-level (meta) prediction.
res = pd.DataFrame(clf_meta.predict(Z_meta_f), columns=['swe_pred'])

# Attach the keys, then write the predictions into the submission frame.
# Submission rows without a prediction keep their existing `swe` value.
row_keys = df[['cell_id', 'valid_time']]
res = pd.concat([row_keys, res], axis=1).set_index(['valid_time', 'cell_id'])
has_prediction = sub.index.isin(res.index)
sub.loc[has_prediction, 'swe'] = res['swe_pred']

# Back to the wide layout: one row per cell_id, one column per valid_time.
res_pivot = sub.reset_index().pivot(index='cell_id', columns='valid_time', values='swe')

In [None]:
# Save the submission as sub_<today's date>.csv, keeping the cell_id index.
today = datetime.now().date()
res_pivot.to_csv(f'sub_{today}.csv', index=True)