In [None]:
# Mount Google Drive at /content/drive; the later cells read and write files
# under /content/drive/MyDrive/snocast/eval.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
import ee
import json
import pandas as pd
import numpy as np
import datetime

## Import Base Data Files

In [None]:
# Run parameters.
# run_date is embedded in every parquet file name below and is passed to
# modis_parallel.py through its -d option.
run_date = '2022-02-10'
# NOTE(review): lookback is not referenced by any cell visible in this notebook;
# confirm whether it is still needed.
lookback = 15

# Base data files.
#ground_measures_metadata = pd.read_csv('/content/drive/MyDrive/snocast/eval/data/ground_measures_metadata.csv')
submission_format = pd.read_csv('/content/drive/MyDrive/snocast/eval/data/submission_format.csv')

In [None]:
# Build one row per grid cell: its id, a centre latitude/longitude and a bounding box.
# The file is opened in a `with` block so the handle is closed as soon as the
# GeoJSON has been parsed.
with open('/content/drive/MyDrive/snocast/eval/data/grid_cells.geojson') as f:
    grid_cells = json.load(f)
print('length grid_cells features: ', len(grid_cells['features']))

ids = []
lats = []
lons = []
bboxes = []

for grid_cell in grid_cells['features']:
    cell_id = grid_cell['properties']['cell_id']
    # First element of the geometry's coordinates: a list of [lon, lat] pairs
    # (the outer ring if the geometry is a Polygon -- TODO confirm geometry type).
    coordinates = grid_cell['geometry']['coordinates'][0]
    # Centre is taken as the plain mean of the listed vertices.
    # NOTE(review): if the ring is closed (first vertex repeated as the last one,
    # as GeoJSON polygon rings normally are) that vertex is counted twice here,
    # which pulls the mean slightly towards it -- confirm this is acceptable.
    lon, lat = np.mean(coordinates, axis=0)
    northeast_corner = np.max(coordinates, axis=0)
    southwest_corner = np.min(coordinates, axis=0)
    # bbox = [min_lon, min_lat, max_lon, max_lat]
    bbox = np.concatenate([southwest_corner, northeast_corner])
    ids.append(cell_id)
    lats.append(lat)
    lons.append(lon)
    bboxes.append(bbox)

grid_cells_pd = pd.DataFrame({'location_id': ids,
                              'latitude': lats,
                              'longitude': lons,
                              'bbox': bboxes})

In [None]:
# Split the grid cells into 5 row-wise chunks, one per background job.
# The split is done on integer row positions and applied with .iloc rather than
# calling np.array_split on the DataFrame itself: the latter goes through
# DataFrame.swapaxes, which is deprecated in recent pandas. Chunk sizes and row
# order are the same as np.array_split(grid_cells_pd, 5).
df_parts = [
    grid_cells_pd.iloc[rows]
    for rows in np.array_split(np.arange(len(grid_cells_pd)), 5)
]

In [None]:
# One parquet file per chunk, named after the run date and the chunk number.
df_file_paths = [
  f'/content/drive/MyDrive/snocast/eval/data/modis/df_parts/df_{run_date}_{part}.parquet'
  for part in range(len(df_parts))
]
# Write each chunk to its matching path.
for file_path, df_part in zip(df_file_paths, df_parts):
  df_part.to_parquet(file_path)

In [None]:
# Give each of the first five chunk paths its own name so the shell cells below
# can interpolate them into their command lines.
file_path_0, file_path_1, file_path_2, file_path_3, file_path_4 = df_file_paths[:5]

In [None]:
!nohup python /content/drive/MyDrive/snocast/eval/modis_parallel.py $file_path_0 -d $run_date -n 0 > modis_0.log &

In [None]:
!nohup python /content/drive/MyDrive/snocast/eval/modis_parallel.py $file_path_1 -d $run_date -n 1 > modis_1.log &

In [None]:
!nohup python /content/drive/MyDrive/snocast/eval/modis_parallel.py $file_path_2 -d $run_date -n 2 > modis_2.log &

In [None]:
!nohup python /content/drive/MyDrive/snocast/eval/modis_parallel.py $file_path_3 -d $run_date -n 3 > modis_3.log &

In [None]:
!nohup python /content/drive/MyDrive/snocast/eval/modis_parallel.py $file_path_4 -d $run_date -n 4 > modis_4.log &

In [None]:
# Read back the per-chunk Terra and Aqua parquet files for this run date.
# The jobs that are expected to produce these files were launched in the
# background, so this cell can run before they have finished. Instead of
# stopping at the first absent file, every missing path is collected and
# reported in a single error that points at the likely cause.
# NOTE(review): a file that exists may still be incomplete or left over from an
# earlier run with the same run_date -- check the modis_<n>.log files to confirm
# the jobs have finished.
terra_df_parts = []
aqua_df_parts = []
missing_parts = []

for i in range(5):
  # Same read order as before: Terra first, then Aqua, for each chunk number.
  for satellite, parts in (('terra', terra_df_parts), ('aqua', aqua_df_parts)):
    part_path = f'/content/drive/MyDrive/snocast/eval/data/modis/modis_parts/modis_{satellite}_{run_date}_{i}.parquet'
    try:
      parts.append(pd.read_parquet(part_path))
    except FileNotFoundError:
      missing_parts.append(part_path)

if missing_parts:
  raise FileNotFoundError(
      'MODIS part files not found (are the background modis_parallel.py jobs '
      'still running? check modis_<n>.log):\n' + '\n'.join(missing_parts))

In [None]:
# Stack the per-chunk frames into one frame each, discarding the chunk-local
# index in favour of a fresh 0..n-1 index.
terra_df = pd.concat(terra_df_parts, ignore_index=True)
aqua_df = pd.concat(aqua_df_parts, ignore_index=True)

In [None]:
# Sanity check: (rows, columns) of the combined Terra frame.
terra_df.shape

In [None]:
# Sanity check: (rows, columns) of the combined Aqua frame.
aqua_df.shape

In [None]:
# Save the combined Terra and Aqua frames, one parquet file each, named by run date.
for modis_df, out_path in (
    (terra_df, f'/content/drive/MyDrive/snocast/eval/data/modis/modis_terra_{run_date}.parquet'),
    (aqua_df, f'/content/drive/MyDrive/snocast/eval/data/modis/modis_aqua_{run_date}.parquet'),
):
  modis_df.to_parquet(out_path)