# Prepare Dummy Data
Convert XArray to Pickled Dictionary

Opens the solar irradiance and cloud coverage data files with xarray, applies a little preprocessing, and converts the result to a pickled dictionary for use as dummy data for the plugin.

See [this link](https://drive.google.com/drive/folders/1--4frW2RCqb0Axh356DfUkCZfKdl8aSm?usp=sharing) for the data. The folder is shared only with select individuals. See `README.ipynb` for information on the data used in this project.

In [9]:
import pickle
import xarray as xr
import numpy as np

In [20]:
# --- Solar irradiance: input file, variable to extract, and output topic key ---
sol_irr_path = 'sgpradflux1longC1.c2.20160301.060000.nc'
sol_irr_var = 'downwelling_shortwave'
TOPIC_HISTORICAL_SOL_IRR = "env.solar.irradiance"

# --- Cloud coverage: input file, variable to extract, and output topic key ---
cloud_coverage_path = 'sgptsiskycoverC1.b1.20160301.000000.cdf'
cloud_coverage_var = 'percent_opaque'
TOPIC_HISTORICAL_CLOUD_COVERAGE = "env.coverage.cloud"

# Frequency string handed to xarray's resample() along the time dimension.
resample_rate = '15min'

In [21]:
def open_file_with_xr(path):
    """Open the dataset(s) at ``path`` with xarray.

    Args:
        path: File path (or whatever ``xr.open_mfdataset`` accepts) to open.

    Returns:
        The dataset object returned by ``xr.open_mfdataset(path)``.
    """
    return xr.open_mfdataset(path)

In [27]:
# Lookup table, structured as:
# {name of variable in app.py: [file path, name of variable we want from that file]}
paths = {
    TOPIC_HISTORICAL_SOL_IRR: [sol_irr_path, sol_irr_var],
    TOPIC_HISTORICAL_CLOUD_COVERAGE: [cloud_coverage_path, cloud_coverage_var],
}

# Open each file and keep only the requested variable, keyed by topic name.
xr_dict = {
    topic: open_file_with_xr(file_path)[variable]
    for topic, (file_path, variable) in paths.items()
}

# Remove cloud coverage samples equal to -100. NOTE: where(..., drop=True)
# masks those values and drops coordinate labels left without valid data;
# it does NOT replace them with 0.
cloud_cover = xr_dict[TOPIC_HISTORICAL_CLOUD_COVERAGE]
is_valid = cloud_cover != -100
cloud_cover = cloud_cover.where(is_valid, drop=True)
xr_dict[TOPIC_HISTORICAL_CLOUD_COVERAGE] = cloud_cover

# Inner-join the variables so only coordinates present in every dataset
# remain (the intersection of time points), then average within each
# resample_rate window. Per the original notes, app.py formats the data
# into a time series but does not resample, so resampling happens here.
merged = (
    xr.merge(xr_dict.values(), join='inner')
    .resample(time=resample_rate)
    .mean()
)

# Pull each variable back out as a plain array, keyed by topic, for pickling.
data_dict = {
    topic: merged[variable].values
    for topic, (_, variable) in paths.items()
}

# Write the dictionary to disk as the dummy-data pickle.
with open('20160301_ex_data.pickle', 'wb') as pickle_file:
    pickle.dump(data_dict, pickle_file, protocol=pickle.HIGHEST_PROTOCOL)

In [31]:
# Reference values: if you use the dummy data (20160301_ex_data.pickle),
# compare your results against the actual values displayed here
# (four solar irradiance samples, indices 16 through 19).
data_dict[TOPIC_HISTORICAL_SOL_IRR][16:20]

array([717.6733 , 737.48663, 753.0933 , 764.0934 ], dtype=float32)