### Imports and Paths

In [None]:
#imports
#from google.colab import drive
#from scipy.io import loadmat
import matplotlib.pyplot as plt
#import tensorflow as tf
#from tensorflow import keras
import numpy as np
#import math
import os
import time
import datetime
#import itertools
import h5py
#import matplotlib.dates as mdates


# Dataset locations.
# (Colab only) #drive.mount('/content/drive')
data_dir = "/homedtic/aberresheim/Stanford-solar-forecasting-dataset/data/"
# HDF5 file read by the data-loading cell and appended to by the export cell.
data_path = os.path.join(data_dir, '2017_2019_images_pv_processed.hdf5')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


## Sun tracking

In [None]:
def _camera_mask(size=64, center=(31, 31), radius=32.5):
    """Return a boolean (size, size) array that is True inside the circular camera view."""
    rows, cols = np.ogrid[:size, :size]
    # distance of every pixel to the circle centre (center is given as (row, col))
    dist = np.sqrt((cols - center[1])**2 + (rows - center[0])**2)
    return dist <= radius


def get_sun_position(threshold, img):
    """Estimate the sun position as the centroid of the bright pixels in a sky image.

    The image is reduced to grayscale by averaging its three channels, pixels
    whose gray value is at least ``threshold / 255`` are kept, everything outside
    a circle of radius 32.5 around pixel (31, 31) is discarded, and the mean
    row/column index of the remaining pixels is returned.

    Parameters
    ----------
    threshold : number
        Brightness cut-off on a 0-255 scale; it is divided by 255 before being
        compared with the gray image.
    img : array-like of shape (64, 64, 3)
        Sky image. Presumably scaled to [0, 1], since the threshold is rescaled
        by 255 -- confirm against the caller.

    Returns
    -------
    numpy.ndarray of shape (2,)
        ``[row, col]`` centroid of the selected pixels, or ``[0., 0.]`` when no
        pixel is selected (note: indistinguishable from a centroid at the origin).
    """
    # only consider pixels inside the camera image (circle)
    circ_mask = _camera_mask()

    #1: turn to B&W image (equal-weight channel average)
    img_bw = np.dot(img, [1/3, 1/3, 1/3])

    #2: apply threshold, restricted to the camera circle.
    # `>=` (rather than "not <") means NaN pixels are never counted as sun.
    sun_pixels = (img_bw >= (threshold/255.0)) & circ_mask

    #3: calculate centroid
    rows, cols = np.nonzero(sun_pixels)
    sun_centroid = np.zeros(2)
    if rows.size != 0:
        sun_centroid[0] = rows.mean()
        sun_centroid[1] = cols.mean()

    return sun_centroid

## Create Clear Sky Library

In [None]:
# Load the per-sample timestamps of the train/validation and test splits.
# Both paths are built with os.path.join so they do not depend on data_dir
# ending with a path separator.
# NOTE(review): allow_pickle=True lets np.load unpickle arbitrary objects, which
# can execute code -- only use it on .npy files from a trusted source.
times_trainval = np.load(os.path.join(data_dir, "times_trainval.npy"), allow_pickle=True)
times_test = np.load(os.path.join(data_dir, "times_test.npy"), allow_pickle=True)

with h5py.File(data_path, 'r') as f:
    # read in every 5th image of each split
    images_log_test = f['test']['images_log'][::5]
    images_log_trainval = f['trainval']['images_log'][::5]

# process image data: rescale by 1/255 and store as float32
# (presumably the raw images are 8-bit, giving values in [0, 1] -- confirm)
images_log_test = (images_log_test/255.0).astype('float32')
images_log_trainval = (images_log_trainval/255.0).astype('float32')


In [None]:
# establish Clear Sky Library (CSL): all subsampled images taken on the chosen days
csl_dates = [(2017, 5, 20), (2017, 8, 15), (2017, 9, 23), (2017, 10, 22)] #last date is in test set
csl_dates = [datetime.date(*day) for day in csl_dates]

# Calendar date of every kept sample. The stride of 5 must match the one used
# when the images were read, so that masks and image arrays line up.
# Subsampling first avoids calling .date() on timestamps that are discarded.
dates_trainval = np.asarray([times.date() for times in times_trainval[::5]])
dates_test = np.asarray([times.date() for times in times_test[::5]])

# Collect the per-day slices and concatenate once instead of re-copying the
# growing array on every iteration. The leading empty float64 array keeps the
# result's dtype/shape well defined even if no image matches.
csl_chunks = [np.empty((0, 64, 64, 3))]
for date in csl_dates:
    date_mask_trainval = (dates_trainval == date)
    date_mask_test = (dates_test == date)

    # per day: test images first, then trainval images
    csl_chunks.append(images_log_test[date_mask_test])
    csl_chunks.append(images_log_trainval[date_mask_trainval])

csl_images = np.concatenate(csl_chunks, axis=0)



In [None]:
# Brightness cut-off handed to get_sun_position (given on a 0-255 scale).
THRESHOLD = 230

# One (row, col) sun centroid per clear-sky image.
n_csl_images = csl_images.shape[0]
csl_sun_pos = np.zeros((n_csl_images, 2))

for idx in range(n_csl_images):
    csl_sun_pos[idx] = get_sun_position(THRESHOLD, csl_images[idx])

    # Visual check: uncomment to overlay the detected centroid on each image.
    #plt.imshow(csl_images[idx])
    #plt.scatter(csl_sun_pos[idx, 1], csl_sun_pos[idx, 0], color='yellow')
    #plt.show()

In [None]:
# Store the clear-sky images and their sun positions in a group called
# 'clearskylibrary' inside the dataset file, replacing any previous version.
with h5py.File(data_path, 'a') as f:
    # Remove the group left by an earlier run. The membership test keeps the
    # cell from raising KeyError when the group does not exist yet.
    if 'clearskylibrary' in f:
        del f['clearskylibrary']

    csl = f.create_group('clearskylibrary')
    # dtype='f' stores both arrays as 32-bit floats
    csl.create_dataset('csl_images', data=csl_images, dtype='f')
    csl.create_dataset('csl_sun_pos', data=csl_sun_pos, dtype='f')