### Imports and Paths

In [None]:
#imports
from google.colab import drive
from scipy.io import loadmat
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
import numpy as np
import math
import os
import time
import datetime
import itertools
import h5py
import matplotlib.dates as mdates


#paths
# Mount Google Drive so that the dataset stored under MyDrive is reachable from this Colab runtime.
drive.mount('/content/drive')
# Directory that holds the HDF5 dataset and the times_*.npy timestamp arrays.
# The trailing slash matters: paths below are built by plain string concatenation.
data_dir = "/content/drive/MyDrive/Stanford-solar-forecasting-dataset/data/"
# HDF5 file the later cells read images from and write the clear sky library and derived features to.
data_path = data_dir + '2017_2019_images_pv_processed.hdf5'
# Directory for model output; it is not referenced by the cells visible in this notebook.
output_dir = "/content/drive/MyDrive/Stanford-solar-forecasting-dataset/model_output/SUNSET_nowcast_2017_2019_data/"


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


## Sun tracking

In [None]:
def get_sun_position(threshold, img):
    """Locate the sun as the centroid of the very bright pixels of a sky image.

    Parameters
    ----------
    threshold : float
        Brightness cut-off on the 0-255 scale. It is divided by 255 before the
        comparison, so ``img`` is expected to be scaled to [0, 1].
    img : np.ndarray
        Image of shape (64, 64, 3); the three channels are averaged with equal
        weights to obtain a grey-scale image.

    Returns
    -------
    np.ndarray
        ``[row, col]`` centroid of all pixels that are at least as bright as the
        cut-off and lie inside the camera circle. ``[0, 0]`` is returned when no
        pixel qualifies, so callers treat an all-zero position as "sun not found".
    """
    # Circular camera footprint on the fixed 64 x 64 frame: pixels outside the
    # circle (centre (31, 31), radius 32.5) are never counted as sun.
    cols, rows = np.meshgrid(np.arange(64), np.arange(64))
    center = (31, 31)
    radius = 32.5
    dist = np.sqrt((cols - center[1])**2 + (rows - center[0])**2)
    circ_mask = (dist <= radius).astype(np.uint8)

    #1: turn to B&W image (equal-weight channel average)
    img_bw = np.dot(img, [1/3, 1/3, 1/3])
    #2: apply threshold, then keep only pixels inside the camera circle
    img_sun_mask = np.where(img_bw < (threshold/255.0), 0, 1) * circ_mask

    #3: calculate centroid
    sun_centroid = np.zeros(2)
    idxs = np.nonzero(img_sun_mask)
    # Compare the *number* of bright pixels with zero. The previous
    # `len(idxs[0] != 0)` measured the length of a boolean array instead and
    # only behaved correctly by coincidence.
    if idxs[0].size > 0:
        sun_centroid[0] = np.mean(idxs[0])
        sun_centroid[1] = np.mean(idxs[1])

    return sun_centroid

## Sun segmentation

In [None]:
def get_sun_seg(threshold, img):
    """Segment the very bright ("sun") pixels of a sky image.

    ``threshold`` is given on the 0-255 scale and divided by 255 before it is
    compared with the equal-weight channel average of ``img``. The result is a
    (64, 64) integer mask holding 1 where a pixel reaches the cut-off and lies
    inside the camera circle, and 0 everywhere else.
    """
    # Row / column index grids of the fixed 64 x 64 frame.
    side = 64
    row_idx, col_idx = np.indices((side, side))

    # Camera footprint: a disc of radius 32.5 around pixel (31, 31).
    cam_center = (31, 31)
    cam_radius = 32.5
    dist_to_center = np.sqrt((col_idx - cam_center[1])**2 + (row_idx - cam_center[0])**2)
    inside_camera = np.zeros((side, side), dtype=np.uint8)
    inside_camera[dist_to_center <= cam_radius] = 1

    # Grey-scale image, then binarise against the rescaled cut-off.
    grey = np.dot(img, [1/3, 1/3, 1/3])
    cutoff = threshold/255.0
    bright = np.where(grey < cutoff, 0, 1)

    # Discard anything outside the camera circle.
    return bright * inside_camera

## Cloud Segmentation


In [None]:
# ASSUMPTION: the image channels are ordered as RGB -> CONFIRMED
def get_cloud_segmentation(img, sun_pos, clear_sky_images=None, clear_sky_sun_pos=None, plot=True):
    """Segment clouds in a sky image with NRBR / delta-NRBR thresholding.

    The normalised red-blue ratio is computed here as ``(B - R) / (B + R)``
    with channel 0 taken as red and channel 2 as blue. Two candidate masks are
    built and one of them is returned, depending on how cloudy the image looks:

    * NRBR mask: 1 where ``NRBR <= 0.05``.
    * delta-NRBR mask: 1 where ``|NRBR - NRBR_clear_sky| > 0.175``; the clear
      sky image is the library image whose sun position is closest to ``sun_pos``.

    Selection, with ``c`` the fraction of frame pixels set in the delta-NRBR mask:

    * ``c < 0.045``  -> delta-NRBR mask
    * ``c < 0.35``   -> NRBR mask with a disc of radius 7 around the sun zeroed
      (plain NRBR mask when ``sun_pos`` is all zeros, i.e. no sun was found)
    * otherwise      -> NRBR mask

    Parameters
    ----------
    img : np.ndarray
        (64, 64, 3) RGB image.
    sun_pos : np.ndarray
        ``[row, col]`` sun position; all zeros means "not detected".
    clear_sky_images, clear_sky_sun_pos : np.ndarray, optional
        Clear sky library images and their sun positions, row-aligned. When
        omitted, the notebook-level ``csl_images`` / ``csl_sun_pos`` are used.
    plot : bool, optional
        Show the 4-panel diagnostic figure (default, as before). Pass ``False``
        when processing many images.

    Returns
    -------
    np.ndarray
        (64, 64) integer mask, 1 = cloud, 0 = sky or outside the camera circle.

    Raises
    ------
    ValueError
        If the clear sky library is empty.
    """
    nrbr_threshold = 0.05
    delta_nrbr_threshold = .175
    sun_radius = 7

    # Fall back to the notebook globals so existing two-argument calls keep working.
    if clear_sky_images is None:
        clear_sky_images = csl_images
    if clear_sky_sun_pos is None:
        clear_sky_sun_pos = csl_sun_pos
    if len(clear_sky_sun_pos) == 0:
        raise ValueError('clear sky library is empty')

    # mask for camera circle (centre (31, 31), radius 32.5 on the 64 x 64 frame)
    cols, rows = np.meshgrid(np.arange(64), np.arange(64))
    dist = np.sqrt((cols - 31)**2 + (rows - 31)**2)
    circ_mask = (dist <= 32.5).astype(np.uint8)

    #1: calculate the nrbr of the full image
    # Pixels with R + B == 0 produce NaN. NaN fails every `<=` test below, so such
    # pixels are "not cloud" in the NRBR mask and "cloud" in the delta-NRBR mask.
    # Only the warning is silenced; the values are unchanged.
    with np.errstate(divide='ignore', invalid='ignore'):
        img_nrbr = (img[..., 2] - img[..., 0])/(img[..., 2] + img[..., 0])
    #2: apply nrbr threshold
    img_nrbr_mask = np.where(img_nrbr <= nrbr_threshold, 1, 0) * circ_mask

    #3: find the clear sky image whose sun position is closest (Euclidean) to sun_pos
    diff = clear_sky_sun_pos - sun_pos
    deltas = np.sqrt(diff[:, 0]**2 + diff[:, 1]**2)
    cs_img = clear_sky_images[deltas.argmin()]
    #4: calculate nrbr_csl
    with np.errstate(divide='ignore', invalid='ignore'):
        cs_img_nrbr = (cs_img[..., 2] - cs_img[..., 0])/(cs_img[..., 2] + cs_img[..., 0])
    #5: calculate delta_nrbr
    delta_nrbr = np.abs(img_nrbr - cs_img_nrbr)
    #6: apply threshold
    img_delta_nrbr_mask = np.where(delta_nrbr <= delta_nrbr_threshold, 0, 1) * circ_mask

    #7: calculate delta cloudiness (fraction of all frame pixels flagged by delta-NRBR)
    delta_cloudiness = img_delta_nrbr_mask.sum()/img_delta_nrbr_mask.size
    print('cloudiness: ' ,delta_cloudiness)

    #8: decide what nrbr to use
    if delta_cloudiness < 0.045:
        print('---------DELTA NRBR----------')
        result = img_delta_nrbr_mask
    elif delta_cloudiness < 0.35:
        print('--------DIFFICULT OPTION--------')
        if np.all(sun_pos == 0):
            # No sun position available, so there is no circumsolar area to treat separately.
            result = img_nrbr_mask
        else:
            # NOTE(review): the original comment described merging the delta-NRBR
            # mask inside the circumsolar disc with the NRBR mask outside it. The
            # code only blanks the disc out of the NRBR mask; that behaviour is
            # kept here. Confirm which one is intended.
            sun_dist = np.sqrt((cols - sun_pos[1])**2 + (rows - sun_pos[0])**2)
            non_sun_circ_mask = (sun_dist > sun_radius).astype(np.uint8)
            result = img_nrbr_mask * non_sun_circ_mask
    else:
        print('----------NRBR---------')
        result = img_nrbr_mask

    if plot:
        fig, axarr = plt.subplots(1, 4, sharex=True, sharey=True)
        axarr[0].imshow(img)
        axarr[0].scatter(sun_pos[1], sun_pos[0], color='yellow')
        axarr[0].set_title('Original Image')
        axarr[1].imshow(img_nrbr_mask)
        axarr[1].set_title('NRBR Result')
        axarr[2].imshow(img_delta_nrbr_mask)
        axarr[2].set_title('Delta NRBR Result')
        axarr[3].imshow(result)
        axarr[3].set_title('Final Result')
        plt.show()
        # One figure is opened per image; close it so figures do not pile up
        # in memory when this runs inside a loop over the dataset.
        plt.close(fig)

    return result

## Image Illumination

In [None]:
def get_img_ill(img):
    """Return the image illumination: the sum of all values of ``img`` divided by
    the product of its first three dimensions (rows x columns x channels)."""
    n_rows, n_cols, n_channels = img.shape[0], img.shape[1], img.shape[2]
    total_intensity = np.sum(img)
    return total_intensity/(n_rows*n_cols*n_channels)

## Sun area mean pixel intensity (SAMPI)

In [None]:
def get_sampi(img, sun_pos):
    """Sun area mean pixel intensity (SAMPI).

    Averages the values of ``img`` over all three channels inside a disc of
    radius 7 pixels centred on ``sun_pos`` (``[row, col]``) on the 64 x 64 frame.
    Returns 0 when ``sun_pos`` is all zeros, which is the value
    ``get_sun_position`` yields when it finds no sun.
    """
    sun_radius = 7

    # Guard clause: no sun position, no sun area.
    if np.all(sun_pos == 0):
        return 0

    # Distance of every pixel of the 64 x 64 frame to the sun centre.
    row_idx, col_idx = np.indices((64, 64))
    dist_to_sun = np.sqrt((col_idx - sun_pos[1])**2 + (row_idx - sun_pos[0])**2)

    # 1 inside the sun disc, 0 outside; the trailing axis lets it broadcast over the channels.
    sun_disc = np.where(dist_to_sun <= sun_radius, 1, 0).astype(np.uint8)[..., np.newaxis]

    masked_img = img * sun_disc
    # The disc pixel count is multiplied by 3 because the numerator sums over three channels.
    return np.sum(masked_img)/(np.sum(sun_disc)*3.0)

## Create Clear Sky Library

In [None]:
# Timestamps of the trainval and test samples. The arrays hold Python objects
# (their .date() method is called further down), which is why allow_pickle is needed.
times_trainval = np.load(os.path.join(data_dir, "times_trainval.npy"), allow_pickle=True)
times_test = np.load(os.path.join(data_dir, "times_test.npy"), allow_pickle=True)

# Only every 5th frame is read here: this subset is used solely to build the
# clear sky library, not for the feature computation.
with h5py.File(data_path, 'r') as f:
    images_log_trainval = f['trainval']['images_log'][::5]
    images_log_test = f['test']['images_log'][::5]

# Rescale pixel values from 0-255 to [0, 1] and store them as float32.
images_log_trainval = (images_log_trainval/255.0).astype('float32')
images_log_test = (images_log_test/255.0).astype('float32')


In [None]:
# Establish the Clear Sky Library (CSL): all subsampled frames recorded on a few
# hand-picked clear days. The last date belongs to the test set.
csl_dates = [datetime.date(*ymd) for ymd in
             [(2017, 5, 20), (2017, 8, 15), (2017, 9, 23), (2017, 10, 22)]]

# Calendar date of every sample, subsampled with the same stride of 5 as the
# images so that the boolean masks below line up with the image arrays.
dates_trainval = np.asarray([stamp.date() for stamp in times_trainval])[::5]
dates_test = np.asarray([stamp.date() for stamp in times_test])[::5]

# Collect the matching frames per day (test frames first, then trainval) and
# concatenate once. The empty float64 seed keeps the result dtype float64.
csl_chunks = [np.empty((0, 64, 64, 3))]
for clear_day in csl_dates:
    csl_chunks.append(images_log_test[dates_test == clear_day])
    csl_chunks.append(images_log_trainval[dates_trainval == clear_day])

csl_images = np.concatenate(csl_chunks, axis=0)



In [None]:
THRESHOLD = 230

# One [row, col] sun position per clear sky image; a row stays [0, 0] when
# get_sun_position finds no pixel above the threshold.
csl_sun_pos = np.zeros((csl_images.shape[0], 2))
for idx in range(csl_images.shape[0]):
    csl_sun_pos[idx] = get_sun_position(THRESHOLD, csl_images[idx])

# To inspect a detection: plt.imshow(csl_images[idx]) followed by
# plt.scatter(csl_sun_pos[idx, 1], csl_sun_pos[idx, 0], color='yellow').

Output hidden; open in https://colab.research.google.com to view.

In [None]:
# Store the clear sky images and their sun positions in the HDF5 group
# 'clearskylibrary', replacing whatever an earlier run left there.
with h5py.File(data_path, 'a') as f:
    # The group does not exist on a first run (or in a fresh copy of the file);
    # deleting it unconditionally would raise KeyError and stop the notebook.
    if 'clearskylibrary' in f:
        del f['clearskylibrary']

    csl = f.create_group('clearskylibrary')
    # dtype='f' stores both arrays as 32-bit floats, as before.
    csl.create_dataset('csl_images', data=csl_images, dtype='f')
    csl.create_dataset('csl_sun_pos', data=csl_sun_pos, dtype='f')

## Import data


In [None]:
# Timestamps of the trainval and test samples.
times_trainval = np.load(data_dir+"times_trainval.npy", allow_pickle=True)
times_test = np.load(os.path.join(data_dir,"times_test.npy"),allow_pickle=True)

with h5py.File(data_path,'r') as f:

    # The cells below split the test set by day and compute features for every
    # test frame, so the full test arrays are needed here. With these loads
    # commented out, pv_log_test was never defined and images_log_test was still
    # the every-5th-frame subset left over from the clear sky library section.
    images_log_test = f['test']['images_log'][...]
    pv_log_test = f['test']['pv_log'][...]

    # pv_log_trainval is not used by the visible cells, so it is not loaded.
    images_log_trainval = f['trainval']['images_log'][...]

    # Clear sky library written by the "Create Clear Sky Library" section.
    csl_images = f['clearskylibrary']['csl_images'][...]
    csl_sun_pos = f['clearskylibrary']['csl_sun_pos'][...]

# process image data: rescale pixel values from 0-255 to [0, 1], store as float32
images_log_test = (images_log_test/255.0).astype('float32')
pv_log_test = pv_log_test.astype('float32')

images_log_trainval = (images_log_trainval/255.0).astype('float32')


In [None]:
# Sunny and cloudy test days, given as (year, month, day).
sunny_dates = [(2017,9,15),(2017,10,6),(2017,10,22),
               (2018,2,16),(2018,6,12),(2018,6,23),
               (2019,1,25),(2019,6,23),(2019,7,14),(2019,10,14)]
cloudy_dates = [(2017,6,24),(2017,9,20),(2017,10,11),
                (2018,1,25),(2018,3,9),(2018,10,4),
                (2019,5,27),(2019,6,28),(2019,8,10),(2019,10,19)]

sunny_dates_test = [datetime.date(*ymd) for ymd in sunny_dates]
cloudy_dates_test = [datetime.date(*ymd) for ymd in cloudy_dates]

all_dates_test = sunny_dates_test + cloudy_dates_test

# Calendar date of every test sample.
dates_test = np.asarray([stamp.date() for stamp in times_test])

# Boolean mask over the test samples: True for samples recorded on a sunny day.
mask = np.zeros(len(pv_log_test), dtype=bool)
for sunny_day in sunny_dates_test:
    mask[np.flatnonzero(dates_test == sunny_day)] = True

# Sunny subset.
times_test_sunny = times_test[mask]
pv_log_test_sunny = pv_log_test[mask]
images_log_test_sunny = images_log_test[mask]
print("times_test_sunny.shape:",times_test_sunny.shape)

# "Cloudy" subset: every sample that is not on a sunny day. Note that it is the
# complement of the sunny mask; cloudy_dates is not used to select it.
not_sunny = ~mask
times_test_cloudy = times_test[not_sunny]
pv_log_test_cloudy = pv_log_test[not_sunny]
images_log_test_cloudy = images_log_test[not_sunny]
print("times_test_cloudy.shape:",times_test_cloudy.shape)

## Calculate and save


In [None]:
THRESHOLD = 230

# Per-image feature buffers for the trainval set.
n_trainval = images_log_trainval.shape[0]
sun_pos_trainval = np.zeros((n_trainval, 2))
# The two mask stacks only ever hold 0/1 and are stored as 32-bit floats later,
# so float32 buffers give identical results at half the memory of float64.
cloud_seg_trainval = np.zeros((n_trainval, images_log_trainval.shape[1], images_log_trainval.shape[2], 1), dtype='float32')
sun_seg_trainval = np.zeros((n_trainval, images_log_trainval.shape[1], images_log_trainval.shape[2]), dtype='float32')
image_ill_trainval = np.zeros((n_trainval, 1))
sampi_trainval = np.zeros((n_trainval, 1))

for i, image in enumerate(images_log_trainval):

    sun_pos_trainval[i] = get_sun_position(THRESHOLD, image)
    # get_cloud_segmentation returns a 2-D (rows, cols) mask. Assigning it to
    # cloud_seg_trainval[i], whose shape is (rows, cols, 1), raises a broadcast
    # ValueError, so write it into the single channel explicitly.
    cloud_seg_trainval[i, ..., 0] = get_cloud_segmentation(image, sun_pos_trainval[i])
    sun_seg_trainval[i] = get_sun_seg(THRESHOLD, image)
    image_ill_trainval[i] = get_img_ill(image)
    sampi_trainval[i] = get_sampi(image, sun_pos_trainval[i])

# An all-zero sun position is what get_sun_position returns when it finds no sun.
non_identified_counter_trainval = int(np.count_nonzero(np.all(sun_pos_trainval == 0, axis=1)))
identified_counter_trainval = n_trainval - non_identified_counter_trainval

# Guard against an empty image set instead of failing with ZeroDivisionError.
if n_trainval > 0:
    perc_identified_trainval = (identified_counter_trainval/n_trainval)*100
else:
    perc_identified_trainval = float('nan')
print('Percentage of images in the trainval set where the sun is identified: ', perc_identified_trainval, '%')

In [None]:
THRESHOLD = 230

# Per-image feature buffers for the test set. images_log_test must hold every
# test frame here (not the every-5th-frame subset used for the clear sky
# library) so that the arrays line up with f['test'] when they are saved.
n_test = images_log_test.shape[0]
sun_pos_test = np.zeros((n_test, 2))
# The two mask stacks only ever hold 0/1 and are stored as 32-bit floats later,
# so float32 buffers give identical results at half the memory of float64.
cloud_seg_test = np.zeros((n_test, images_log_test.shape[1], images_log_test.shape[2], 1), dtype='float32')
sun_seg_test = np.zeros((n_test, images_log_test.shape[1], images_log_test.shape[2]), dtype='float32')
image_ill_test = np.zeros((n_test, 1))
sampi_test = np.zeros((n_test, 1))

for i, image in enumerate(images_log_test):

    sun_pos_test[i] = get_sun_position(THRESHOLD, image)
    # get_cloud_segmentation returns a 2-D (rows, cols) mask. Assigning it to
    # cloud_seg_test[i], whose shape is (rows, cols, 1), raises a broadcast
    # ValueError, so write it into the single channel explicitly.
    cloud_seg_test[i, ..., 0] = get_cloud_segmentation(image, sun_pos_test[i])
    sun_seg_test[i] = get_sun_seg(THRESHOLD, image)
    image_ill_test[i] = get_img_ill(image)
    sampi_test[i] = get_sampi(image, sun_pos_test[i])

# An all-zero sun position is what get_sun_position returns when it finds no sun.
non_identified_counter_test = int(np.count_nonzero(np.all(sun_pos_test == 0, axis=1)))
identified_counter_test = n_test - non_identified_counter_test

# Guard against an empty image set instead of failing with ZeroDivisionError.
if n_test > 0:
    perc_identified_test = (identified_counter_test/n_test)*100
else:
    perc_identified_test = float('nan')
print('Percentage of images in the test set where the sun is identified is: ', perc_identified_test, '%')

# Overall rate; relies on the trainval counters computed in the previous cell.
identified_counter_tot = identified_counter_test + identified_counter_trainval
images_counter_tot = identified_counter_tot + non_identified_counter_test + non_identified_counter_trainval
if images_counter_tot > 0:
    perc_identified_tot = (identified_counter_tot/images_counter_tot)*100
else:
    perc_identified_tot = float('nan')
print('Percentage of images in the whole set where the sun is identified i: ', perc_identified_tot, '%')


In [None]:
def _write_features(h5_group, features):
    """Store every array of `features` (name -> array) as a 32-bit float dataset
    in `h5_group`, replacing a dataset of the same name if one already exists."""
    for name, values in features.items():
        if name in h5_group:
            # create_dataset refuses to overwrite; drop the stale dataset first
            # so the cell can be re-run.
            del h5_group[name]
        h5_group.create_dataset(name, data=values, dtype='f')


# Add the derived features to the trainval and test groups of the dataset file.
with h5py.File(data_path,'a') as f:
    # Both groups already exist in this file (images_log is read from them
    # earlier in the notebook), so create_group would raise. require_group
    # returns the existing group and only creates it when it is missing.
    _write_features(f.require_group('trainval'), {
        'sun_pos': sun_pos_trainval,
        'cloud_seg': cloud_seg_trainval,
        'sun_seg': sun_seg_trainval,
        'image_ill': image_ill_trainval,
        'sampi': sampi_trainval,
    })

    _write_features(f.require_group('test'), {
        'sun_pos': sun_pos_test,
        'cloud_seg': cloud_seg_test,
        'sun_seg': sun_seg_test,
        'image_ill': image_ill_test,
        'sampi': sampi_test,
    })