In [1]:
import numpy as np
import pandas as pd
import h5py
import hdf5_interface
import matplotlib.pyplot as plt
import requests
from bokeh.plotting import figure, show, output_file, output_notebook
from bokeh.models import ColumnDataSource, Range1d, LabelSet, Label, Legend

In [18]:
def hdf5_to_dataframe(hdf5_filename, location_name, panel_name):
    """This function extracts data from an HDF5 file and loads it into a pandas dataframe"""
    #Load the HDF5 file data
    hdf5_file = h5py.File('{}.h5'.format(hdf5_filename), 'r')
    hdf5_location = hdf5_file.get(location_name)
    panel_location = hdf5_location.get(panel_name)
    dataframe = pd.DataFrame()
    for keys in panel_location.keys():
        dataframe[str(keys)] = panel_location[str(keys)]
    dataframe['Month'] = dataframe['Month'].astype(int)
    return dataframe

def daily_to_monthly_energy(file_name, location_name, panel_name):
    solar_dataframe = hdf5_to_dataframe(file_name, location_name, panel_name)
    new_dataframe = pd.DataFrame(columns = ['Year', 'Month', 'Energy', 'Interpolate'])
    previous_month_tracker = solar_dataframe['Month'][0]
    Sum = 0
    interpolated = 0
    j = 0 
    year_array = np.array(np.NaN)
    month_array = np.array(np.NaN)
    interpolation_array = np.array(np.NaN)
    energy_array = np.array(np.NaN)
    for i in range(len(solar_dataframe.index)):
        if solar_dataframe['Month'][i] == previous_month_tracker:
            Sum = Sum + solar_dataframe['Energy'][i]
            interpolated = interpolated + solar_dataframe['Interpolate'][i]
        else:
            year_array = np.append(year_array, solar_dataframe['Year'][i])
            month_array = np.append(month_array, solar_dataframe['Month'][i])
            if interpolated > 0:
                interpolation_array = np.append(interpolation_array, 1)
            else:
                interpolation_array = np.append(interpolation_array, 0)
            energy_array = np.append(energy_array, Sum)
            Sum = 0
            interpolated = 0
            j = j + 1
            previous_month_tracker = solar_dataframe['Month'][i]
    new_dataframe['Energy'] = energy_array.astype(int)
    new_dataframe['Month'] = month_array.astype(int)
    new_dataframe['Year'] = year_array.astype(int)
    new_dataframe['Interpolate'] = interpolation_array.astype(int)
    new_dataframe = new_dataframe.drop(0).reset_index(drop=True)
    
    #new_dataframe['DC Capacity'][1] = solar_dataframe['DC Capacity'][0]
    #new_dataframe['Location'][1] = solar_dataframe['Location'][0]
    return new_dataframe

In [19]:
def pvwatts_tmy2(lattitude,longitude):
    # Get the data from the PV Watts --TMY2
    list_parameters = {"formt": 'JSON', "api_key": "spJFj2l5ghY5jwk7dNfVYs3JHbpR6BOGHQNO8Y9Z", "system_capacity": 18, "module_type": 0, "losses": 14.08,
                  "array_type": 0, "tilt": 50, "azimuth": 180, "lat": lattitude, "lon": longitude, "dataset": 'tmy2'}
    json_response = requests.get("https://developer.nrel.gov/api/pvwatts/v6", params = list_parameters).json()
    TMY2 = pd.DataFrame(data = json_response['outputs'])
    return TMY2

def pvwatts_tmy3(lattitude,longitude):
    # Get the data from the PV Watts --TMY3
    list_parameters = {"formt": 'JSON', "api_key": "spJFj2l5ghY5jwk7dNfVYs3JHbpR6BOGHQNO8Y9Z", "system_capacity": 18, "module_type": 0, "losses": 14.08,
                  "array_type": 0, "tilt": 50, "azimuth": 180, "lat": lattitude, "lon": longitude, "dataset": 'tmy3'}
    json_response = requests.get("https://developer.nrel.gov/api/pvwatts/v6", params = list_parameters).json()
    TMY3 = pd.DataFrame(data = json_response['outputs'])
    return TMY3

In [4]:
coordinate = pd.DataFrame(columns=['location','latitude','longitude'])
coordinate['location']=['Ambler-Shungnak-Kobuk','Anchorage','Bethel','Chickaloon',
                        'Deering','Denali Park','Fairbanks','Fort Yukon',
                        'Galena-Koyukuk-Ruby', 'Homer','Naknek','Noatak',
                        'Noorvik','Soldotna','Valdez','Wasilla-Palmer']

coordinate['latitude']=[66.995834, 61.193625, 60.794938, 61.823570, 
                        66.069413, 63.537277, 64.838033, 66.571563,
                        64.782991, 59.652521, 58.728349, 67.570921, 
                        66.836039, 60.486370, 61.128663, 61.582242]


coordinate['longitude']=[ -157.377096, -149.694974, -161.770716, -148.450442,
                         -162.766760,  -150.985453, -147.668970,  -145.250173,
                         -156.744933, -151.536496, -157.017444, -162.967490,
                          -161.041913, -151.060702, -146.353366, -149.441001]


coordinate

Unnamed: 0,location,latitude,longitude
0,Ambler-Shungnak-Kobuk,66.995834,-157.377096
1,Anchorage,61.193625,-149.694974
2,Bethel,60.794938,-161.770716
3,Chickaloon,61.82357,-148.450442
4,Deering,66.069413,-162.76676
5,Denali Park,63.537277,-150.985453
6,Fairbanks,64.838033,-147.66897
7,Fort Yukon,66.571563,-145.250173
8,Galena-Koyukuk-Ruby,64.782991,-156.744933
9,Homer,59.652521,-151.536496


In [5]:
my_file = h5py.File("solar_panel_data_alaska.h5", 'r')

In [21]:
result = pd.DataFrame(columns = ['location','#','average_capacity',
                                 'average_annual','TMY2','TMY3'])
for i in range(len(coordinate)):
    result.loc[i,'location'] = coordinate['location'][i]
    location_name = coordinate['location'][i]
    location_hdf5 = my_file.get(location_name)
    location=pd.DataFrame(columns=['Date'])
    a = []
    ca = []
    no=0
    for name in location_hdf5:
        no = no+1
        capacity = location_hdf5[name].attrs.__getitem__("DC Capacity")
        ca.append(capacity)
        if location_hdf5[name].keys().__contains__('Day'):
            base = daily_to_monthly_energy('solar_panel_data_alaska', location_name, name)
        else:    
            base = hdf5_to_dataframe('solar_panel_data_alaska',location_name, name )
            # read data
        base = base.drop(['Year', 'Interpolate'], axis=1)    
        average = base.groupby('Month').mean()
        summation = np.sum(average,axis=0)/capacity
        a.append(float(summation))
    result.loc[i,'average_capacity'] = np.mean(ca)    
    result.loc[i,'average_annual'] = np.mean(a)
    result.loc[i,'#'] = no
    
    if i in [0,7,8,9]:
        pass
    else:
        result.loc[i,'TMY2'] = pvwatts_tmy2(coordinate['latitude'][i],coordinate['longitude'][i])['ac_annual'][0]/18
    
    result.loc[i,'TMY3'] = pvwatts_tmy3(coordinate['latitude'][i],coordinate['longitude'][i])['ac_annual'][0]/18
    

    
result
                         

Unnamed: 0,location,#,average_capacity,average_annual,TMY2,TMY3
0,Ambler-Shungnak-Kobuk,5,5.094,106106000.0,,889.79
1,Anchorage,28,6.29929,664.294,923.263,947.962
2,Bethel,3,6.03333,934.957,977.303,971.321
3,Chickaloon,1,3.2,866.25,980.491,866.645
4,Deering,1,11.1,823.602,991.661,958.353
5,Denali Park,1,4.0,760.958,980.491,1077.57
6,Fairbanks,20,5.058,892.562,1021.9,1025.22
7,Fort Yukon,1,18.0,908.72,,1048.0
8,Galena-Koyukuk-Ruby,5,5.752,794.061,,935.683
9,Homer,2,5.465,853.275,,943.895
