In [1]:
import os

import h5py
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
from bokeh.models import ColumnDataSource, Range1d, LabelSet, Label
from bokeh.palettes import Spectral4
from bokeh.plotting import figure, show, output_file, output_notebook

import hdf5_interface


In [4]:
# Open the Alaska solar-panel store read-only. The handle is intentionally left
# open: `fairbanks` is a live reference into the file and is used by later cells.
my_file = h5py.File("solar_panel_data_alaska.h5", mode='r')
# `.get` returns None (rather than raising) when the group is absent.
fairbanks = my_file.get('Fairbanks')

In [60]:
# Preview one panel's data as a DataFrame.
# NOTE(review): `hdf5_to_dataframe` is defined in a cell further down, so this
# cell raises NameError under "Restart Kernel -> Run All" -- run the defining cell first.
hdf5_to_dataframe(
    hdf5_filename="solar_panel_data_alaska",
    location_name='Fairbanks',
    panel_name='Albin Fairbanks',
)

Unnamed: 0,Day,Energy,Interpolate,Month,Year
0,29,18.996,0,7,2014
1,30,18.455,0,7,2014
2,31,11.325,0,7,2014
3,1,1.626,0,8,2014
4,2,1.560,0,8,2014
5,3,12.551,0,8,2014
6,4,11.741,0,8,2014
7,5,2.715,0,8,2014
8,6,17.609,0,8,2014
9,7,19.814,0,8,2014


In [5]:
# Show the names of the entries stored under the 'Fairbanks' group
# (these are the panel names iterated over by the later cells).
fairbanks.keys()

<KeysViewHDF5 ['Albin Fairbanks', 'Bonaire Fairbanks', 'Braun Fairbanks', 'Dashevsky Fairbanks', 'Deer Fairbanks', 'Dukeminier Fairbanks', 'FNSBSD-FMD', 'Grubis Fairbanks', 'Hall North Pole', 'Jorgenson Fairbanks', 'Nortech Tracking', 'RuralAK CCHRC_Sharp170', 'RuralAK CCHRC_SolarWorld165', 'RuralAK Nenana', 'Spirit of Alaska Fairbanks Gillam', 'Spirit of Alaska Fairbanks Johansen', 'Tran Fairbanks', 'UAF Engineering Building', 'UAF Fairbanks', 'Weissman Fairbanks']>

In [6]:
# Fairbanks
# Fetch the PVWatts (v6) modelled output for the TMY2 and TMY3 weather datasets.
PVWATTS_URL = "https://developer.nrel.gov/api/pvwatts/v6"

# SECURITY: prefer supplying the key through the NREL_API_KEY environment variable.
# The literal fallback only keeps existing runs working; it is committed in the
# notebook and should be rotated and removed.
PVWATTS_API_KEY = os.environ.get("NREL_API_KEY", "spJFj2l5ghY5jwk7dNfVYs3JHbpR6BOGHQNO8Y9Z")


def _fetch_pvwatts_outputs(dataset):
    """Query PVWatts for the Fairbanks reference system and return its 'outputs'.

    Parameters
    ----------
    dataset : str
        Value sent as the API's ``dataset`` parameter ('tmy2' or 'tmy3' here).

    Returns
    -------
    pandas.DataFrame
        Built from the ``outputs`` object of the JSON response.

    Raises
    ------
    requests.HTTPError
        If the service answers with a 4xx/5xx status.
    requests.Timeout
        If the service does not answer within 30 seconds.
    """
    list_parameters = {
        # NOTE(review): "formt" looks like a typo for "format"; it is kept as-is
        # because the request evidently succeeds with it -- confirm against the API docs.
        "formt": 'JSON',
        "api_key": PVWATTS_API_KEY,
        "system_capacity": 18,
        "module_type": 0,
        "losses": 14.08,
        "array_type": 0,
        "tilt": 50,
        "azimuth": 180,
        "lat": 64.84,
        "lon": -147.76,
        "dataset": dataset,
    }
    # A timeout stops the notebook from hanging forever on a stalled connection.
    response = requests.get(PVWATTS_URL, params=list_parameters, timeout=30)
    # Fail loudly on HTTP errors instead of a confusing KeyError on 'outputs' below.
    response.raise_for_status()
    return pd.DataFrame(data=response.json()['outputs'])


# Get the data from the PV Watts --TMY2
TMY2 = _fetch_pvwatts_outputs('tmy2')
# Get the data from the PV Watts --TMY3
TMY3 = _fetch_pvwatts_outputs('tmy3')

In [7]:
def hdf5_to_dataframe(hdf5_filename, location_name, panel_name):
    """Load one panel's datasets from an HDF5 file into a pandas DataFrame.

    Parameters
    ----------
    hdf5_filename : str
        File path without the ``.h5`` extension (it is appended here).
    location_name : str
        Name of the top-level group in the file.
    panel_name : str
        Name of the entry inside the location group.

    Returns
    -------
    pandas.DataFrame
        One column per key found under the panel, named after the key.
        The 'Month' column is cast to int.

    Raises
    ------
    KeyError
        If the location or panel is not present in the file, or if the panel
        has no 'Month' entry.
    """
    # The context manager guarantees the file handle is released even on error;
    # previously every call leaked an open handle.
    with h5py.File('{}.h5'.format(hdf5_filename), 'r') as hdf5_file:
        hdf5_location = hdf5_file.get(location_name)
        if hdf5_location is None:
            raise KeyError("location {!r} not found in '{}.h5'".format(location_name, hdf5_filename))
        panel_location = hdf5_location.get(panel_name)
        if panel_location is None:
            raise KeyError("panel {!r} not found under location {!r}".format(panel_name, location_name))
        dataframe = pd.DataFrame()
        for key in panel_location.keys():
            # `[()]` reads the whole dataset into memory so the column remains
            # valid after the file is closed.
            dataframe[str(key)] = panel_location[key][()]
    dataframe['Month'] = dataframe['Month'].astype(int)
    return dataframe

# Calculate monthly production

In [None]:
# Merge every Fairbanks panel's capacity-normalised energy into one frame keyed
# on (Month, Year).
location = pd.DataFrame(columns=['Month', 'Year'])
a = 0
for name in fairbanks:
    capacity = fairbanks[name].attrs["DC Capacity"]
    # NOTE(review): this reads 'alaska_solar_data_storage' while `fairbanks` was
    # opened from 'solar_panel_data_alaska.h5' -- confirm the file name is intended.
    # NOTE(review): panels that carry a 'Day' entry contribute one row per day
    # here; the annual cell converts those with daily_to_monthly_energy first --
    # confirm whether the same conversion is needed in this cell.
    base = hdf5_to_dataframe('alaska_solar_data_storage', 'Fairbanks', name)
    base['Energy'] = base['Energy'] / capacity
    # Suffixes are passed as strings, as pandas documents; the resulting column
    # names are the same as with the bare integers used before.
    location = pd.merge(location, base, on=['Month', 'Year'], how='outer',
                        suffixes=(str(a), str(a + 1)))
    a = a + 1

# set up a dataframe to store TMY2&3 ac_monthly
pv = pd.DataFrame()
pv['TMY2'] = TMY2.ac_monthly
pv['TMY3'] = TMY3.ac_monthly
pv['Month'] = list(range(1, 13))

# merge PVWatts data into location data
result = pd.merge(location, pv, on=['Month'], how='outer')

# Build the datetime column in one vectorised step. The previous row-by-row
# chained assignment (`result['Date'][i] = ...`) triggers SettingWithCopyWarning
# and re-parsed the whole column on every iteration.
result['Date'] = pd.to_datetime(
    result['Year'].astype(str) + '-' + result['Month'].astype(str)
)

# sort by date
result = result.sort_values(by='Date')

result

In [None]:
# x axis for the plot: the date column of the merged frame
xaxis = result['Date']

# PVWatts monthly output divided by 18, the system_capacity value sent to the API
tmy2 = result['TMY2'] / 18
tmy3 = result['TMY3'] / 18

# Remove the date and PVWatts columns so the row-wise statistics below only see
# what is left.
# NOTE(review): any other merged non-energy column (e.g. 'Interpolate') would
# also enter the statistics -- confirm the remaining columns are energy only.
# NOTE: `result` is rebound here, so this cell cannot be re-run on its own;
# re-run the cell that builds `result` first.
result = result.drop(['Date', 'Month', 'Year', 'TMY2', 'TMY3'], axis='columns')

# Row-wise median across the remaining columns
result['Median'] = result.median(axis=1)
# Row-wise maximum; 'Median' already exists at this point and takes part in it
result['Max'] = result.max(axis=1)
result

In [None]:
# Plot the best and median panel curves together with both PVWatts curves vs. date
output_notebook()
p = figure(plot_width=600, plot_height=300, x_axis_type='datetime')

# title style
p.title.text = 'The Solar Production in Fairbanks'
p.title.align = 'left'
p.title.text_font_size = "25px"

# annotate the figure with the number of panels (position given in screen units)
citation = Label(
    x=10,
    y=180,
    x_units='screen',
    y_units='screen',
    text=str(len(fairbanks)) + ' panels here',
    render_mode='css',
)
p.add_layout(citation)

# one line per series: (y values, colour, legend entry)
for series_values, series_color, series_legend in (
    (result['Max'], 'red', 'Best'),
    (result['Median'], 'blue', 'Median'),
    (tmy2, 'green', 'TMY2'),
    (tmy3, 'orange', 'TMY3'),
):
    p.line(xaxis, series_values, line_width=1, color=series_color, legend=series_legend)

# axis labels; clicking a legend entry hides its line
p.xaxis.axis_label = 'Month'
p.yaxis.axis_label = 'Monthly Production(kWh)'
p.legend.click_policy = "hide"
show(p)

 # Calculate normalized annual production

In [8]:
def annual_Norm(dataframe, capacity=None):
    """Compute trailing 12-row energy totals divided by panel capacity.

    For every row position ``k >= 12`` the 'Energy' values of the twelve rows
    immediately before ``k`` (positions ``k-12`` .. ``k-1``) are summed, divided
    by ``capacity`` and paired with the 'Date' value of row ``k``.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Must contain 'Energy' and 'Date' columns. Rows are presumably one per
        month in chronological order -- verify against the caller.
    capacity : number, optional
        Divisor applied to every 12-row sum. When omitted, the module-level
        variable ``capacity`` is used, which is what existing callers rely on.

    Returns
    -------
    tuple(list, list)
        ``(annual_values, month)``; both lists are empty when the frame has
        12 rows or fewer.

    Raises
    ------
    NameError
        If ``capacity`` is not passed and no module-level ``capacity`` exists.
    """
    if capacity is None:
        # Backward compatibility: older call sites set a global `capacity`
        # right before calling this function instead of passing it in.
        try:
            capacity = globals()['capacity']
        except KeyError:
            raise NameError("name 'capacity' is not defined") from None

    energy = dataframe['Energy']
    dates = dataframe['Date']
    annual_values = []
    month = []
    # Positional indexing throughout, so the result does not depend on the
    # frame carrying a default 0..n-1 index.
    for end in range(12, len(dataframe.index)):
        annual_values.append(energy.iloc[end - 12:end].sum() / capacity)
        # NOTE(review): the window ends at row end-1 but is labelled with the
        # date of row `end`; kept as in the original -- confirm this is intended.
        month.append(dates.iloc[end])
    return (annual_values, month)

In [9]:
def daily_to_monthly_energy(file_name, location_name, panel_name):
    """Aggregate a panel's daily readings into one row per month.

    The panel is loaded with ``hdf5_to_dataframe`` and consecutive rows that
    share the same 'Month' value are collapsed into a single row.

    Parameters
    ----------
    file_name : str
        Passed through to ``hdf5_to_dataframe`` (HDF5 path without extension).
    location_name : str
        Passed through to ``hdf5_to_dataframe``.
    panel_name : str
        Passed through to ``hdf5_to_dataframe``.

    Returns
    -------
    pandas.DataFrame
        Columns 'Year', 'Month', 'Energy', 'Interpolate' (all int) with a fresh
        0..n-1 index. 'Energy' is the month's summed energy truncated to an
        integer; 'Interpolate' is 1 when any day of the month was flagged,
        else 0. The last month present in the data is not emitted.
    """
    solar_dataframe = hdf5_to_dataframe(file_name, location_name, panel_name)
    return _sum_daily_rows_by_month(solar_dataframe)


def _sum_daily_rows_by_month(daily):
    """Collapse runs of consecutive rows with equal 'Month' into monthly totals."""
    columns = ['Year', 'Month', 'Energy', 'Interpolate']
    if len(daily.index) == 0:
        # Nothing to aggregate; the old loop crashed on `daily['Month'][0]`.
        return pd.DataFrame(columns=columns)

    # A new run starts whenever 'Month' differs from the previous row. Grouping
    # on runs (not on the month value) keeps e.g. July 2014 and July 2015 apart.
    month_run = (daily['Month'] != daily['Month'].shift()).cumsum().rename('month_run')

    # Every day of a run is summed and the run is labelled with its OWN
    # year/month. The previous loop labelled each total with the following
    # month and skipped the first day of every new month.
    totals = daily.groupby(month_run, sort=False).agg(
        {'Year': 'first', 'Month': 'first', 'Energy': 'sum', 'Interpolate': 'sum'}
    )

    # The trailing month is not emitted, as before.
    # NOTE(review): presumably because it is usually incomplete -- confirm.
    totals = totals.iloc[:-1]

    monthly = pd.DataFrame({
        'Year': totals['Year'].astype(int),
        'Month': totals['Month'].astype(int),
        # Integer truncation is retained so the column dtype callers see is unchanged.
        'Energy': totals['Energy'].astype(int),
        'Interpolate': (totals['Interpolate'] > 0).astype(int),
    })
    return monthly[columns].reset_index(drop=True)

In [10]:
# Build one frame of capacity-normalised trailing-12-month production, merged
# across all Fairbanks panels on 'Date'.
location = pd.DataFrame(columns=['Date'])
a = 0
for name in fairbanks:
    # Panels that expose a 'Day' entry are converted to monthly rows first.
    if 'Day' in fairbanks[name].keys():
        base = daily_to_monthly_energy('solar_panel_data_alaska', 'Fairbanks', name)
    else:
        base = hdf5_to_dataframe('solar_panel_data_alaska', 'Fairbanks', name)

    # annual_Norm(base) below reads this module-level `capacity`, so it must be
    # assigned before the call.
    capacity = fairbanks[name].attrs["DC Capacity"]

    # Build the datetime column in one vectorised step. The previous row-by-row
    # chained assignment (`base['Date'][i] = ...`) raised SettingWithCopyWarning
    # and re-parsed the whole column on every iteration.
    base['Date'] = pd.to_datetime(
        base['Year'].astype(str) + '-' + base['Month'].astype(str)
    )
    base = base.drop(['Year', 'Month'], axis=1)

    # calculate annual value
    annual_values, month = annual_Norm(base)
    new_base = pd.DataFrame({'Date': month, 'Annual': annual_values})

    # Suffixes are passed as strings, as pandas documents; the resulting column
    # names are the same as with the bare integers used before.
    location = pd.merge(location, new_base, on=['Date'], how='outer',
                        suffixes=(str(a), str(a + 1)))
    a = a + 1

location = location.sort_values(by='Date')

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  from ipykernel import kernelapp as app


In [11]:
# x axis for the plot: the date column of the merged annual frame
xaxis = location['Date']

# PVWatts annual output (row 1 of the 'ac_annual' column) divided by 18, the
# system_capacity value sent to the API
tmy2 = TMY2['ac_annual'][1] / 18
tmy3 = TMY3['ac_annual'][1] / 18

# Remove the date column so the row-wise statistics only see the panel columns.
# NOTE: `location` is rebound here, so this cell cannot be re-run on its own;
# re-run the cell that builds `location` first.
location = location.drop(['Date'], axis='columns')

# Row-wise median across the panels
location['Median'] = location.median(axis=1)
# Row-wise maximum; 'Median' already exists at this point and takes part in it
location['Max'] = location.max(axis=1)

In [12]:
# Plot the best and median normalised annual production with +/-5% PVWatts bands
output_notebook()
p = figure(plot_width=600, plot_height=300, x_axis_type='datetime')

# also write the figure to a standalone HTML file
output_file("popup_fairbanks.html")

# title style
p.title.text = 'Normalized Annual Production in Fairbanks'
p.title.align = 'left'
p.title.text_font_size = "25px"

# annotate the figure with the number of panels (position given in screen units)
citation = Label(
    x=10,
    y=180,
    x_units='screen',
    y_units='screen',
    text=str(len(fairbanks)) + ' panels here',
    render_mode='css',
)
p.add_layout(citation)

# one line per series: (y values, colour, legend entry)
for series_values, series_color, series_legend in (
    (location['Max'], 'red', 'Best'),
    (location['Median'], 'blue', 'Median'),
    (tmy2 * 0.95, 'green', 'TMY2_low'),
    (tmy2 * 1.05, 'green', 'TMY2_high'),
    (tmy3 * 0.95, 'orange', 'TMY3_low'),
    (tmy3 * 1.05, 'orange', 'TMY3_high'),
):
    p.line(xaxis, series_values, line_width=1, color=series_color, legend=series_legend)

# axis labels; clicking a legend entry hides its line
p.xaxis.axis_label = 'Month'
p.yaxis.axis_label = 'Normalized Annual Production(kWh)'
p.legend.click_policy = "hide"
show(p)