In [55]:
import os

import h5py
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
from bokeh.models import ColumnDataSource, Range1d, LabelSet, Label
from bokeh.palettes import Spectral4
from bokeh.plotting import figure, show, output_file, output_notebook

In [2]:
# Open the HDF5 store read-only. The handle is kept open on purpose: later
# cells look up groups and attributes through it.
my_file = h5py.File("alaska_solar_data_storage.hdf5", 'r')

In [3]:
# Group holding one entry per Fairbanks panel (later cells iterate over it and
# read each entry's "DC Capacity" attribute).
# Indexing is used instead of .get(): a missing group then raises KeyError right
# here rather than yielding None and failing later with an unrelated error.
fairbanks = my_file['Fairbanks']

In [72]:
# Show how many entries the Fairbanks group contains, as a string.
str(len(fairbanks))

'4'

In [None]:
# NOTE(review): this cell has no execution count, and its path has no ".hdf5"
# extension, unlike the store opened at the top of the notebook. Confirm the
# file exists, or remove the cell.
file = h5py.File("solar_panel_data_alaska", 'r')
# List the top-level keys of that file.
file.keys()

In [4]:
# Fairbanks
# Get the modelled PVWatts v6 output for the same system twice: once with the
# 'tmy2' dataset and once with 'tmy3'.
# The ".json" suffix is the documented way to select the response format; the
# previous query string carried a misspelled "formt" key that had no effect.
PVWATTS_URL = "https://developer.nrel.gov/api/pvwatts/v6.json"


def fetch_pvwatts_outputs(dataset):
    """Return the ``outputs`` section of a PVWatts v6 response as a DataFrame.

    Parameters
    ----------
    dataset : str
        Value sent as the ``dataset`` request parameter ('tmy2' or 'tmy3').

    Returns
    -------
    pandas.DataFrame
        Built from the ``outputs`` mapping of the JSON response.

    Raises
    ------
    KeyError
        If the ``NREL_API_KEY`` environment variable is not set.
    requests.HTTPError
        If the service answers with an error status.
    """
    request_parameters = {
        # Read the key from the environment so it is never stored in the notebook.
        # Set NREL_API_KEY before starting the kernel.
        "api_key": os.environ["NREL_API_KEY"],
        "system_capacity": 18,
        "module_type": 0,
        "losses": 14.08,
        "array_type": 0,
        "tilt": 50,
        "azimuth": 180,
        "lat": 64.84,
        "lon": -147.76,
        "dataset": dataset,
    }
    # A timeout keeps "Run All" from hanging forever on a stalled connection.
    response = requests.get(PVWATTS_URL, params=request_parameters, timeout=30)
    # Fail with the HTTP error itself instead of a later KeyError on 'outputs'.
    response.raise_for_status()
    return pd.DataFrame(data=response.json()['outputs'])


# Get the data from the PV Watts --TMY2
TMY2 = fetch_pvwatts_outputs('tmy2')
# Get the data from the PV Watts --TMY3
TMY3 = fetch_pvwatts_outputs('tmy3')

In [5]:
def hdf5_to_dataframe(hdf5_filename, location_name, panel_name):
    """Load every member of one panel group of an HDF5 file into a DataFrame.

    Parameters
    ----------
    hdf5_filename : str
        Path of the HDF5 file *without* the ``.hdf5`` extension; the extension
        is appended here.
    location_name : str
        Name of the top-level group to read from.
    panel_name : str
        Name of the group inside ``location_name`` whose members are loaded.

    Returns
    -------
    pandas.DataFrame
        One column per member of the panel group, named after the member's key.

    Raises
    ------
    KeyError
        If ``location_name`` or ``panel_name`` does not exist in the file.
    """
    # The context manager closes the file even when a lookup fails; previously
    # every call left an open handle behind.
    with h5py.File('{}.hdf5'.format(hdf5_filename), 'r') as hdf5_file:
        panel_location = hdf5_file[location_name][panel_name]
        # `[()]` reads the whole dataset into memory, so the columns remain
        # valid after the file has been closed.
        columns = {str(key): dataset[()] for key, dataset in panel_location.items()}
    return pd.DataFrame(columns)

# Calculate monthly production

In [None]:
# Collect every panel's monthly energy, divided by that panel's DC capacity,
# into one table keyed by (Month, Year).
location = pd.DataFrame(columns=['Month', 'Year'])
for a, name in enumerate(fairbanks):
    capacity = fairbanks[name].attrs["DC Capacity"]
    base = hdf5_to_dataframe('alaska_solar_data_storage', 'Fairbanks', name)
    base['Energy'] = base['Energy'] / capacity
    # String suffixes (the documented type) produce the same column names the
    # integer suffixes did.
    location = pd.merge(location, base, on=['Month', 'Year'], how='outer',
                        suffixes=(str(a), str(a + 1)))

# Set up a dataframe holding the TMY2 and TMY3 ac_monthly values, one row per
# calendar month.
pv = pd.DataFrame({
    'TMY2': TMY2.ac_monthly,
    'TMY3': TMY3.ac_monthly,
    'Month': list(range(1, 13)),
})

# Merge the PVWatts data into the location data.
result = pd.merge(location, pv, on=['Month'], how='outer')

# Build the Date column (first day of each month) in one vectorised step.
# The previous row-by-row chained assignment triggered SettingWithCopyWarning
# and re-parsed the whole column on every iteration.
result['Date'] = pd.to_datetime(
    pd.DataFrame({'year': result['Year'], 'month': result['Month'], 'day': 1})
)

# Sort by date.
result = result.sort_values(by='Date')

result

In [None]:
# Keep the x axis (dates) aside before the column is removed below.
xaxis = result['Date']

# Keep the PVWatts series aside as well, divided by 18 (the system_capacity
# value sent in the PVWatts request).
tmy2 = result['TMY2'] / 18
tmy3 = result['TMY3'] / 18

# Remove the date and PVWatts columns so the row-wise statistics only cover
# the remaining columns.
result = result.drop(labels=['Date', 'Month', 'Year', 'TMY2', 'TMY3'], axis=1)

# Row-wise median first, then the row-wise maximum (taken with the "Median"
# column already present).
result['Median'] = result.median(axis=1)
result['Max'] = result.max(axis=1)
result

In [None]:
# Plot the max and median monthly values together with the two PVWatts series.
output_notebook()
p = figure(plot_width=600, plot_height=300, x_axis_type='datetime')

# Title text and styling
p.title.text = 'The Solar Production in Fairbanks'
p.title.align = 'left'
p.title.text_font_size = "25px"

# Annotation with the number of panels, positioned in screen units
panel_count_text = '{} panels here'.format(len(fairbanks))
citation = Label(x=10, y=180, x_units='screen', y_units='screen',
                 text=panel_count_text, render_mode='css')
p.add_layout(citation)

# One line per series, drawn in this order: (values, colour, legend entry)
monthly_series = [
    (result['Max'], 'red', 'Best'),
    (result['Median'], 'blue', 'Median'),
    (tmy2, 'green', 'TMY2'),
    (tmy3, 'orange', 'TMY3'),
]
for series_values, series_color, series_legend in monthly_series:
    p.line(xaxis, series_values, line_width=1, color=series_color, legend=series_legend)

# Axis labels; clicking a legend entry hides its line
p.xaxis.axis_label = 'Month'
p.yaxis.axis_label = 'Monthly Production(kWh)'
p.legend.click_policy = "hide"
show(p)

 # Calculate normalized annual production

In [6]:
def annual_Norm(dataframe, dc_capacity=None):
    """Compute capacity-normalised sums of ``Energy`` over 12-row windows.

    For every row position ``k`` from 12 up to the last row, the ``Energy``
    values of the 12 preceding rows (positions ``k-12`` .. ``k-1``) are summed,
    divided by the capacity, and paired with the ``Date`` value of row ``k``.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Must contain an ``Energy`` column and a ``Date`` column. Rows are
        addressed by position.
    dc_capacity : number, optional
        Divisor applied to every window sum. When omitted, the module-level
        variable ``capacity`` is used, which is what this function relied on
        before the parameter existed.

    Returns
    -------
    tuple of (list, list)
        ``(annual_values, month)``: the normalised sums and the matching
        ``Date`` values. Both lists are empty when the frame has 12 rows or
        fewer.
    """
    if dc_capacity is None:
        # Legacy behaviour: fall back to the global set by the calling cell.
        dc_capacity = capacity

    energy = dataframe['Energy']
    dates = dataframe['Date']
    annual_values = []
    month = []
    for end in range(12, len(dataframe.index)):
        # NOTE(review): the window stops one row before the row whose date
        # labels it, and the window ending on the final row is never emitted.
        # Confirm this offset is intended.
        annual_values.append(energy.iloc[end - 12:end].sum() / dc_capacity)
        month.append(dates.iloc[end])
    return (annual_values, month)

In [42]:
# Build one table of normalised annual values per panel, keyed by Date.
location = pd.DataFrame(columns=['Date'])
for a, name in enumerate(fairbanks):
    # Read the panel's data.
    base = hdf5_to_dataframe('alaska_solar_data_storage', 'Fairbanks', name)
    # annual_Norm reads this module-level name as its divisor, so it must be
    # assigned before the call below.
    capacity = fairbanks[name].attrs["DC Capacity"]

    # Build the Date column (first day of each month) in one vectorised step.
    # The previous row-by-row chained assignment raised SettingWithCopyWarning
    # and re-parsed the whole column on every iteration.
    base['Date'] = pd.to_datetime(
        pd.DataFrame({'year': base['Year'], 'month': base['Month'], 'day': 1})
    )
    base = base.drop(['Year', 'Month'], axis=1)

    # Calculate the annual values for this panel.
    annual_values, month = annual_Norm(base)
    new_base = pd.DataFrame({'Date': month, 'Annual': annual_values})

    # String suffixes (the documented type) produce the same column names the
    # integer suffixes did.
    location = pd.merge(location, new_base, on=['Date'], how='outer',
                        suffixes=(str(a), str(a + 1)))

location

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  del sys.path[0]


Unnamed: 0,Date,Annual1,Annual2,Annual3,Annual4
0,2011-10-01,679.035088,,,
1,2011-11-01,700.487719,,,
2,2011-12-01,701.610526,,,
3,2012-01-01,701.445614,,,
4,2012-02-01,696.663158,,,
5,2012-03-01,694.356140,,,
6,2012-04-01,662.954386,,,
7,2012-05-01,667.708772,,1109.704643,
8,2012-06-01,652.491228,,1236.757143,
9,2012-07-01,658.275439,,1254.976429,


In [43]:
# Keep the x axis (dates) aside before the column is removed below.
xaxis = location['Date']

# PVWatts annual AC value (entry with label 1 of the 'ac_annual' column),
# divided by 18 (the system_capacity value sent in the PVWatts request).
tmy2 = TMY2['ac_annual'][1] / 18
tmy3 = TMY3['ac_annual'][1] / 18

# Remove the Date column so the row-wise statistics only cover the remaining
# columns.
location = location.drop(labels=['Date'], axis=1)

# Row-wise median first, then the row-wise maximum (taken with the "Median"
# column already present).
location['Median'] = location.median(axis=1)
location['Max'] = location.max(axis=1)
location

Unnamed: 0,Annual1,Annual2,Annual3,Annual4,Median,Max
0,679.035088,,,,679.035088,679.035088
1,700.487719,,,,700.487719,700.487719
2,701.610526,,,,701.610526,701.610526
3,701.445614,,,,701.445614,701.445614
4,696.663158,,,,696.663158,696.663158
5,694.356140,,,,694.356140,694.356140
6,662.954386,,,,662.954386,662.954386
7,667.708772,,1109.704643,,888.706707,1109.704643
8,652.491228,,1236.757143,,944.624185,1236.757143
9,658.275439,,1254.976429,,956.625934,1254.976429


In [75]:
# Plot the max and median annual values together with the PVWatts values
# scaled by 0.95 and 1.05.
output_notebook()
p = figure(plot_width=600, plot_height=300, x_axis_type='datetime')

# Title text and styling
p.title.text = 'The Solar Production in Fairbanks'
p.title.align = 'left'
p.title.text_font_size = "25px"

# Annotation with the number of panels, positioned in screen units
panel_count_text = '{} panels here'.format(len(fairbanks))
citation = Label(x=10, y=180, x_units='screen', y_units='screen',
                 text=panel_count_text, render_mode='css')
p.add_layout(citation)

# One line per series, drawn in this order: (values, colour, legend entry)
annual_series = [
    (location['Max'], 'red', 'Best'),
    (location['Median'], 'blue', 'Median'),
    (tmy2*0.95, 'green', 'TMY2_low'),
    (tmy2*1.05, 'green', 'TMY2_high'),
    (tmy3*0.95, 'orange', 'TMY3_low'),
    (tmy3*1.05, 'orange', 'TMY3_high'),
]
for series_values, series_color, series_legend in annual_series:
    p.line(xaxis, series_values, line_width=1, color=series_color, legend=series_legend)

# Axis labels; clicking a legend entry hides its line
p.xaxis.axis_label = 'Month'
p.yaxis.axis_label = 'Normalized Annual Production(kWh)'
p.legend.click_policy = "hide"
show(p)