In [1]:
import pandas as pd
import numpy as np
import sys, os
import time
import io
import re
import datetime

# Location of the py3samsdk checkout. Set the SAM_SDK_PATH environment variable
# to override it without editing this cell; the fallback is the original
# hard-coded path.
samPath = os.environ.get('SAM_SDK_PATH') or '/Users/jacky/Documents/py3samsdk-master'
# Put the SDK directory at the front of the import path (only once, so that
# re-running the cell does not add duplicates).
if samPath not in sys.path:
    sys.path.insert(0, samPath)
    

from py3samsdk.sscapi import PySSC
from urllib.error import HTTPError
from urllib.request import urlopen

# Path to the SAM SSC library. Set the SSC_LIB_PATH environment variable to
# override it without editing this cell; the fallback is the original
# hard-coded path.
ssc_lib = os.environ.get('SSC_LIB_PATH') or '/Applications/sdk-release/osx64/'
# SSC wrapper object used by the simulation cell further down.
ssc = PySSC(ssc_lib)

In [2]:
# Read the credentials file "input.txt" from the current working directory.
# The next cell parses line 1 for e-mail addresses and line 2 for API keys.
# The `with` block guarantees the file handle is closed once the lines are read.
with open(os.path.join(os.getcwd(), "input.txt"), "r") as inputfile:
    lines = inputfile.readlines()

In [3]:
# E-mail pattern: local part, '@', then one or more dot-separated domain labels.
# The dot is escaped (an unescaped '.' matches any character) and the domain
# part may have several labels, so 'user@mail.example.edu' is captured whole.
email_re = re.compile(r'[\w.+-]+@[\w-]+(?:\.[\w-]+)+')
emails = email_re.findall(lines[0])

# Second line: remove spaces and the trailing newline, drop the first 7
# characters (presumably a field label -- confirm against the input.txt format),
# then split the remainder on commas. Empty entries (e.g. from a trailing
# comma) are discarded so they are never sent as an API key.
api_keys = [key for key in lines[1].strip().replace(" ", "")[7:].split(',') if key]

In [5]:
# Load the Project Sunroof postal-code export and keep only the columns used below.
zipcodes = pd.read_csv("project-sunroof-postal_code-11292017.csv")
zipcodes = zipcodes.loc[:, [
    'region_name',
    'state_name',
    'lat_avg',
    'percent_covered',
    'lng_avg',
    'number_of_panels_f',
    'number_of_panels_total',
    'yearly_sunlight_kwh_f',
]]

# Capacity is 0.00025 per panel counted in number_of_panels_f
# (presumably 250 W panels expressed in MW -- confirm).
zipcodes['capacity'] = 0.00025 * zipcodes['number_of_panels_f']

# Keep the string form of region_name minus its last two characters
# (presumably stripping a trailing '.0' from float-parsed codes -- confirm).
zipcodes['region_name'] = zipcodes['region_name'].map(lambda code: str(code)[:-2])

# Set number to threshold that we want for percent_covered; Alaska is excluded.
zipcodes = zipcodes[(zipcodes['percent_covered'] >= 80) & (zipcodes['state_name'] != 'Alaska')]

# Uncomment if filtering by state
# zipcodes = zipcodes[zipcodes['state_name'] == 'California']


In [6]:
# Renumber the filtered rows 0..n-1 (the old labels are kept in an 'index'
# column); the loops below use the new row number to index NumPy arrays.
latlon = zipcodes.reset_index(drop=False)
latlon.head(5)

Unnamed: 0,index,region_name,state_name,lat_avg,percent_covered,lng_avg,number_of_panels_f,number_of_panels_total,yearly_sunlight_kwh_f,capacity
0,1,15104,Pennsylvania,40.406255,98.791687,-79.862353,42634,130282.0,12028620.0,10.6585
1,3,15108,Pennsylvania,40.505561,96.88662,-80.187328,397143,863308.0,113262400.0,99.28575
2,4,15106,Pennsylvania,40.404535,99.68373,-80.094418,133591,350858.0,38206410.0,33.39775
3,5,15112,Pennsylvania,40.404671,99.732083,-79.83929,12877,44229.0,3605938.0,3.21925
4,6,15110,Pennsylvania,40.370982,99.574633,-79.852884,38592,73782.0,11037440.0,9.648


In [7]:
def wait():
    """Sleep until the next 05:00 on the local clock.

    Prints a notice, then sleeps for the whole number of seconds between now
    and the upcoming 5 am: later today when the current hour is before 5,
    otherwise tomorrow.
    """
    print("Waiting until 5am tomorrow")
    now = datetime.datetime.today()
    target = now.replace(hour=5, minute=0, second=0, microsecond=0)
    if now.hour >= 5:
        # 5 am has already passed today, so aim for tomorrow's.
        target = target + datetime.timedelta(days=1)
    remaining = target - now
    time.sleep(remaining.seconds)
    
def wait_2_hours():
    """Print a notice and pause execution for two hours (7200 seconds)."""
    print("Waiting 2 hours")
    time.sleep(2 * 60 * 60)
    
def wait_4_hours():
    """Print a notice and pause execution for four hours (14400 seconds)."""
    print("Waiting 4 hours")
    time.sleep(4 * 60 * 60)

In [8]:
def get_remaining_calls(api_key, email='jackyzhu@berkeley.edu'):
    """Return the number of API requests still available for ``api_key``.

    Issues one NSRDB download request for a fixed point and year and reads the
    ``X-RateLimit-Remaining`` header from the response.

    Args:
        api_key: API key whose remaining quota is queried.
        email: e-mail address sent with the request; defaults to the address
            that was previously hard-coded in the URL.

    Returns:
        The header value converted to ``int``.

    Raises:
        urllib.error.HTTPError: propagated from ``urlopen``; callers catch it.
        KeyError: if the response carries no ``X-RateLimit-Remaining`` header.
        ValueError: if the header value is not an integer.
    """
    print("Getting remaining number of requests...")
    url = 'http://developer.nrel.gov/api/solar/nsrdb_psm3_download.csv?wkt=POINT(-79.862353+40.406255)&names=2015&interval=60&email={email}&api_key={api_key}'.format(email=email, api_key=api_key)
    # Close the connection as soon as the header has been read; the header
    # mapping does case-insensitive lookups, unlike a plain dict of the headers.
    with urlopen(url) as response:
        remaining = response.headers.get('X-RateLimit-Remaining')
    if remaining is None:
        raise KeyError('X-RateLimit-Remaining')
    remaining_calls = int(remaining)
    print(str(remaining_calls) + " remaining requests left")
    return remaining_calls

In [12]:
# Download one NSRDB weather CSV per row of `latlon` into
# <path_to_csv>/<year>/<region_name>_<state_name>.csv, skipping files that
# already exist on disk.
#
# Original notes:
# Counties with same names are triggering the check if file already exists
# Despite getting the same file, the generation outputs are different


### Changeable parameters ###
# include a trailing forward slash (/)
path_to_csv = os.getcwd() + '/csvs/'

year = 2013
interval = '60'

# Per-row result buffers, indexed by the row number of `latlon`.
yearly_generations = np.zeros(len(latlon))
hourly_generations = pd.DataFrame()
errors = np.zeros(len(latlon))

if not os.path.exists(path_to_csv + str(year)):
    os.makedirs(path_to_csv + str(year))

email_count = 0 # which api account to use
error_counter = 0
trys_before_fail = 4
start = True


for index, row in latlon.iterrows():

    while start:
        # The only way to get the remaining number of calls is to make a request (which itself would decrement from our quota)
        # so we make a dummy call to get the remaining number of calls first
        try:
            remaining_calls = get_remaining_calls(api_keys[email_count])
            start = False
        except HTTPError as e:
            print('Error ' + str(e) + ' during initial call.')
            wait_4_hours()

    lat = row['lat_avg']
    lon = row['lng_avg']
    capacity = row['capacity']
    url = 'http://developer.nrel.gov/api/solar/nsrdb_psm3_download.csv?wkt=POINT({lon}%20{lat})&names={year}&interval={interval}&email={email}&api_key={api}'.format(year=year, lat=lat, lon=lon, interval=interval, email=emails[email_count], api=api_keys[email_count])
    data = pd.DataFrame()
    trys = 1
    succeeded = False
    errored = False

    # Same path is used for the existence check and for writing the download.
    csv_file = '{path}{year}/{region}_{state}.csv'.format(path=path_to_csv, year=year, region=row['region_name'], state=row['state_name'])
    if os.path.isfile(csv_file):
        sys.stdout.write("\rFound %i files for year %i" % (index, year))
        sys.stdout.flush()
        continue

    while not succeeded:
        try:
            time.sleep(2)
            # Count the request against the local quota estimate before sending it.
            remaining_calls -= 1
            # Close the HTTP response as soon as the body has been parsed.
            with urlopen(url) as response:
                data = pd.read_csv(io.StringIO(response.read().decode('utf-8')))
            data.to_csv(csv_file)

            succeeded = True
            # NOTE(review): `error_count` is never read in the visible code;
            # possibly `error_counter` was meant -- confirm before changing.
            error_count = 0
            print("Acquired weather data for " + str(row['region_name']), "Remaining calls: " + str(remaining_calls))
        except HTTPError as error:
            succeeded = False
            if error.code in (500, 504) and trys <= trys_before_fail:
                # Server-side error: retry the same request.
                print('Attempting retry number {0} with zip code {1}'.format(trys, row['region_name']))
                trys += 1
                continue
            # Either a non-retryable status or the retry budget is used up.
            # Record the failure and move on to the next row. (The previous
            # `continue` for exhausted retries re-sent the request forever.)
            print('{0}. Error {1} with {2},{3}'.format(index, error.code, row['region_name'], row['state_name']))
            yearly_generations[index] = np.nan
            errored = True
            errors[index] = error.code
            error_counter += 1
            break
        except BaseException:
            # Anything else (including a keyboard interrupt): save what has
            # been collected so far, then re-raise.
            latlon['generations'] = yearly_generations
            hourly_generations.to_csv('hourly' + str(year) + '.csv')
            latlon.to_csv('year' + str(year) + '.csv')
            raise


#     if int(remaining_calls) < 10:
#         free_account = look_for_account()
#         if free_account is not None:
#             email_count = free_account
#         else:
#             break

    # Close to the quota: pause, then re-query the real remaining count.
    if 0 <= remaining_calls < trys_before_fail+5:
        wait()
        start = True
print("Done getting all weather data for year " + str(year))

Getting remaining number of requests...
1233 remaining requests left
Found 8007 files for year 2013

In [None]:
path_to_csv = '/Volumes/RAEL_4TB_SILVER/nrel_csvs/'
# Run weather data through SAM
for index, row in latlon.iterrows():
    csv_file = path_to_csv + str(year) + '/' + str(row['region_name']) + '_' + str(row['state_name']) + '.csv'
    if not os.path.isfile(csv_file):
        print("No weather data for " + str(row['region_name']))
        continue
    data = pd.read_csv(csv_file)

    # Site parameters for THIS row. Previously these were whatever values the
    # download loop left behind, so every site was simulated with the last
    # row's coordinates and capacity.
    lat = row['lat_avg']
    lon = row['lng_avg']
    capacity = row['capacity']

    # First data row carries the site metadata.
    metadata = data.iloc[0:1, :]

    timezone = metadata['Time Zone'].values[0]
    elevation = metadata['Elevation'].values[0]

    # Second row carries the column names of the time series; the series
    # itself starts on the third row.
    loc_data = data.iloc[:,:]
    loc_data.columns = loc_data.iloc[1]
    loc_data = loc_data.iloc[2:, :]
    # `periods` must be an integer: 525600 minutes per (non-leap) year divided
    # by the sampling interval in minutes.
    loc_data = loc_data.set_index(pd.date_range('1/1/{yr}'.format(yr=year), freq=interval+'Min', periods=525600 // int(interval)))
    loc_data = loc_data.dropna(axis = 1, how='all')
    loc_data[['DNI','DHI', 'Wind Speed', 'Temperature']] = loc_data[['DNI','DHI', 'Wind Speed', 'Temperature']].apply(pd.to_numeric)

    # Weather data table handed to SSC.
    wfd = ssc.data_create()
    ssc.data_set_number(wfd, 'lat', lat)
    ssc.data_set_number(wfd, 'lon', lon)
    ssc.data_set_number(wfd, 'tz', float(timezone))
    ssc.data_set_number(wfd, 'elev', float(elevation))
    ssc.data_set_array(wfd, 'year', loc_data.index.year)
    ssc.data_set_array(wfd, 'month', loc_data.index.month)
    ssc.data_set_array(wfd, 'day', loc_data.index.day)
    ssc.data_set_array(wfd, 'hour', loc_data.index.hour)
    ssc.data_set_array(wfd, 'minute', loc_data.index.minute)
    ssc.data_set_array(wfd, 'dn', loc_data['DNI'])
    ssc.data_set_array(wfd, 'df', loc_data['DHI'])
    ssc.data_set_array(wfd, 'wspd', loc_data['Wind Speed'])
    ssc.data_set_array(wfd, 'tdry', loc_data['Temperature'])

    # Create SAM compliant object
    dat = ssc.data_create()
    ssc.data_set_table(dat, 'solar_resource_data', wfd)
    ssc.data_free(wfd)

    # Specify the system Configuration
    # Set system capacity in MW
    system_capacity = capacity
    ssc.data_set_number(dat, 'system_capacity', system_capacity)
    # Set DC/AC ratio (or power ratio). See https://sam.nrel.gov/sites/default/files/content/virtual_conf_july_2013/07-sam-virtual-conference-2013-woodcock.pdf
    ssc.data_set_number(dat, 'dc_ac_ratio', 1.15)
    # Set tilt of system in degrees
    # For Google data, roof segments are considered Flat for roofs with a tilt of less than 10%
    ssc.data_set_number(dat, 'tilt', lat)
    # Set azimuth angle (in degrees) from north (0 degrees)
    ssc.data_set_number(dat, 'azimuth', 180)
    # Set the inverter efficiency
    ssc.data_set_number(dat, 'inv_eff', 96)
    # Set the system losses, in percent
    ssc.data_set_number(dat, 'losses', 14.0757)
    # Specify fixed tilt system (0=Fixed, 1=Fixed Roof, 2=1 Axis Tracker, 3=Backtracted, 4=2 Axis Tracker)
    ssc.data_set_number(dat, 'array_type', 0)
    # Set ground coverage ratio (PACKING DENSITY)
    ssc.data_set_number(dat, 'gcr', (np.cos(np.radians(lat)))**2)
    # Set constant loss adjustment
    ssc.data_set_number(dat, 'adjust:constant', 0)

    # execute and put generation results back into dataframe
    mod = ssc.module_create('pvwattsv5')
    ssc.module_exec(mod, dat)
    loc_data['generation'] = np.array(ssc.data_get_array(dat, 'gen'))
    hourly_generations[row['region_name']] = loc_data['generation']

    # free the memory
    ssc.data_free(dat)
    ssc.module_free(mod)
    print('{0}. {1}: {2}; Remaining Calls: {3}'.format(index, row['region_name'], loc_data['generation'].sum(), remaining_calls))
    yearly_generations[index] = loc_data['generation'].sum()*1000
    time.sleep(1)


# Attach the per-row totals and the HTTP error codes recorded by the download loop.
latlon['generations'] = yearly_generations
latlon['error codes'] = errors

In [14]:
# Write the per-timestep generation table and the per-region summary table
# to CSV files named after the simulated year.
hourly_generations.to_csv('hourly{}.csv'.format(year))
latlon.to_csv('year{}.csv'.format(year))

In [44]:
# Load a previously written results file for inspection.
# NOTE(review): the variable is named `hourly` but the file read is
# "year2012.csv" (the save cell writes 'year<year>.csv' from `latlon` and
# 'hourly<year>.csv' from `hourly_generations`) -- confirm which file is intended.
hourly = pd.read_csv("year2012.csv")
# hourly1 = pd.read_csv("hourly2012(old).csv")
# hourly2= pd.read_csv("hourly2012(old2).csv")
# hourly_result = pd.concat([hourly, hourly1, hourly2], axis=1)

In [45]:
# (rows, columns) of the frame loaded above.
hourly.shape

(6562, 13)

In [32]:
# Compare the shapes of the three result frames.
# NOTE(review): `hourly1` and `hourly2` are only assigned in commented-out
# lines of an earlier cell, so this cell raises NameError on a fresh kernel.
print(hourly.shape)
print(hourly2.shape)
print(hourly1.shape)

(6562, 12)
(6562, 12)
(6562, 12)


In [25]:
# Write the combined frame to 'hourly<year>.csv'.
# NOTE(review): `hourly_result` is only assigned in a commented-out line of an
# earlier cell, so this cell raises NameError on a fresh kernel.
hourly_result.to_csv('hourly' + str(year) + '.csv')

In [None]:
# Reload the per-region summary table from disk, replacing the in-memory `latlon`.
# NOTE(review): the save cell writes 'year<year>.csv', not 'yearly.csv' --
# confirm this file name.
latlon = pd.read_csv('yearly.csv')

In [None]:
# Ratio of the simulated 'generations' column to the 'yearly_sunlight_kwh_f'
# column, row by row. Despite the column name this is a plain ratio; it is
# not multiplied by 100.
latlon['percent ratio'] = latlon['generations'].div(latlon['yearly_sunlight_kwh_f'])

In [None]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Histogram of the per-region 'percent ratio' column.
latlon['percent ratio'].plot(kind='hist')

In [None]:
# Plotting setup for the figure helper defined below: pyplot with the 'ggplot' style sheet.
from matplotlib import pyplot as plt
plt.style.use('ggplot')

def nsrdb_plot(df, i, window=None):
    """Plot irradiance, zenith angle and generation for rows starting at ``i``.

    The irradiance columns, the solar zenith angle and a constant 90-degree
    reference line go on the left axis (labelled W/m2); the 'generation'
    column goes on a twin right axis (labelled kW).

    Args:
        df: DataFrame with 'GHI', 'DNI', 'DHI', 'Solar Zenith Angle' and
            'generation' columns. It is not modified.
        i: position of the first row to plot.
        window: number of rows to plot on both axes. When ``None`` the
            previous hard-coded lengths are kept: ``int(interval)`` rows on
            the left axis and 30 rows on the right axis.
    """
    irradiance_rows = int(interval) if window is None else int(window)
    generation_rows = 30 if window is None else int(window)

    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax2 = ax.twinx()

    # Work on a copy of the slice so the reference column is not added to the
    # caller's DataFrame.
    irradiance = df[['GHI', 'DNI', 'DHI', 'Solar Zenith Angle']].iloc[i:i + irradiance_rows].copy()
    irradiance['90 Degree Zenith'] = 90
    irradiance.plot(ax=ax, figsize=(15,8), yticks=(np.arange(0,900,100)), style={'90 Degree Zenith': '--','Solar Zenith Angle': '-o', 'DNI': '-o', 'DHI': '-o', 'GHI': '-o'}, legend=False)
    df['generation'].iloc[i:i + generation_rows].plot(ax=ax2, yticks=(np.arange(0,4.5,0.5)), style={'generation': 'y-o'})

    ax.grid()
    ax.set_ylabel('W/m2')
    ax2.set_ylabel('kW')
    ax.legend(loc=2, ncol=5, frameon=False)
    ax2.legend(loc=1, frameon=False)

In [None]:
# Plot the window that starts at row 5050.
# NOTE(review): `df` is not assigned anywhere in the visible notebook; it must
# be a frame with the irradiance and 'generation' columns (e.g. a `loc_data`
# from the SAM loop) -- confirm and assign it explicitly.
nsrdb_plot(df, 5050)