In [None]:
import pandas as pd
import numpy as np
import sys, os
import time
import io
import re
import datetime

In [None]:
### Changeable parameters ###
# Remember to also change the tilt and packing density set inside run_sam()
# (the lines marked "####tilt" and "####Packing Density").

# Folder containing the py3samsdk package. The SAM_SDK_PATH environment variable
# overrides the default so the notebook does not depend on one user's home directory.
samPath = os.environ.get('SAM_SDK_PATH', '/Users/jacky/Documents/py3samsdk-master')
# Root folder of the weather files, read as {path_to_csv}{year}/{region}_{state}.csv
path_to_csv = 'csvs/'

year = 2013
description = 'tilted' # set a description of this data set (e.g. flush, tilted); used as a file-name suffix
# NOTE(review): add_to_existing is not referenced by any visible cell; the hourly CSV
# is always appended to when it already exists. Confirm whether this flag is still needed.
add_to_existing = True # if True, will add on to existing hourly.csv and yearly.csv files
############################
# Remember to change tilt and packing density below!

In [None]:
# Put the SAM SDK wrapper folder at the front of sys.path (only once, so re-running
# the cell does not add duplicates). This must happen before the PySSC import below.
if samPath not in sys.path:
    sys.path.insert(0, samPath)
from py3samsdk.sscapi import PySSC
# NOTE(review): HTTPError and urlopen are not used in the visible cells — confirm they are still needed.
from urllib.error import HTTPError
from urllib.request import urlopen

# NOTE(review): machine-specific absolute path; adjust to wherever the SSC library is installed.
ssc_lib = '/Applications/sdk-release/osx64/'  # path to SAM SSC Library
# Shared SSC wrapper instance used by run_sam() for every simulation.
ssc = PySSC(ssc_lib)

In [None]:
# Load the Project Sunroof postal-code export, keeping only the columns used downstream.
zipcodes = pd.read_csv("sunroof_data/project-sunroof-postal_code-11292017.csv")[[
    'region_name', 'state_name', 'lat_avg', 'percent_covered', 'lng_avg',
    'number_of_panels_f', 'number_of_panels_total', 'yearly_sunlight_kwh_f',
]]
zipcodes = zipcodes.assign(
    # 0.00025 per panel — presumably 250 W expressed in MW; TODO confirm the unit.
    capacity=zipcodes['number_of_panels_f'] * 0.00025,
    # Drop the last two characters of the stringified code.
    # NOTE(review): this assumes region_name was parsed as a float (e.g. 94043.0);
    # verify against the CSV, since an integer column would lose two real digits.
    region_name=zipcodes['region_name'].map(lambda code: str(code)[:-2]),
)

# Keep regions at or above the percent_covered threshold (set the number as needed),
# and drop Alaska because NSRDB does not have weather data for most of it.
zipcodes = zipcodes[
    (zipcodes['percent_covered'] >= 80) & (zipcodes['state_name'] != 'Alaska')
]
latlon = zipcodes.reset_index()
latlon.head()

In [None]:
# info should describe the data (e.g. tilted roof, flat roof)
def create_hourly_csv(df, year, info):
    """Write ``df`` to ``csvs/agg_years/hourly{year}[_{info}].csv``.

    If the file does not exist yet it is created with a header row; otherwise
    the rows of ``df`` are appended without a header, so repeated runs
    accumulate rows in one file.

    Parameters
    ----------
    df : pandas.DataFrame
        Table to write (the caller passes one row per region).
    year : int or str
        Year embedded in the file name.
    info : str
        Short description of the data set (e.g. 'tilted'). When non-empty it
        is added to the file name as ``_{info}``.
    """
    suffix = '_' + info if info != '' else ''
    target = 'csvs/agg_years/hourly{year}{info}.csv'.format(year=year, info=suffix)

    # Create the output folder on first use so that saving (including the
    # save-on-interrupt path) does not fail just because it is missing.
    os.makedirs(os.path.dirname(target), exist_ok=True)

    if os.path.isfile(target):
        df.to_csv(target, mode='a', header=False)
    else:
        df.to_csv(target)


def create_yearly_csv(df, year, info):
    """Write ``df`` to ``csvs/agg_years/yearly{year}[_{info}].csv``, replacing any existing file.

    Parameters
    ----------
    df : pandas.DataFrame
        Table to write (the caller passes the per-region table with its
        ``generations`` column).
    year : int or str
        Year embedded in the file name.
    info : str
        Short description of the data set (e.g. 'tilted'). When non-empty it
        is added to the file name as ``_{info}``.
    """
    suffix = '_' + info if info != '' else ''
    target = 'csvs/agg_years/yearly{year}{info}.csv'.format(year=year, info=suffix)

    # Create the output folder on first use so that saving does not fail
    # just because it is missing.
    os.makedirs(os.path.dirname(target), exist_ok=True)

    df.to_csv(target)

In [None]:
def instantiate_tables():
    """Create the containers that run_sam() fills for the configured year.

    Reads the module-level ``year``, ``description`` and ``latlon``.

    Returns
    -------
    yearly_generations : pandas.Series
        Indexed by region name. Loaded from the ``generations`` column of the
        existing yearly CSV when both the yearly and the hourly CSV exist;
        otherwise all-NaN over ``latlon['region_name']``.
    hourly_df : pandas.DataFrame
        Empty frame (no columns yet) indexed by the hourly timestamps of ``year``.
    hourly_df_zipcodes : pandas.Index or list
        Region names (as str) already present in the existing hourly CSV, or
        an empty list when no existing files were read.
    """
    interval_minutes = 60
    # An empty description means "no suffix"; info must be bound in both cases.
    info = '_' + description if description != '' else ''
    path_to_yearly = 'csvs/agg_years/yearly{year}{info}.csv'.format(year = year, info = info)
    path_to_hourly = 'csvs/agg_years/hourly{year}{info}.csv'.format(year = year, info = info)

    # 525600 minutes = 365 days. Integer division keeps `periods` an int,
    # which pandas requires (float periods are deprecated or rejected).
    timestamps = pd.date_range(
        '1/1/{yr}'.format(yr=year),
        freq='{}min'.format(interval_minutes),
        periods=525600 // interval_minutes,
    )
    hourly_df = pd.DataFrame(index=timestamps)
    hourly_df_zipcodes = []

    if os.path.isfile(path_to_yearly) and os.path.isfile(path_to_hourly):
        print("Reading existing files for {year}{info}".format(year = year, info = info))
        yearly_generations = pd.read_csv(path_to_yearly).set_index('region_name')['generations']
        # zipcode indices are interpreted as int, but we want str
        yearly_generations.index = yearly_generations.index.astype(str)
        hourly_df_zipcodes = pd.read_csv(path_to_hourly, index_col=0).index.astype(str)
    else:
        print("New tables will be generated")
        yearly_generations = pd.Series(np.nan, index = latlon['region_name'])

    return yearly_generations, hourly_df, hourly_df_zipcodes

In [None]:
def _simulate_pvwatts(lat, lon, capacity, timezone, elevation, loc_data):
    """Run the SSC ``pvwattsv5`` module for one location and return its ``gen`` output as a NumPy array.

    ``loc_data`` must provide numeric 'DNI', 'DHI', 'Wind Speed' and
    'Temperature' columns on a DatetimeIndex. Every SSC handle created here is
    released in the ``finally`` block, so an exception or KeyboardInterrupt in
    the middle of a simulation does not leak it.
    """
    wfd = dat = mod = None
    try:
        # Weather table handed to SAM
        wfd = ssc.data_create()
        ssc.data_set_number(wfd, 'lat', lat)
        ssc.data_set_number(wfd, 'lon', lon)
        ssc.data_set_number(wfd, 'tz', float(timezone))
        ssc.data_set_number(wfd, 'elev', float(elevation))
        ssc.data_set_array(wfd, 'year', loc_data.index.year)
        ssc.data_set_array(wfd, 'month', loc_data.index.month)
        ssc.data_set_array(wfd, 'day', loc_data.index.day)
        ssc.data_set_array(wfd, 'hour', loc_data.index.hour)
        ssc.data_set_array(wfd, 'minute', loc_data.index.minute)
        ssc.data_set_array(wfd, 'dn', loc_data['DNI'])
        ssc.data_set_array(wfd, 'df', loc_data['DHI'])
        ssc.data_set_array(wfd, 'wspd', loc_data['Wind Speed'])
        ssc.data_set_array(wfd, 'tdry', loc_data['Temperature'])

        # Create SAM compliant object
        dat = ssc.data_create()
        ssc.data_set_table(dat, 'solar_resource_data', wfd)
        ssc.data_free(wfd)
        wfd = None  # already released; keeps the finally block from freeing it twice

        # Specify the system configuration
        # Set system capacity in MW
        ssc.data_set_number(dat, 'system_capacity', capacity)
        # Set DC/AC ratio (or power ratio). See https://sam.nrel.gov/sites/default/files/content/virtual_conf_july_2013/07-sam-virtual-conference-2013-woodcock.pdf
        ssc.data_set_number(dat, 'dc_ac_ratio', 1.15)
        # Set tilt of system in degrees
        # For Google data, roof segments are considered Flat for roofs with a tilt of less than 10%
        ssc.data_set_number(dat, 'tilt', lat) ####tilt
        # Set azimuth angle (in degrees) from north (0 degrees)
        ssc.data_set_number(dat, 'azimuth', 180)
        # Set the inverter efficiency
        ssc.data_set_number(dat, 'inv_eff', 96)
        # Set the system losses, in percent
        ssc.data_set_number(dat, 'losses', 14.0757)
        # Specify fixed tilt system (0=Fixed, 1=Fixed Roof, 2=1 Axis Tracker, 3=Backtracted, 4=2 Axis Tracker)
        ssc.data_set_number(dat, 'array_type', 0)
        # Set ground coverage ratio (PACKING DENSITY)
        ssc.data_set_number(dat, 'gcr', (np.cos(np.radians(lat)))**2) ####Packing Density
        # Set constant loss adjustment
        ssc.data_set_number(dat, 'adjust:constant', 0)

        # Execute the model and read back the generation series.
        # NOTE(review): the return value of module_exec is not checked here;
        # confirm how py3samsdk reports a failed run.
        mod = ssc.module_create('pvwattsv5')
        ssc.module_exec(mod, dat)
        return np.array(ssc.data_get_array(dat, 'gen'))
    finally:
        # free the memory, whatever happened above
        if wfd is not None:
            ssc.data_free(wfd)
        if dat is not None:
            ssc.data_free(dat)
        if mod is not None:
            ssc.module_free(mod)


def run_sam():
    """Simulate hourly PV generation with SAM for every region in ``latlon``.

    Reads the module-level ``latlon``, ``year``, ``path_to_csv``, ``ssc``,
    ``yearly_generations``, ``existing_zipcodes_hourly`` and ``hourly_df``.
    For each region it loads ``{path_to_csv}{year}/{region}_{state}.csv`` and
    runs the model. It then stores the hourly series as a new ``hourly_df``
    column and writes the annual sum (times 1000) into ``yearly_generations``.
    Both containers are modified in place.

    Regions are skipped when they already have a non-NaN yearly value and
    appear in ``existing_zipcodes_hourly``, or when no weather file exists
    (a message is printed in that case).
    """
    interval_minutes = 60
    # Loop-invariant hourly index for the year. 525600 minutes = 365 days.
    # Integer division keeps `periods` an int, which pandas requires.
    timestamps = pd.date_range(
        '1/1/{yr}'.format(yr=year),
        freq='{}min'.format(interval_minutes),
        periods=525600 // interval_minutes,
    )
    weather_columns = ['DNI', 'DHI', 'Wind Speed', 'Temperature']

    # Run weather data through SAM
    for index, row in latlon.iterrows():
        region = row['region_name']

        # .get() returns NaN for a region missing from a previously saved
        # yearly file, so it gets simulated instead of raising KeyError.
        previous_total = yearly_generations.get(region, np.nan)
        if not np.isnan(previous_total) and region in existing_zipcodes_hourly:
            continue

        weather_file = '{path}{year}/{region}_{state}.csv'.format(
            path=path_to_csv, year=year, region=region, state=row['state_name'])
        if not os.path.isfile(weather_file):
            print("No weather data for " + region)
            continue
        data = pd.read_csv(weather_file)

        # The first data row holds the site metadata values.
        metadata = data.iloc[0:1, :]
        timezone = metadata['Time Zone'].values[0]
        elevation = metadata['Elevation'].values[0]

        # Row 1 carries the real column names of the weather table, and the
        # weather records start at row 2 (rows 0-1 are metadata).
        loc_data = data.iloc[2:, :].copy()
        loc_data.columns = data.iloc[1]
        loc_data = loc_data.set_index(timestamps)
        loc_data = loc_data.dropna(axis = 1, how='all')
        loc_data[weather_columns] = loc_data[weather_columns].apply(pd.to_numeric)

        loc_data['generation'] = _simulate_pvwatts(
            row['lat_avg'], row['lng_avg'], row['capacity'], timezone, elevation, loc_data)
        hourly_df.loc[:, region] = loc_data['generation'] # column of generation data from SAM model

        annual_total = loc_data['generation'].sum()
        sys.stdout.write('\r{0}. {1}: {2}'.format(index, region, annual_total))
        sys.stdout.flush()
        yearly_generations[region] = annual_total*1000

In [None]:
# Run this cell to start the program

# Set up the tables outside the try block: if this step fails there is no
# progress to save yet, and the error should surface as-is.
yearly_generations, hourly_df, existing_zipcodes_hourly = instantiate_tables()

interrupted = False
try:
    run_sam()
except (KeyboardInterrupt, Exception) as e:
    # Deliberate best-effort: keep whatever was simulated so far.
    interrupted = True
    print()
    # Include the exception type; str(KeyboardInterrupt()) alone is empty.
    print('{0}: {1}'.format(type(e).__name__, e))
    print("Closing. Saving current progress...")

# Save exactly once, whether run_sam() finished or was interrupted. The hourly
# CSV is opened in append mode, so saving twice would duplicate its rows.
output_yearly = latlon.set_index('region_name')
output_yearly['generations'] = yearly_generations
create_hourly_csv(hourly_df.T, year, description)
create_yearly_csv(output_yearly, year, description)
if interrupted:
    print("Saved")
