# Download US solar power output from the PVWatts API (version 8)

## Documentation: https://developer.nrel.gov/docs/api-key/
## https://pvwatts.nrel.gov/downloads/pvwattsv5.pdf

In [1]:
import numpy as np
import scipy as sp
import requests
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point
import matplotlib.pyplot as plt

In [2]:
import os

# NREL Developer Network API key. It is read from the NREL_API_KEY environment
# variable so the secret is never stored in (or committed with) the notebook.
# When the variable is unset or empty we fall back to NREL's shared 'DEMO_KEY',
# which is documented as having much lower rate limits than a personal key.
hrh_apikey = os.environ.get('NREL_API_KEY') or 'DEMO_KEY'

# Directory holding downloaded data and shapefiles. Override it with the
# RAPOWER_DATA_DIR environment variable; the default keeps the original
# location. os.path.join(..., '') guarantees a trailing separator because later
# cells build file names with plain string concatenation (data_path + 'name').
data_path = os.path.join(
    os.environ.get('RAPOWER_DATA_DIR') or '/Users/hrh/Desktop/AI/erdos/rapower_data/',
    ''
)

In [3]:
# 1. PVWatts request settings for a single test site.
#    The API key is the one defined in the configuration cell above.
parameters = dict(
    format='json',
    system_capacity=1000,
    module_type=0,
    losses=14.08,
    array_type=1,
    tilt=40,
    azimuth=180,
    lat=40.0099983215332,
    lon=-105.2600021362305,
    dataset='tmy3',
    radius=25,
    timeframe='monthly',
    api_key=hrh_apikey,
)



In [4]:
url = 'https://developer.nrel.gov/api/pvwatts/v8'

# 2. Send request.
#    - timeout: without it a stalled connection would hang the cell forever.
#    - raise_for_status: surface HTTP failures (bad key, rate limit, ...) here
#      instead of as a confusing KeyError on data['outputs'] below.
response = requests.get(url, params=parameters, timeout=30)
response.raise_for_status()
data = response.json()

# The response body carries an 'errors' list (see the sample response at the
# end of this notebook); stop early if the API reported a problem.
if data.get('errors'):
    raise RuntimeError(f"PVWatts returned errors: {data['errors']}")

# 3. Convert the output data to a DataFrame
df = pd.DataFrame(data['outputs'])

print(response)

<Response [200]>


In [5]:
##### Try to get data for California on a grid with lat_delta and lon_delta resolution
# lat_delta = 0.25
# lon_delta = 0.25

# # #CONUS box
# # top = 49.3457868 # north lat
# # left = -124.7844079 # west long
# # right = -66.9513812 # east long
# # bottom =  24.7433195 # south lat

# #California box
# top    = 42.009518
# bottom = 32.534156
# left   = -124.409591 
# right  =  -114.131211


## Define list of lat lon coordinates (either for CONUS or California)

<font color ='red'> PVWatts API access is currently limited to 1000 queries per hour, so the data has to be downloaded one chunk (size < 1000) at a time. </font>

In [6]:
# 1. Build the list of (latitude, longitude) query points for California.
# Unused alternative: a regular grid over the bounding box defined above.
# lats = list(np.arange(bottom, top, lat_delta))  # south -> north in steps of lat_delta degrees
# lons = list(np.arange(left, right, lon_delta))  # west -> east in steps of lon_delta degrees

# Accumulator that the download cell appends one record per location to.
all_data = []

##### Read lats and lons CA_grid_dataframe #############
ca_gridpath = '/Users/hrh/Desktop/AI/erdos/SolarFarmPrediction/data_including_land_cover.csv'

df_main             = pd.read_csv(ca_gridpath)
ca_grid             = df_main.loc[df_main['State']=='CA']
#ca_grid_usable      = ca_grid.loc[ca_grid['land_cover']==1.0]
ca_coords           = list(zip(ca_grid['Latitude'], ca_grid['Longitude']))
if not ca_coords:
    raise ValueError(f"No rows with State == 'CA' found in {ca_gridpath}")

# Sanity check: report the coordinate extent so it can be compared by eye with
# the California bounding box. (Previously this tuple was computed and thrown
# away because it was not the last expression of the cell.)
ca_lats = [lat for lat, _ in ca_coords]
ca_lons = [lon for _, lon in ca_coords]
print('Latitude range:', (min(ca_lats), max(ca_lats)),
      'Longitude range:', (min(ca_lons), max(ca_lons)))

# The API allows a limited number of queries per hour, so only one chunk of
# coordinates is downloaded per run. Bump chunk_index (0, 1, 2, ...) to move on
# to the next chunk; the last chunk may be shorter than chunk_size.
chunk_size  = 1000
chunk_index = 0
ca_coords1  = ca_coords[chunk_index*chunk_size:(chunk_index+1)*chunk_size]

In [7]:
%%time
    for (lat,lon) in ca_coords1:
        parameters = {
            'format': 'json',
            'system_capacity': 1000, #1000kW = 1 MW, 1 MW or greater is considered utility-scale
            'module_type': 0,       # 0- Standard module, 1- Permium, 2-Thin film
            'losses': 14,           # Losses in percentage
            'array_type': 0,        # Open Rack: Also known as ground mount.
            'tilt': 40,
            'azimuth': 180,         # This means that the solar array is facing South in the Northeren Hemisphere
            'lat': lat,
            'lon': lon,
            'dataset': 'tmy3',      #tmy2 is 1960-1990, tmy3 is 1990-2005
            'radius': 10,            # 0-Pick the station nearest to the given (lat,lon),e.g: 50 -50 miles
            'timeframe': 'monthly',
            'api_key': hrh_apikey
        }

        url = 'https://developer.nrel.gov/api/pvwatts/v8'
        response = requests.get(url, params=parameters)
        data = response.json()
             
        # Extract and append relevant data to the all_data list
        # Extract relevant data
        print('Processing (lat,lon):',lat,lon,'\n')
        
        # Check if 'outputs' is in the data
        if 'outputs' not in data:
            print(f"No data available for latitude {lat} and longitude {lon}")
            continue

        output_data = data['outputs']
        input_data  = data['inputs']

        output_data['latitude']  = input_data['lat']
        output_data['longitude'] = input_data['lon']
        all_data.append(output_data)
        
        
# Convert all_data to a DataFrame
df = pd.DataFrame(all_data)

print(df)

Processing (lat,lon): 37.4051763 -122.0847692 

Processing (lat,lon): 37.8471729 -122.2715295 

Processing (lat,lon): 37.1544497 -121.5602301 

Processing (lat,lon): 38.0069813 -122.086832 

Processing (lat,lon): 38.0898882 -122.1426351 

Processing (lat,lon): 37.9737295 -122.3638504 

Processing (lat,lon): 38.0862958 -122.1396794 

Processing (lat,lon): 38.4300017 -122.7255246 

Processing (lat,lon): 38.4591205 -122.7449891 

Processing (lat,lon): 38.436029 -122.7223877 

Processing (lat,lon): 38.4888689 -122.7776581 

Processing (lat,lon): 38.453198 -122.731544 

Processing (lat,lon): 38.4239634 -122.7807543 

Processing (lat,lon): 38.4271074 -122.7856681 

Processing (lat,lon): 37.9414521 -122.007725 

Processing (lat,lon): 39.3703737 -123.3024709 

Processing (lat,lon): 33.8373954999999 -117.3716772 

Processing (lat,lon): 33.9944764999999 -117.4297158 

Processing (lat,lon): 37.7564202566745 -122.198718031487 

Processing (lat,lon): 37.7027656058429 -121.950435688049 

Processing 

In [8]:
# Show the table assembled by the download cell (one row per queried location).
df

Unnamed: 0,ac_monthly,poa_monthly,solrad_monthly,dc_monthly,ac_annual,solrad_annual,capacity_factor,latitude,longitude
0,"[97883.10073632629, 88094.07327237079, 137821....","[119.7725774925076, 109.3571341576667, 176.159...","[3.863631532016375, 3.905611934202383, 5.68257...","[102649.4987568946, 92591.49238273455, 144298....",1.557778e+06,5.415864,17.782850,37.4051763,-122.0847692
1,"[86443.0444897815, 79190.72434666935, 94725.41...","[104.4858563816585, 97.81385154426584, 118.363...","[3.370511496182532, 3.493351840866637, 3.81817...","[90764.74246637561, 83357.66411789537, 99753.7...",1.460293e+06,5.016483,16.670009,37.8471729,-122.2715295
2,,,,,,,,37.1544497,-121.5602301
3,"[64333.53765837142, 79710.70713752598, 96341.6...","[79.37921239410372, 98.29785315657168, 119.565...","[2.560619754648507, 3.510637612734703, 3.85694...","[67945.47351754847, 83895.0453953961, 101405.8...",1.497993e+06,5.235499,17.100375,38.0069813,-122.086832
4,"[64333.53765837142, 79710.70713752598, 96341.6...","[79.37921239410372, 98.29785315657168, 119.565...","[2.560619754648507, 3.510637612734703, 3.85694...","[67945.47351754847, 83895.0453953961, 101405.8...",1.497993e+06,5.235499,17.100375,38.0898882,-122.1426351
...,...,...,...,...,...,...,...,...,...
735,,,,,,,,37.8338010278755,-121.947563529848
736,,,,,,,,35.0322820024508,-117.347833996416
737,,,,,,,,33.3995500999958,-117.136595049987
738,,,,,,,,34.7835444975449,-118.419560622252


In [9]:
# Write the downloaded results to disk, leaving the row index out of the file.
output_csv = data_path + 'pvwatts_monthly_ca_1990_2005_v1.csv'
df.to_csv(output_csv, index=False)

In [12]:
# Load the previously saved PVWatts results back from disk and display them.
saved_csv = data_path + 'pvwatts_monthly_ca_1990_2005_v1.csv'
df1 = pd.read_csv(saved_csv)
df1

Unnamed: 0,ac_monthly,poa_monthly,solrad_monthly,dc_monthly,ac_annual,solrad_annual,capacity_factor,latitude,longitude
0,"[69844.1023460872, 75045.72158984367, 91812.52...","[86.22523074264006, 94.44525898899586, 116.197...","[2.781459056214195, 3.373044963892709, 3.74832...","[73555.32231217949, 79050.17240451233, 96686.1...",1.416126e+06,4.993801,16.165822,38.827584,-122.730353
1,"[69844.1023460872, 75045.72158984367, 91812.52...","[86.22523074264006, 94.44525898899586, 116.197...","[2.781459056214195, 3.373044963892709, 3.74832...","[73555.32231217949, 79050.17240451233, 96686.1...",1.416126e+06,4.993801,16.165822,38.926144,-122.730353
2,"[78632.32131516683, 77160.25872044977, 131830....","[97.76647973425857, 98.29397776771324, 167.979...","[3.153757410782534, 3.510499205989759, 5.41869...","[82625.42311636091, 81231.0293172343, 138085.1...",1.491077e+06,5.317607,17.021429,39.024705,-122.730353
3,"[78632.32131516683, 77160.25872044977, 131830....","[97.76647973425857, 98.29397776771324, 167.979...","[3.153757410782534, 3.510499205989759, 5.41869...","[82625.42311636091, 81231.0293172343, 138085.1...",1.491077e+06,5.317607,17.021429,39.123266,-122.730353
4,"[78632.32131516683, 77160.25872044977, 131830....","[97.76647973425857, 98.29397776771324, 167.979...","[3.153757410782534, 3.510499205989759, 5.41869...","[82625.42311636091, 81231.0293172343, 138085.1...",1.491077e+06,5.317607,17.021429,39.221826,-122.730353
...,...,...,...,...,...,...,...,...,...
895,"[90245.3440624514, 122418.1658310659, 135549.0...","[107.9822680992244, 149.2098129192754, 167.020...","[3.483298970942721, 5.32892188997412, 5.387766...","[94735.6601047351, 128414.2415947522, 142415.8...",1.595089e+06,5.595870,18.208776,41.390160,-120.338196
896,"[72317.45409090247, 112822.3990137254, 105517....","[85.74944261329925, 135.3117632746649, 128.825...","[2.766111052041911, 4.832562974095174, 4.15567...","[76219.45346084131, 118372.6031486477, 111125....",1.530115e+06,5.319285,17.467064,41.882964,-120.338196
897,"[107930.2542489552, 109466.3467705347, 130374....","[135.5816026199147, 136.052948704705, 165.0807...","[4.373600084513377, 4.859033882310891, 5.32518...","[113081.9596676523, 114783.2667843446, 136680....",1.595308e+06,5.550027,18.211283,33.998113,-120.229462
898,"[119385.6862595109, 116836.4728466618, 130637....","[147.0023587029102, 145.4736696925769, 161.720...","[4.74201157106162, 5.195488203306319, 5.216801...","[124961.8371708737, 122384.6190594951, 136904....",1.516730e+06,5.207389,17.314267,34.490916,-120.229462


In [None]:
# Turn the results table into a GeoDataFrame: one point per row, with
# x = longitude and y = latitude.
geometry = [Point(lon, lat) for lon, lat in zip(df1['longitude'], df1['latitude'])]
geo_df   = gpd.GeoDataFrame(df1, geometry=geometry)

In [None]:
# Read the California county boundaries shapefile
ca_counties = gpd.read_file(data_path+ 'CA_counties/CA_Counties_TIGER2016.shp')

# The points were built from longitude/latitude columns, so label them as
# EPSG:4326. Do this only while no CRS is set: on a re-run geo_df already holds
# coordinates reprojected to the county CRS, and relabelling those as EPSG:4326
# would silently corrupt their positions.
if geo_df.crs is None:
    geo_df = geo_df.set_crs("EPSG:4326")
#ca_counties = ca_counties.to_crs("EPSG:4326")
print('The coordinate reference system for this file:',ca_counties.crs,geo_df.crs)


# Ensure that the data is in the same CRS as the shapefile
# (a no-op when geo_df has already been reprojected).
geo_df = geo_df.to_crs(ca_counties.crs)

# Plot the county boundaries
fig, ax = plt.subplots(figsize=(15, 15))
ca_counties.plot(ax=ax, color='white', edgecolor='black')

# Plot the PVWatts points on top of the county boundaries, coloured by annual AC output
geo_df.plot(ax=ax, column='ac_annual', legend=True, markersize=50, cmap='OrRd', alpha=0.5)

# Add titles and labels
# NOTE(review): the axes are in the units of ca_counties.crs; if that CRS is a
# projected one the values are not degrees -- confirm the axis labels.
ax.set_title("Annual AC Output in California by County")
ax.set_xlabel('Longitude')
ax.set_ylabel('Latitude')

# Clip the view to the overall extent of the county shapefile
minx, miny, maxx, maxy = ca_counties.total_bounds
ax.set_xlim(minx, maxx)
ax.set_ylim(miny, maxy)

# Display the plot
plt.show()

In [None]:
# Plot the PVWatts points over a plain California base map.
#
# The previous version filtered the 'naturalearth_lowres' layer by
# name == 'California'. That layer holds countries, not US states, so the
# selection was empty; the bundled gpd.datasets module it relied on is also no
# longer shipped with GeoPandas 1.0+. The county polygons loaded in the cell
# above already cover California, so they are reused as the base map.
ca = ca_counties

# Draw the points in the same CRS as the base map so the two layers line up.
# (Requires the previous cell to have run, which sets geo_df's CRS.)
geo_df_on_ca = geo_df.to_crs(ca.crs)

# Plot
fig, ax = plt.subplots(figsize=(15,10))
ca.plot(ax=ax, color='lightgray')
geo_df_on_ca.plot(ax=ax, column='ac_annual', legend=True, markersize=50, cmap='OrRd')  # colour by annual AC output
ax.set_title("Annual AC Output on California")
plt.show()

In [None]:
# {
  # "inputs": {
  #   "api_key": "DEMO_KEY",
  #   "azimuth": "180",
  #   "system_capacity": "4",
  #   "losses": "14",
  #   "array_type": "1",
  #   "module_type": "0",
  #   "gcr": "0.4",
  #   "dc_ac_ratio": "1.2",
  #   "inv_eff": "96.0",
  #   "radius": "0",
  #   "dataset": "nsrdb",
  #   "tilt": "10",
  #   "address": "boulder, co",
  #   "soiling": [
  #     12.0,
  #     4.0,
  #     45.0,
  #     23.0,
  #     9.0,
  #     99.0,
  #     67.0,
  #     12.54,
  #     54.0,
  #     9.0,
  #     0.0,
  #     7.6
  #   ],
  #   "albedo": "0.3",
  #   "bifaciality": "0.7"
  # },
  # "errors": [],
  # "warnings": [],
  # "version": "8.0.0",
  # "ssc_info": {
  #   "version": 275,
  #   "build": "Linux 64 bit GNU/C++ Oct  4 2022 03:10:07",
  #   "module": "pvwattsv8"
  # },
  # "station_info": {
  #   "lat": 40.0099983215332,
  #   "lon": -105.2600021362305,
  #   "elev": 1635.640014648438,
  #   "tz": -7.0,
  #   "location": "149190",
  #   "city": "",
  #   "state": "Colorado",
  #   "solar_resource_file": "149190.csv",
  #   "weather_data_source": "NSRDB PSM V3 GOES tmy-2020 3.2.0"
  # }