In [1]:
import pandas as pd
import requests
import datetime
import os
from tqdm import tqdm

In [2]:
# Folder that receives the raw per-city downloads
output = "Data/raw"
# CSV holding the coordinates of every city/town to query
# NOTE(review): this path uses lower-case 'data' while the output folder uses 'Data' —
# confirm both resolve to the intended directories on a case-sensitive filesystem
input_file = 'data/uscities.csv'

In [3]:
# Create the output folder (and any missing parents).
# exist_ok=True makes this a no-op when the folder is already there and avoids the
# check-then-create race of testing os.path.exists first.
os.makedirs(output, exist_ok=True)

In [4]:
# Load the city table: one row per city/town with its state, county, and lat/lng coordinates
city_locations = pd.read_csv(filepath_or_buffer=input_file)
# Preview the first five rows
city_locations.head(n=5)

Unnamed: 0,city,city_ascii,state_id,state_name,county_fips,county_name,lat,lng,population,density,source,military,incorporated,timezone,ranking,zips,id
0,New York,New York,NY,New York,36081,Queens,40.6943,-73.9249,18908608,11080.3,shape,False,True,America/New_York,1,11229 11228 11226 11225 11224 11222 11221 1122...,1840034016
1,Los Angeles,Los Angeles,CA,California,6037,Los Angeles,34.1141,-118.4068,11922389,3184.7,shape,False,True,America/Los_Angeles,1,91367 90291 90293 90292 91316 91311 90035 9003...,1840020491
2,Chicago,Chicago,IL,Illinois,17031,Cook,41.8375,-87.6866,8497759,4614.5,shape,False,True,America/Chicago,1,60018 60649 60641 60640 60643 60642 60645 6064...,1840000494
3,Miami,Miami,FL,Florida,12086,Miami-Dade,25.784,-80.2101,6080145,4758.9,shape,False,True,America/New_York,1,33128 33129 33125 33126 33127 33149 33144 3314...,1840015149
4,Houston,Houston,TX,Texas,48201,Harris,29.786,-95.3885,5970127,1384.0,shape,False,True,America/Chicago,1,77069 77068 77061 77060 77063 77062 77065 7706...,1840020925


In [5]:
# Query window, both bounds formatted as YYYYMMDD
start_date = '20200101'
# Swap in datetime.datetime.now().strftime('%Y%m%d') to query up to today
end_date = '20240101'
# NASA POWER parameter(s) to request
parameter = 'ALLSKY_SFC_SW_DWN'
# URL template for the daily point CSV download; the {placeholders} are filled in per city
base_url = (
    "https://power.larc.nasa.gov/api/temporal/daily/point"
    "?parameters={parameter}"
    "&community=RE"
    "&longitude={longitude}"
    "&latitude={latitude}"
    "&start={start_date}"
    "&end={end_date}"
    "&format=CSV"
)

In [6]:
def calculate_pv_energy_generation(daily_irradiance, area=2, efficiency=0.18):
    """Estimate the daily energy output of a photovoltaic panel.

    Parameters
    ----------
    daily_irradiance : number or array-like
        Solar irradiance received per unit area per day (kWh/m^2/day).
    area : number, default 2
        Panel surface area in m^2.
    efficiency : number, default 0.18
        Fraction (0-1, not a percentage) of incident energy converted to electricity.

    Returns
    -------
    Estimated energy production in kWh/day, in the same numeric/array form as
    ``daily_irradiance``.
    """
    return daily_irradiance * area * efficiency

In [7]:
# Rows for the output dataframe: one list per successfully processed city
solar_data_per_city = []
# Cities that could not be downloaded or had no usable data, kept for inspection after the run
failed_cities = []

# Solar panel specs
panel_efficiency = 0.18  # Fraction (0-1) of incident energy converted to electricity
panel_area = 2  # m^2

# NASA POWER writes this fill value for days with no data; it must not be averaged in
POWER_FILL_VALUE = -999

city_rows = zip(
    city_locations['city'],
    city_locations['state_id'],
    city_locations['county_name'],
    city_locations['lat'],
    city_locations['lng'],
)

# For each city or town, download its irradiance data (unless cached) and condense it into one row.
# zip() has no length, so the total is passed explicitly to give tqdm a real progress bar and ETA.
for city, state, county, latitude, longitude in tqdm(city_rows, total=len(city_locations)):
    # Data gathering url
    url = base_url.format(longitude=longitude, latitude=latitude, parameter=parameter, start_date=start_date, end_date=end_date)
    # NOTE(review): the name is built from city and state only, so two places sharing a name
    # within one state map to the same file, and ':' is not a valid filename character on
    # Windows. Left unchanged so files downloaded by earlier runs are still recognised.
    filename = f"{city},{state} : {start_date}-{end_date}.csv"
    filepath = os.path.join(output, filename)

    # If this city's data has not been downloaded already, pull it and save it to the output folder.
    # A direct existence check replaces listing the whole folder on every iteration.
    if not os.path.exists(filepath):
        try:
            response = requests.get(url=url, verify=True, timeout=30.00)
        except requests.RequestException as error:
            # A timeout or connection problem for one city should not abort the whole run
            failed_cities.append([city, state, f"request failed: {error}"])
            continue
        if response.status_code != 200:
            # Nothing was saved, so there is no file to read for this city
            failed_cities.append([city, state, f"HTTP {response.status_code}"])
            continue
        # Write to a temporary name and rename once complete, so an interrupted run never
        # leaves a truncated file that later runs would treat as a finished download
        partial_path = filepath + '.part'
        with open(partial_path, 'wb') as file_object:
            file_object.write(response.content)
        os.replace(partial_path, filepath)

    # Open file and read into csv.  First 9 rows are header information
    city_data = pd.read_csv(filepath, header=9)
    # Blank out fill values so missing days do not drag the average down
    irradiance = city_data[parameter]
    irradiance = irradiance.where(irradiance != POWER_FILL_VALUE)
    if irradiance.dropna().empty:
        failed_cities.append([city, state, "no valid irradiance values"])
        continue
    # Condense the queried period into a daily average (mean() ignores the blanked-out days)
    avg_daily_irradiance = float(round(irradiance.mean(), 4))  # kWh/m^2/day
    # Calculate average energy produced per day
    est_daily_energy = float(round(avg_daily_irradiance * panel_efficiency * panel_area, 4))  # kWh/day
    # Append list for df conversion
    solar_data_per_city.append([city, state, county, latitude, longitude, avg_daily_irradiance, est_daily_energy])

# Surface skipped cities instead of dropping them silently
if failed_cities:
    print(f"Skipped {len(failed_cities)} of {len(city_locations)} cities; see failed_cities for details")

798it [00:10, 75.28it/s]


KeyboardInterrupt: 

In [None]:
# Turn the condensed per-city rows into a pandas dataframe
solar_data_per_city_df = pd.DataFrame(
    solar_data_per_city,
    columns=[
        'City/Town',
        'State',
        'County',
        'Latitude',
        'Longitude',
        'Avg Daily Irradiance',
        'Est. Energy Production Per Day',
    ],
)
# Leave the preview as the cell's last expression so the notebook renders it as a table
# rather than the plain-text dump produced by print()
solar_data_per_city_df.head()

In [None]:
# Save the per-city summary to disk as a CSV, leaving out the dataframe's row index
summary_csv_path = 'Data/solar_data_per_city.csv'
solar_data_per_city_df.to_csv(path_or_buf=summary_csv_path, index=False)