In [1]:
from IPython.display import Markdown, display

# Render the companion markdown file. Passing the path through `filename=`
# states the intent explicitly: the file is read from disk, and a missing file
# raises instead of the path text itself being rendered as markdown.
display(Markdown(filename="solar BNL.md"))

# [Berkeley Lab - Tracking the Sun](https://emp.lbl.gov/tracking-the-sun)

Berkeley Lab’s annual Tracking the Sun report describes trends among grid-connected, distributed solar photovoltaic (PV) systems in the United States. The latest edition of the report focuses on systems installed through year-end 2021, and is based on data from roughly 2.5 million systems. New to the report this year is an expanded coverage of paired PV-plus-storage systems, including details on system design and pricing trends.

The Massachusetts (MA) data come from the same sources as the [Production Tracking System](https://www.masscec-pts.com/#/home) (PTS), but include many additional metrics; for example:

* Up to 3 module and inverter manufacturer, model and panel quantity details
* number of panels and panel nameplate capacity
* technology details; BIPV, bifacial, efficiencies
* orientation; azimuth, tilt
* battery installations
* unique system identifier
* ~80 columns vs 6 for PTS

## Set-Up

In [None]:
from dotenv import load_dotenv, find_dotenv

# Load the .env file located by find_dotenv and let its values override any
# variables that already exist in the environment.
_ = load_dotenv(
    find_dotenv(usecwd=True),
    override=True,
)

from sqlalchemy import create_engine
from os import environ
from urllib.parse import quote

# Connection settings; each one falls back to the literal default shown here
# when the corresponding environment variable is not set.
username     =  environ.get("POSTGRES_USERNAME", "postgres")
password     =  environ.get("POSTGRES_PASSWORD", "postgres")
ipaddress    =  environ.get("POSTGRES_IPADDRESS", "localhost")
port         =  environ.get("POSTGRES_PORT", "5432")
dbname       =  environ.get("POSTGRES_DBNAME", "ArlingtonMA")

# Percent-encode the credentials before placing them in the URL so that
# characters such as '@', ':' or '/' are not mistaken for URL delimiters.
# `safe=""` also encodes '/', which quote() would otherwise leave untouched.
_url_username = quote(username, safe="")
_url_password = quote(password, safe="")

# Establish the database connection used by the transform queries and loads.
cnx = create_engine(
    f'postgresql://{_url_username}:{_url_password}@{ipaddress}:{port}/{dbname}'
)


In [None]:
import pandas as pd

## Extract and Transform

In [None]:
def bnl_pv_datasets(
    year=2021,
    subset='csv',
    url='https://emp.lbl.gov/sites/default/files/public_datafile.zip',
):
    """Download a zip archive and load its matching members with pandas.

    Parameters
    ----------
    year : int
        Substituted for a ``{year}`` placeholder in ``url``. It has no effect
        when the URL contains no such placeholder, as with the default URL.
    subset : str
        Only archive members whose name contains this substring are read.
    url : str
        Location of the zip archive. Earlier releases were published under
        dated names, e.g.
        ``https://emp.lbl.gov/sites/default/files/lbnl_publicdatafile_dpv_{year}_update_dec{year}update.zip``;
        such a template can be passed here together with ``year``.

    Returns
    -------
    dict
        Maps each matching member name to the DataFrame returned by
        ``pd.read_csv``. Members that cannot be parsed are reported with
        ``print`` and left out of the result.
    """
    from io              import BytesIO
    from urllib.request  import urlopen
    from zipfile         import ZipFile

    data = {}

    # Read the whole archive into memory; ZipFile needs a seekable stream.
    with urlopen(url.format(year=year)) as response:
        payload = BytesIO(response.read())

    with payload, ZipFile(payload) as archive:
        for name in archive.namelist():
            if subset not in name:
                continue

            try:
                # Close each member handle as soon as it has been parsed.
                with archive.open(name) as member:
                    data[name] = pd.read_csv(member, low_memory=False)
            except Exception as err:
                # Best effort: skip unreadable members but say why.
                print('not tracked', name, err)

    return data

def _bnl_distinct_values(values):
    """Return the sorted distinct non-missing entries of a Series as a list."""
    return list(values.dropna().sort_values().unique())


def _bnl_lookup_rows(values, item):
    """Build the key/value/item lookup rows for one encoded attribute."""
    rows = pd.DataFrame({'key': range(len(values)), 'value': values})
    rows['item'] = item
    return rows


def transform_bnl(bnl,dor):
    """Reduce the combined data to MA rows and integer-encode text columns.

    Parameters
    ----------
    bnl : pandas.DataFrame
        Combined input table. The columns referenced below (``state``,
        ``city``, ``dateOfBatteryInstall``, the dropped columns and every
        encoded column) must be present.
    dor : pandas.DataFrame
        Lookup with columns ``key`` and ``value``; ``value`` is matched
        against ``city`` and ``key`` becomes the ``dor`` column.

    Returns
    -------
    (pandas.DataFrame, pandas.DataFrame)
        ``ma_solar``: the MA rows with the encoded columns renamed to carry a
        ``bnl_`` prefix and holding integer codes (missing where the source
        value is missing).
        ``int_value_pairs``: one row per code with columns ``key``, ``value``
        and ``item``, where ``item`` names the encoded attribute.
    """
    # Keep MA rows only and turn the -1 sentinel (numeric or the exact text
    # "-1") into a missing value.
    ma_solar = (
        bnl[bnl.state == 'MA']
        .reset_index(drop=True)
        .replace({'^-1$':None,-1:None},regex=True)
        .copy()
    )

    # Normalise the spellings of this town before matching against `dor`.
    ma_solar['city'] = ma_solar['city'].replace({
        'Manchester':'Manchester By The Sea',
        'ManchesterbytheSea':'Manchester By The Sea',
    })
    ma_solar = (
        ma_solar
        .merge(dor,how='left',right_on='value',left_on='city')
        .rename(columns={'key':'dor'})
    )

    ma_solar['dateOfBatteryInstall'] = (
        pd.to_datetime(ma_solar['dateOfBatteryInstall']).dt.strftime('%Y-%m-%d')
    )

    ma_solar = ma_solar.drop(columns=[
        'system_ID_2',
        'installation_date',
        'new_construction',
        'state',
        'city',
        'zip_code',
        'value',     # right-hand merge key, redundant once `dor` is attached
    ])

    for price_col in ('total_installed_price', 'rebate_or_grant'):
        ma_solar[price_col] = ma_solar[price_col].astype(float)

    lookups = []

    # Columns that each get their own code table.
    ivp_cols = [
        'data_provider_1','data_provider_2',
        'customer_segment',
        'utility_service_territory',
        'installer_name',
        'battery_manufacturer','battery_model',
    ]
    for col in ivp_cols:
        distinct = _bnl_distinct_values(ma_solar[col])
        codes = {value: key for key, value in enumerate(distinct)}
        # Missing entries (None/NaN) are not in `codes`, so they stay missing
        # instead of being assigned a code of their own.
        ma_solar[col] = ma_solar[col].map(codes)
        ma_solar = ma_solar.rename(columns={col: 'bnl_' + col})
        lookups.append(_bnl_lookup_rows(distinct, 'bnl_' + col))

    # Attributes repeated in _1/_2/_3 slots share one code table per
    # attribute, built from the values of all three slots.
    ivp_repeat_cols = [
        'module_manufacturer',
        'module_model',
        'technology_module',
        'inverter_manufacturer',
        'inverter_model',
    ]
    for col in ivp_repeat_cols:
        slots = [col + suffix for suffix in ('_1', '_2', '_3')]
        distinct = _bnl_distinct_values(
            pd.concat([ma_solar[slot] for slot in slots])
        )
        codes = {value: key for key, value in enumerate(distinct)}
        for slot in slots:
            ma_solar[slot] = ma_solar[slot].map(codes)
        ma_solar = ma_solar.rename(
            columns={slot: 'bnl_' + slot for slot in slots}
        )
        lookups.append(_bnl_lookup_rows(distinct, 'bnl_' + col))

    # Each lookup keeps its own 0..n-1 index, as before.
    int_value_pairs = pd.concat(lookups)

    return ma_solar, int_value_pairs

def get_dor(cnx):
    """Fetch the key/value rows tagged item='dor' from common.int_value_pairs.

    `cnx` is the connection handed to pandas; the result is a DataFrame with
    the columns `key` and `value`.
    """
    dor_query = "select key,value from common.int_value_pairs where item='dor'"
    dor_rows = pd.read_sql_query(dor_query, cnx)
    return dor_rows

In [None]:
data = bnl_pv_datasets()

# Stack every table from the archive into a single frame with a fresh
# 0..n-1 index (one concat instead of growing the frame inside a loop).
bnl = pd.concat(list(data.values()), ignore_index=True)

# Dates that do not match the day-month-year pattern become missing.
bnl['date'] = pd.to_datetime(bnl.installation_date,format='%d-%b-%Y',errors='coerce').dt.strftime('%Y-%m-%d')

print('rows:',len(bnl),'cols:',len(bnl.columns))

# NOTE(review): `mask` was used below without ever being defined in this
# notebook, which fails on a fresh kernel. It is assumed to select the MA rows,
# matching the filter applied inside transform_bnl; confirm.
mask = bnl.state == 'MA'
bnl_selected = bnl[mask]

print('BNL,{installs},{first},{last}'.format(installs=len(bnl_selected),first=bnl_selected.date.min(),last=bnl_selected.date.max()))

dor = get_dor(cnx)

ma_solar, int_value_pairs = transform_bnl(bnl,dor)

## LOAD

In [None]:
from sqlalchemy import text

# DDL for the target table: drops any existing energy.solar_bnl and recreates
# it together with its two indexes.
# NOTE(review): several measurement columns (e.g. battery_rated_capacity_kW /
# _kWh, azimuth_*, tilt_*) are SMALLINT; if the source reports fractional
# values they cannot be stored exactly. Confirm against the data.
table_create_solar_bnl = \
    """
        DROP TABLE IF EXISTS energy.solar_bnl;
        CREATE TABLE energy.solar_bnl (
            "dor" SMALLINT,
            "date" DATE,
            "bnl_data_provider_1" SMALLINT,
            "bnl_data_provider_2" SMALLINT,
            "system_ID_1" VARCHAR(18),
            "system_size_DC" REAL,
            "total_installed_price" REAL,
            "rebate_or_grant" REAL,
            "bnl_customer_segment" SMALLINT,
            "expansion_system" SMALLINT,
            "multiple_phase_system" SMALLINT,
            "tracking" SMALLINT,
            "ground_mounted" SMALLINT,
            "bnl_utility_service_territory" SMALLINT,
            "third_party_owned" SMALLINT,
            "bnl_installer_name" SMALLINT,
            "self_installed" SMALLINT,
            "azimuth_1" SMALLINT,
            "azimuth_2" SMALLINT,
            "azimuth_3" SMALLINT,
            "tilt_1" SMALLINT,
            "tilt_2" SMALLINT,
            "tilt_3" SMALLINT,
            "bnl_module_manufacturer_1" SMALLINT,
            "bnl_module_model_1" SMALLINT,
            "module_quantity_1" SMALLINT,
            "bnl_module_manufacturer_2" SMALLINT,
            "bnl_module_model_2" SMALLINT,
            "module_quantity_2" SMALLINT,
            "bnl_module_manufacturer_3" SMALLINT,
            "bnl_module_model_3" SMALLINT,
            "module_quantity_3" SMALLINT,
            "additional_modules" SMALLINT,
            "bnl_technology_module_1" SMALLINT,
            "bnl_technology_module_2" SMALLINT,
            "bnl_technology_module_3" SMALLINT,
            "BIPV_module_1" SMALLINT,
            "BIPV_module_2" SMALLINT,
            "BIPV_module_3" SMALLINT,
            "bifacial_module_1" SMALLINT,
            "bifacial_module_2" SMALLINT,
            "bifacial_module_3" SMALLINT,
            "nameplate_capacity_module_1" SMALLINT,
            "nameplate_capacity_module_2" SMALLINT,
            "nameplate_capacity_module_3" SMALLINT,
            "efficiency_module_1" REAL,
            "efficiency_module_2" REAL,
            "efficiency_module_3" REAL,
            "bnl_inverter_manufacturer_1" SMALLINT,
            "bnl_inverter_model_1" SMALLINT,
            "inverter_quantity_1" SMALLINT,
            "bnl_inverter_manufacturer_2" SMALLINT,
            "bnl_inverter_model_2" SMALLINT,
            "inverter_quantity_2" SMALLINT,
            "bnl_inverter_manufacturer_3" SMALLINT,
            "bnl_inverter_model_3" SMALLINT,
            "inverter_quantity_3" SMALLINT,
            
            "additional_inverters" SMALLINT,
            "micro_inverter_1" SMALLINT,
            "micro_inverter_2" SMALLINT,
            "micro_inverter_3" SMALLINT,
            "built_in_meter_inverter_1" SMALLINT,
            "built_in_meter_inverter_2" SMALLINT,
            "built_in_meter_inverter_3" SMALLINT,
            "output_capacity_inverter_1" SMALLINT,
            "output_capacity_inverter_2" SMALLINT,
            "output_capacity_inverter_3" SMALLINT,
            "DC_optimizer" SMALLINT,
            "inverter_loading_ratio" REAL,
            "dateOfBatteryInstall" DATE,
            "bnl_battery_manufacturer" SMALLINT,
            "bnl_battery_model" SMALLINT,
            "battery_rated_capacity_kW" SMALLINT,
            "battery_rated_capacity_kWh" SMALLINT,
            PRIMARY KEY ("system_ID_1")
        );
        
        CREATE INDEX solar_bnl_idx 
            ON energy.solar_bnl("dor");
        CREATE INDEX solar_bnl_idx_date 
            ON energy.solar_bnl("date");
        
    """

# Run the DDL on an explicit connection inside a transaction that commits on
# success. Calling Engine.execute() with a raw string is not available in
# SQLAlchemy 2.x; begin() + text() works on both 1.x and 2.x.
with cnx.begin() as connection:
    connection.execute(text(table_create_solar_bnl))

# Append the transformed rows to the freshly created table; the DataFrame
# index is not written.
ma_solar.to_sql(
    'solar_bnl',
    schema='energy',
    con=cnx,
    if_exists='append',
    index=False
)