In [10]:
# %%
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import Select
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.keys import Keys 
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.chrome.options import Options
from datetime import datetime
from dateutil.relativedelta import relativedelta
import pandas as pd
import bp_sql as bp
import os
import sys
import shutil
import time

In [None]:
# User credentials and URL
username = "bpassini"
password = "Ebitda12"
url = "https://www.smartmetertexas.com/dashboard/"
# %%
#download path
download_path = os.path.join(os.getcwd(),'smt_downloads')
csv_list = os.listdir(download_path)
max_csv_dt = pd.to_datetime(max(csv_list).split('.csv')[0])

# Check if the directory exists
if os.path.exists(download_path):
    # Remove the entire directory and all its contents
    shutil.rmtree(download_path)
    print(f"Cleared all contents of: {download_path}")

    # Recreate the empty directory
    os.makedirs(download_path)

chrome_options = Options()
prefs = {
    'download.default_directory': download_path,  # Set default download directory
    'download.prompt_for_download': False,  # Disable download prompt
    'directory_upgrade': True,  # Automatically overwrite existing files
    'safebrowsing.enabled': True  # Enable safe browsing
}
chrome_options.add_experimental_option('prefs', prefs)


# Dates for report
begin_of_time = '2023-02-01'

if max_csv_dt > pd.to_datetime(begin_of_time):
      begin_of_time = (max_csv_dt + relativedelta(months=1))

tday = pd.to_datetime(datetime.today().date())
max_dt = tday.replace(day=1) - relativedelta(days=1)


# Dates for report
begin_of_time = '2023-02-01'

if max_csv_dt > pd.to_datetime(begin_of_time):
      begin_of_time = (max_csv_dt + relativedelta(months=1))

tday = pd.to_datetime(datetime.today().date())
max_dt = tday.replace(day=1) - relativedelta(days=1)


#create tups of beg/end of each month since we've lived at this location
if begin_of_time < max_dt:
      dt_rng_df = pd.DataFrame(data=pd.date_range(begin_of_time, max_dt).to_list(), columns=['date'])
      dt_rng_df['mth'] = dt_rng_df['date'] - pd.to_timedelta(dt_rng_df['date'].dt.day - 1, unit='D')
      min_max_df = dt_rng_df.groupby('mth').agg({'date':['min','max']}).reset_index()
      dt_tups = list(zip(min_max_df['date']['min'],min_max_df['date']['max']))
else:
      print('CSVs are up to date. Must wait for entire month to settle before pulling more data')
      sys.exit()

#reformat tups to strings
str_tuples= [] 
for tup in dt_tups:
        formatted_tuple = tuple(date.strftime('%m/%d/%Y') for date in tup)
        str_tuples.append(formatted_tuple)

strt_str = pd.to_datetime(begin_of_time).strftime('%Y%m%d')
end_date = (pd.to_datetime(strt_str) + relativedelta(months=1) - relativedelta(days=1)).strftime("%m/%d/%Y")

# Set up Selenium WebDriver with WebDriver Manager
service = Service(ChromeDriverManager().install())
driver = webdriver.Chrome(service=service, options=chrome_options)
driver.get(url)

# Log in
WebDriverWait(driver, 3).until(EC.presence_of_element_located((By.ID, "userid"))).send_keys(username)
driver.find_element(By.ID, "password").send_keys(password)
driver.find_element(By.CSS_SELECTOR, ".btn.btn-large.btn-block.btn-primary").click()

# Wait for dashboard to load
WebDriverWait(driver, 3).until(EC.presence_of_element_located((By.ID, "reporttype_input")))

# Select report type "Energy Data 15 Min Interval"
report_type_dropdown = Select(driver.find_element(By.ID, "reporttype_input"))
report_type_dropdown.select_by_visible_text("Energy Data 15 Min Interval")

In [None]:
def download_csv(date_tup=tuple):

    start_dt_str, end_dt_str = date_tup

    print(start_dt_str, end_dt_str)

    # Set the start date
    start_date_field = driver.find_element(By.ID, "startdatefield")
    start_date_field.send_keys(Keys.END)  # Move cursor to end of the current value
    start_date_field.send_keys(Keys.BACKSPACE * 12)  # Clear the current value completely
    start_date_field.send_keys(start_dt_str)
    start_date_field.send_keys(Keys.ENTER)

    # Set the end date
    end_date_field = driver.find_element(By.ID, "enddatefield")
    end_date_field.send_keys(Keys.END)  # Move cursor to end of the current value
    end_date_field.send_keys(Keys.BACKSPACE * 12)  # Clear the current value completely
    end_date_field.send_keys(end_dt_str)
    end_date_field.send_keys(Keys.ENTER)

    # submit update
    driver.find_element(By.XPATH, '//button[@class="btn updreport-button"]').click()

    #download the csv
    driver.find_element(By.XPATH, '//button[@class="btn meter-search-button"]').click()

    # Rename the downloaded file
    old_filename = os.path.join(download_path,'IntervalData.csv')
    new_filename = os.path.join(download_path,f'''{pd.to_datetime(start_dt_str).strftime('%Y%m%d')}.csv''')

    # Ensure the file exists before renaming
    start_time = time.time()
    download_timeout = 60

    while True:
        
        if os.path.exists(old_filename):
            os.rename(old_filename, new_filename)
            print(f"File renamed to {new_filename}")
            break

        # Check for timeout
        if time.time() - start_time > download_timeout:
            print(f"Timeout: '{new_filename}' was not downloaded within {download_timeout} seconds.")
            break
        time.sleep(1)

    return print(f'''{new_filename}: Downloaded''')

In [None]:
for tup in str_tuples:
    download_csv(tup)



In [None]:
driver.close()

In [None]:
csv_list = os.listdir(download_path)
df = pd.concat([pd.read_csv(os.path.join(download_path,csv)) for csv in csv_list], ignore_index=True)

In [7]:
download_path = os.path.join(os.getcwd(),'smt_downloads')
csv_list = os.listdir(download_path)
max_csv_dt = pd.to_datetime(max(csv_list).split('.csv')[0])

# Dates for report
begin_of_time = '2023-02-01'

if max_csv_dt > pd.to_datetime(begin_of_time):
      begin_of_time = (max_csv_dt + relativedelta(months=1))

tday = pd.to_datetime(datetime.today().date())
max_dt = tday.replace(day=1) - relativedelta(days=1)



#create tups of beg/end of each month since we've lived at this location
if begin_of_time < max_dt:
      dt_rng_df = pd.DataFrame(data=pd.date_range(begin_of_time, max_dt).to_list(), columns=['date'])
      dt_rng_df['mth'] = dt_rng_df['date'] - pd.to_timedelta(dt_rng_df['date'].dt.day - 1, unit='D')
      min_max_df = dt_rng_df.groupby('mth').agg({'date':['min','max']}).reset_index()
      dt_tups = list(zip(min_max_df['date']['min'],min_max_df['date']['max']))
else:
      print('CSVs are up to date. Must wait for entire month to settle before pulling more data')
      sys.exit()

AttributeError: Can only use .dt accessor with datetimelike values

False