# Trees Count and Tree Risk
## This script takes Trees Count and Tree Risk PDF reports from the specified folders and extracts the information necessary to map the reports. It then updates the corresponding feature layer. The only thing you need to do beforehand is delete duplicate PDFs and copy the remaining PDFs into the corresponding folder. Then run through this script. Once the script has been run, verify that the changes have been made to the layer in AGOL and update the title to reflect the changes. Then send the email.

## Imports and Variable Assignment

In [None]:
from datetime import datetime, timedelta
import pdfplumber
from pathlib import Path
import os
import re
import pandas as pd
# Let geoprocessing tools overwrite outputs that already exist.
# NOTE(review): `arcpy` is not imported in the visible cells — presumably the
# ArcGIS Pro notebook environment supplies it; confirm, or add `import arcpy`.
arcpy.env.overwriteOutput = True
# Month and year of this run; they are embedded in the per-run output names below.
current_time = datetime.now()
current_month = current_time.month
current_year = current_time.year

# Pieces that are concatenated into an HTML link to the report on SharePoint.
base_url = "https://texasforestservice.sharepoint.com/sites/Share-ForestAnalytics/Documents/Geospatial Systems/Texas Forest Info/TreesCount/"
html_start = "<a href = "
html_end = ">Click Here </a>"
apos = "'"

### ----------------- UPDATE THE FOLLOWING PATHS AS NEEDED ------------------------------###
trees_count_pdf_dir = Path(r'D:\ArcGIS_Projects\TreesCount\PDFs\TreesCount')
tree_risk_pdf_dir = Path(r'D:\ArcGIS_Projects\TreesCount\PDFs\TreeRisk')
###-------------------TREES COUNT PATHS---------------------------------------------------##
tc_xy_feature = rf'D:\ArcGIS_Projects\TreesCount\Code\Data\TreesCount.gdb\XYTableToPoint_{current_month}_{current_year}_count'
tc_csv_path = rf'D:\ArcGIS_Projects\TreesCount\TreesCountCSVs\{current_month}_{current_year}_count.csv'
tc_target_layer = "TreesCount"
###-------------------TREES RISK PATHS---------------------------------------------------##
tr_xy_feature = rf'D:\ArcGIS_Projects\TreesCount\Code\Data\TreesCount.gdb\XYTableToPoint_{current_month}_{current_year}_risk'
tr_csv_path = rf'D:\ArcGIS_Projects\TreesCount\TreesCountCSVs\{current_month}_{current_year}_risk.csv'
# The Tree Risk target gets its own `tr_` name so it neither overwrites the
# Trees Count target above nor leaves `tr_target_layer` undefined for the
# Tree Risk append cell.
tr_target_layer = "TreeRisk"
print("Variables Set")

## Function Declarations

In [None]:
def open_file(path):
    """Open *path* with its associated application via ``os.startfile``.

    ``os.startfile`` is only available on Windows.
    """
    target = os.fspath(path)
    os.startfile(target)
def extract_pdf(pdf_dir):
    """Build one summary row per PDF report found directly inside *pdf_dir*.

    For every ``*.pdf`` file the function:

    * reads a ``YYYYMMDD`` date from the first eight characters of the file
      name;
    * counts occurrences of that date written as ``month/day`` (no leading
      zeros) in the text of each page.  While the running count is still
      zero, a page that lacks that string falls back to the first of these
      that occurs on it: the previous day as ``month/day``, the report date
      as ``day/month``, the previous day as ``day/month``;
    * takes the first two coordinate-like decimals on page 2; the first is
      stored as ``Ycoord`` and the second as ``Xcoord`` (both kept as the
      matched strings, or ``None`` when fewer than two are found);
    * builds an HTML link to the report from the module-level URL pieces.

    NOTE: counting is a plain substring match, so a date such as ``1/5`` is
    also counted inside ``11/5`` or ``1/50``.

    Args:
        pdf_dir: ``pathlib.Path`` of the folder holding the PDF reports.

    Returns:
        A ``pandas.DataFrame`` with the columns ``DateSub``, ``Trees``,
        ``Xcoord``, ``Ycoord``, ``Report`` and ``FilePath``.  The columns are
        present even when the folder contains no PDFs.

    Raises:
        ValueError: if a file name does not start with a valid ``YYYYMMDD``
            date.
    """
    columns = ["DateSub", "Trees", "Xcoord", "Ycoord", "Report", "FilePath"]
    # Decimal numbers with 1-3 integer digits and at least 3 fractional digits.
    float_pattern = re.compile(r"-?\d{1,3}\.\d{3,}")
    data = []

    # Sorted so the row order does not depend on directory listing order.
    for pdf_file_path in sorted(pdf_dir.glob("*.pdf")):
        file_name = pdf_file_path.name
        file_name_sharepoint = html_start + apos + base_url + file_name + apos + html_end

        # The report date is encoded in the first eight characters of the name.
        file_date = file_name[:8]
        try:
            date_object = datetime.strptime(file_date, "%Y%m%d").date()
        except ValueError as err:
            raise ValueError(
                f"Cannot read a YYYYMMDD date from the start of file name {file_name!r}"
            ) from err
        day_before = date_object - timedelta(days=1)

        # Date spellings without leading zeros, e.g. "3/7".
        date_string = f"{date_object.month}/{date_object.day}"
        # Fallback spellings, tried in this order while nothing has been counted.
        fallback_strings = (
            f"{day_before.month}/{day_before.day}",
            f"{date_object.day}/{date_object.month}",
            f"{day_before.day}/{day_before.month}",
        )

        count = 0
        first_float = None
        second_float = None

        with pdfplumber.open(pdf_file_path) as pdf:
            for page in pdf.pages:
                # Treat a page with no extractable text as empty instead of
                # failing on a None result.
                text = page.extract_text() or ""

                if date_string in text:
                    count += text.count(date_string)
                elif count == 0:
                    for candidate in fallback_strings:
                        if candidate in text:
                            count += text.count(candidate)
                            break

                # Coordinates are taken from the second page only.
                if page.page_number == 2:
                    float_matches = float_pattern.findall(text)
                    if len(float_matches) >= 2:
                        first_float, second_float = float_matches[:2]

        data.append({
            "DateSub": date_object,
            "Trees": count,
            "Xcoord": second_float,
            "Ycoord": first_float,
            "Report": file_name_sharepoint,
            "FilePath": pdf_file_path,
        })

    # Passing the columns keeps the schema stable when no PDFs were found.
    return pd.DataFrame(data, columns=columns)

def _prompt_tree_count(file_path):
    """Open *file_path* and ask for its tree count until a valid one is typed.

    Returns the entered count as an ``int``, or ``None`` when the user enters
    0 (meaning the row should be deleted).
    """
    print(f"\nOpening file: {file_path}")
    open_file(file_path)  # this opens the PDF automatically
    while True:
        answer = input("Please enter the correct number of trees (0 to delete the row): ")
        try:
            new_count = int(answer)
        except ValueError:
            # Re-prompt instead of aborting and losing the counts entered so far.
            print(f"'{answer}' is not a whole number. Please try again.")
            continue
        if new_count < 0:
            print("The number of trees cannot be negative. Please try again.")
            continue
        return new_count or None


def fix_zeros(df: pd.DataFrame) -> pd.DataFrame:
    """Interactively correct rows whose ``Trees`` value is 0.

    For every row with ``Trees == 0`` the PDF in ``FilePath`` is opened and
    the user is asked for the correct count; entering 0 deletes the row.

    The helper ``FilePath`` column is removed from the result whether or not
    any row needed fixing, so the returned schema is always the same.  When
    rows were fixed, ``Trees`` is cast back to ``int`` afterwards, because
    marking rows for deletion would otherwise leave it as a float column.
    The input DataFrame is not modified.

    Args:
        df: DataFrame with a ``Trees`` column and, when zero counts are
            present, a ``FilePath`` column.

    Returns:
        A new DataFrame without ``FilePath`` and without the rows the user
        chose to delete.
    """
    zero_mask = df['Trees'].eq(0)
    if not zero_mask.any():
        print("No rows need to be fixed. All counts are non-zero.")
        return df.drop(columns=['FilePath'], errors='ignore')

    # Ask about the zero rows in row order; None marks a row for deletion.
    corrected = [
        _prompt_tree_count(file_path) if is_zero else trees
        for trees, file_path, is_zero in zip(df['Trees'], df['FilePath'], zero_mask)
    ]

    df = (
        df.assign(Trees=corrected)
        .dropna(subset=['Trees'])
        .astype({'Trees': int})
        .drop(columns=['FilePath'])
    )
    print("\nUpdated DataFrame:")
    print(df)
    return df

def append_AGOL_layer(
    df: pd.DataFrame,
    csv_path: str,
    xy_feature: str,
    target_layer: str,
    spatial_ref: arcpy.SpatialReference | None = None
) -> None:
    """Export *df* to CSV, convert it to points and append them to a layer.

    Steps: write *df* to *csv_path*, run ``XYTableToPoint`` on that CSV using
    the ``Xcoord``/``Ycoord`` columns to create *xy_feature*, then ``Append``
    *xy_feature* to *target_layer* with the ``NO_TEST`` schema option.

    Nothing is written or appended when *df* has no rows.

    Args:
        df: Rows to export; must contain ``Xcoord`` and ``Ycoord`` columns.
        csv_path: Path of the CSV file to write.
        xy_feature: Path of the point feature class to create.
        target_layer: Layer that receives the new points.
        spatial_ref: Coordinate system of the X/Y values; defaults to
            ``arcpy.SpatialReference(4326)``.
    """
    # An empty table has nothing to append; skip the geoprocessing tools.
    if df.empty:
        print("No rows to append; skipping CSV export and layer update.")
        return

    if spatial_ref is None:
        spatial_ref = arcpy.SpatialReference(4326)

    # Write DataFrame to CSV
    df.to_csv(csv_path, index=False)

    # Convert table to points
    arcpy.management.XYTableToPoint(
        csv_path,
        xy_feature,
        'Xcoord',
        'Ycoord',
        None,  # no Z field
        spatial_ref
    )

    # Append to target layer
    arcpy.management.Append(
        xy_feature,
        target_layer,
        "NO_TEST",
        update_geometry="UPDATE_GEOMETRY"
    )
def process_pdfs(pdf_dir):
    """Extract report rows from the PDFs in *pdf_dir*, then fix zero counts.

    Returns the DataFrame produced by ``extract_pdf`` after it has been
    passed through ``fix_zeros``.
    """
    extracted = extract_pdf(pdf_dir)
    return fix_zeros(extracted)


print("Functions Set")

## Trees Count Processing

In [None]:
# Extract the Trees Count reports; prompts for a corrected count on any report
# whose extracted tree count is 0.
treescount_df = process_pdfs(trees_count_pdf_dir)

In [None]:
# Export the Trees Count rows and append them to the layer named by
# tc_target_layer.
append_AGOL_layer(
    df=treescount_df,
    csv_path=tc_csv_path,
    xy_feature=tc_xy_feature,
    target_layer=tc_target_layer,
)
print("Trees Count Layer Processed")

## Tree Risk Processing

In [None]:
# Extract the Tree Risk reports; prompts for a corrected count on any report
# whose extracted tree count is 0.
treerisk_df = process_pdfs(tree_risk_pdf_dir)

In [None]:
# Export the Tree Risk rows and append them to the layer named by
# tr_target_layer.
append_AGOL_layer(
    df=treerisk_df,
    csv_path=tr_csv_path,
    xy_feature=tr_xy_feature,
    target_layer=tr_target_layer,
)
print("Tree Risk Layer Processed")