# Trees Count and Tree Risk
## This script takes Trees Count and Tree Risk PDF reports from the specified folders and extracts the information necessary to map the reports. It then updates the corresponding feature layer. The only manual preparation is to delete duplicate PDFs and copy the remaining ones into the corresponding folder. Then run the cells in order. Once they have run, verify that the changes have been made to the layer in AGOL, update the title to reflect the changes, and send the email.

## Trees Count

In [13]:
from datetime import datetime, timedelta
import pdfplumber
import os
import re
import pandas as pd
# Allow geoprocessing tools to replace outputs left over from an earlier run.
arcpy.env.overwriteOutput = True

# The month and year of this run are stamped into the output names below.
current_time = datetime.now()
current_month, current_year = current_time.month, current_time.year
run_stamp = f"{current_month}_{current_year}"

# Every local path in this section lives under the same project folder.
project_dir = r'D:\ArcGIS_Projects\TreesCount'

# Folder that is scanned for Trees Count PDF reports.
pdf_dir_count = project_dir + r'\PDFs\TreesCount'

# One dict per report is appended to this list by the parsing cell.
data = []

# Pieces that are concatenated into an HTML link to the report on SharePoint:
# html_start + apos + base_url + <file name> + apos + html_end
base_url = "https://texasforestservice.sharepoint.com/sites/Share-ForestAnalytics/Documents/Geospatial Systems/Texas Forest Info/TreesCount/"
html_start = "<a href = "
html_end = ">Click Here </a>"
apos = "'"

# Coordinate system (WKID 4326) handed to XYTableToPoint for the report points.
spatial_ref = arcpy.SpatialReference(4326)

# Outputs of this run: the intermediate point feature class and the CSV table.
xy_feature = project_dir + rf'\Code\Data\TreesCount.gdb\XYTableToPoint_{run_stamp}_count'
csv_path = project_dir + rf'\{run_stamp}_count.csv'

D:\ArcGIS_Projects\TreesCount\9_2024.csv


In [14]:
#run through the folder and look for pdfs
from html import escape

# Start from an empty list so that re-running this cell does not append a
# second copy of every report to the rows collected by an earlier run.
data = []

# Decimal numbers with at least three decimals; used to pick the coordinates
# out of the text of page 2. Compiled once instead of once per file.
float_pattern = re.compile(r"-?\d{1,3}\.\d{3,}")

# Sorted so that the row order does not depend on the file system.
for pdf_file in sorted(os.listdir(pdf_dir_count)):
    # Accept ".pdf" in any letter case.
    if not pdf_file.lower().endswith('.pdf'):
        continue

    # The first eight characters of the file name are the submission date
    # (e.g. 20240314152223_Trees Count - ....pdf). Report and skip a file
    # that does not follow this pattern instead of aborting the whole run.
    try:
        date_object = datetime.strptime(pdf_file[:8], "%Y%m%d").date()
    except ValueError:
        print(f"Skipping '{pdf_file}': the file name does not start with a YYYYMMDD date")
        continue
    day_before = date_object - timedelta(days=1)

    # HTML link to the report on SharePoint. The URL is escaped so that an
    # apostrophe or ampersand in a file name cannot break the quoted href;
    # names without such characters produce exactly the same link as before.
    file_name_sharepoint = (
        html_start + apos + escape(base_url + pdf_file, quote=True) + apos + html_end
    )

    # Month/day strings without leading zeros, in both orders, for the
    # submission date and for the day before it.
    date_string = f"{date_object.month}/{date_object.day}"
    backwards_date_string = f"{date_object.day}/{date_object.month}"
    day_before_string = f"{day_before.month}/{day_before.day}"
    backwards_day_before_string = f"{day_before.day}/{day_before.month}"
    fallback_strings = (
        day_before_string,
        backwards_date_string,
        backwards_day_before_string,
    )

    count = 0
    first_float = None
    second_float = None

    pdf_file_path = os.path.join(pdf_dir_count, pdf_file)
    with pdfplumber.open(pdf_file_path) as pdf:
        for page in pdf.pages:
            # A page without a text layer can yield None; treat it as empty.
            text = page.extract_text() or ""

            # The tree count is the number of times the submission date
            # appears in the report. The fallbacks (day before, then the
            # day/month order) are only tried while nothing has been counted.
            # NOTE(review): this is plain substring matching, so "3/1" also
            # matches inside "3/14" - confirm against the report layout.
            hits = text.count(date_string)
            if hits:
                count += hits
            elif count == 0:
                for fallback in fallback_strings:
                    hits = text.count(fallback)
                    if hits:
                        count += hits
                        break

            # The coordinates are taken from the first two decimal numbers
            # found on page 2.
            if page.page_number == 2:
                float_matches = float_pattern.findall(text)
                if len(float_matches) >= 2:
                    first_float, second_float = float_matches[:2]

    if first_float is None:
        print(f"Warning: no coordinates found on page 2 of '{pdf_file}'")

    # Append the data to the list. The first number found is stored as
    # Ycoord and the second as Xcoord.
    data.append({
        "DateSub": date_object,
        "Trees": count,
        "Xcoord": second_float,
        "Ycoord": first_float,
        "Report": file_name_sharepoint
    })

# Naming the columns keeps the 'Trees' lookup below valid even when the
# folder contained no usable PDF.
df = pd.DataFrame(data, columns=["DateSub", "Trees", "Xcoord", "Ycoord", "Report"])
rows_with_notrees = df[df['Trees'] == 0]
print("Done, run next cell")

Done, run next cell


In [15]:
# Lift the pandas limits on the number of displayed columns and on the width
# of each column so the long Report links are printed in full.
for display_option in ('display.max_columns', 'display.max_colwidth'):
    pd.set_option(display_option, None)

# Show the reports for which no trees were counted.
print(rows_with_notrees)

       DateSub  Trees        Xcoord       Ycoord  \
3   2024-03-14      0   -71.2596993   46.8744583   
4   2024-03-14      0   -71.2596313   46.8743814   
5   2024-03-14      0   -71.2097285   46.8524902   
6   2024-03-14      0   173.4495628  -35.1153435   
11  2024-03-24      0    78.4663569   17.4441692   
14  2024-03-26      0  -113.3170644   53.5137968   
15  2024-03-26      0  -113.3170644   53.5137968   
18  2024-03-29      0   -75.2451300   39.4319289   

                                                                                                                                                                                                                     Report  
3                   <a href = 'https://texasforestservice.sharepoint.com/sites/Share-ForestAnalytics/Documents/Geospatial Systems/Texas Forest Info/TreesCount/20240314152223_Trees Count - 815 Loiret.pdf'>Click Here </a>  
4                   <a href = 'https://texasforestservice.sharepoint.com/sites/Share-Fo

In [16]:
if df['Trees'].eq(0).any():
    # Define a function to fix rows with Count == 0
    def fix_count(row):
        """Return the tree count to store for *row*.

        A row whose 'Trees' value is not 0 is returned unchanged. For a row
        with 0 trees the report link and coordinates are printed and the
        operator is asked for the correct count; the question is repeated
        until a whole number of at least 0 is typed. Entering 0 returns
        None, which marks the row for deletion.
        """
        if row['Trees'] != 0:
            return row['Trees']

        print(f"\nFile '{row['Report']}', Coordinates: ({row['Xcoord']}, {row['Ycoord']})")
        while True:
            answer = input("Please enter the correct number of trees (0 to delete the row): ").strip()
            try:
                new_count = int(answer)
            except ValueError:
                # A typo must not abort the cell and lose the earlier answers.
                print(f"'{answer}' is not a whole number, please try again.")
                continue
            if new_count < 0:
                print("The number of trees cannot be negative, please try again.")
                continue
            break

        if new_count == 0:
            return None
        return new_count

    # Ask about each zero-count row exactly once. An explicit loop is used
    # because the function prompts the operator, which should not depend on
    # how often DataFrame.apply decides to call it.
    rows_to_delete = []
    for row_label in df.index[df['Trees'] == 0]:
        corrected = fix_count(df.loc[row_label])
        if corrected is None:
            rows_to_delete.append(row_label)
        else:
            df.loc[row_label, 'Trees'] = corrected

    # Drop the rows the operator chose to delete and keep the counts as
    # whole numbers (they would otherwise be written as 1.0, 2.0, ...).
    df = df.drop(index=rows_to_delete)
    df['Trees'] = df['Trees'].astype(int)

    # Display updated DataFrame
    print("\nUpdated DataFrame:")
    print(df)
else:
    print("No rows need to be fixed. All counts are non-zero.")


File '<a href = 'https://texasforestservice.sharepoint.com/sites/Share-ForestAnalytics/Documents/Geospatial Systems/Texas Forest Info/TreesCount/20240314152223_Trees Count - 815 Loiret.pdf'>Click Here </a>', Coordinates: (-71.2596993, 46.8744583)
Please enter the correct number of trees (0 to delete the row): 1

File '<a href = 'https://texasforestservice.sharepoint.com/sites/Share-ForestAnalytics/Documents/Geospatial Systems/Texas Forest Info/TreesCount/20240314160817_Trees Count - 815 Loiret.pdf'>Click Here </a>', Coordinates: (-71.2596313, 46.8743814)
Please enter the correct number of trees (0 to delete the row): 1

File '<a href = 'https://texasforestservice.sharepoint.com/sites/Share-ForestAnalytics/Documents/Geospatial Systems/Texas Forest Info/TreesCount/20240314202433_Trees Count - Projet Loyola.pdf'>Click Here </a>', Coordinates: (-71.2097285, 46.8524902)
Please enter the correct number of trees (0 to delete the row): 1

File '<a href = 'https://texasforestservice.sharepoint

In [17]:
# Write the table without the DataFrame index; otherwise the index is saved
# as an extra unnamed first column and carried into the point feature class.
df.to_csv(csv_path, index=False)

# Turn the CSV rows into points using the Xcoord/Ycoord columns.
arcpy.management.XYTableToPoint(csv_path, xy_feature, 'Xcoord', 'Ycoord', None, spatial_ref)

#Uncomment the one you want to append and comment the other
arcpy.management.Append(xy_feature, "TreesCount", "NO_TEST", update_geometry = "UPDATE_GEOMETRY")
# arcpy.management.Append(xy_feature, "TreeRisk", "NO_TEST", update_geometry = "UPDATE_GEOMETRY")

## Tree Risk

In [None]:
from datetime import datetime, timedelta
import pdfplumber
import os
import re
import pandas as pd
# Allow geoprocessing tools to replace outputs left over from an earlier run.
arcpy.env.overwriteOutput = True

# The month and year of this run are stamped into the output names below.
current_time = datetime.now()
current_month, current_year = current_time.month, current_time.year
run_stamp = f"{current_month}_{current_year}"

# Every local path in this section lives under the same project folder.
project_dir = r'D:\ArcGIS_Projects\TreesCount'

# Folder that is scanned for Tree Risk PDF reports.
pdf_dir_risk = project_dir + r'\PDFs\TreeRisk'

# One dict per report is appended to this list by the parsing cell.
data = []

# Pieces that are concatenated into an HTML link to the report on SharePoint:
# html_start + apos + base_url + <file name> + apos + html_end
# NOTE(review): this is the same TreesCount folder URL that the Trees Count
# section uses - confirm that the Tree Risk reports are stored there as well.
base_url = "https://texasforestservice.sharepoint.com/sites/Share-ForestAnalytics/Documents/Geospatial Systems/Texas Forest Info/TreesCount/"
html_start = "<a href = "
html_end = ">Click Here </a>"
apos = "'"

# Coordinate system (WKID 4326) handed to XYTableToPoint for the report points.
spatial_ref = arcpy.SpatialReference(4326)

# Outputs of this run: the intermediate point feature class and the CSV table.
xy_feature = project_dir + rf'\Code\Data\TreesCount.gdb\XYTableToPoint_{run_stamp}_risk'
csv_path = project_dir + rf'\{run_stamp}_risk.csv'

In [None]:
#run through the folder and look for pdfs
from html import escape

# Start from an empty list so that re-running this cell does not append a
# second copy of every report to the rows collected by an earlier run.
data = []

# Decimal numbers with at least three decimals; used to pick the coordinates
# out of the text of page 2. Compiled once instead of once per file.
float_pattern = re.compile(r"-?\d{1,3}\.\d{3,}")

# Sorted so that the row order does not depend on the file system.
for pdf_file in sorted(os.listdir(pdf_dir_risk)):
    # Accept ".pdf" in any letter case.
    if not pdf_file.lower().endswith('.pdf'):
        continue

    # The first eight characters of the file name are read as the submission
    # date (YYYYMMDD). Report and skip a file that does not follow this
    # pattern instead of aborting the whole run.
    try:
        date_object = datetime.strptime(pdf_file[:8], "%Y%m%d").date()
    except ValueError:
        print(f"Skipping '{pdf_file}': the file name does not start with a YYYYMMDD date")
        continue
    day_before = date_object - timedelta(days=1)

    # HTML link to the report on SharePoint. The URL is escaped so that an
    # apostrophe or ampersand in a file name cannot break the quoted href;
    # names without such characters produce exactly the same link as before.
    file_name_sharepoint = (
        html_start + apos + escape(base_url + pdf_file, quote=True) + apos + html_end
    )

    # Month/day strings without leading zeros, in both orders, for the
    # submission date and for the day before it.
    date_string = f"{date_object.month}/{date_object.day}"
    backwards_date_string = f"{date_object.day}/{date_object.month}"
    day_before_string = f"{day_before.month}/{day_before.day}"
    backwards_day_before_string = f"{day_before.day}/{day_before.month}"
    fallback_strings = (
        day_before_string,
        backwards_date_string,
        backwards_day_before_string,
    )

    count = 0
    first_float = None
    second_float = None

    pdf_file_path = os.path.join(pdf_dir_risk, pdf_file)
    with pdfplumber.open(pdf_file_path) as pdf:
        for page in pdf.pages:
            # A page without a text layer can yield None; treat it as empty.
            text = page.extract_text() or ""

            # The tree count is the number of times the submission date
            # appears in the report. The fallbacks (day before, then the
            # day/month order) are only tried while nothing has been counted.
            # NOTE(review): this is plain substring matching, so "3/1" also
            # matches inside "3/14" - confirm against the report layout.
            hits = text.count(date_string)
            if hits:
                count += hits
            elif count == 0:
                for fallback in fallback_strings:
                    hits = text.count(fallback)
                    if hits:
                        count += hits
                        break

            # The coordinates are taken from the first two decimal numbers
            # found on page 2.
            if page.page_number == 2:
                float_matches = float_pattern.findall(text)
                if len(float_matches) >= 2:
                    first_float, second_float = float_matches[:2]

    if first_float is None:
        print(f"Warning: no coordinates found on page 2 of '{pdf_file}'")

    # Append the data to the list. The first number found is stored as
    # Ycoord and the second as Xcoord.
    data.append({
        "DateSub": date_object,
        "Trees": count,
        "Xcoord": second_float,
        "Ycoord": first_float,
        "Report": file_name_sharepoint
    })

# Naming the columns keeps the 'Trees' lookup below valid even when the
# folder contained no usable PDF.
df = pd.DataFrame(data, columns=["DateSub", "Trees", "Xcoord", "Ycoord", "Report"])
rows_with_notrees = df[df['Trees'] == 0]
print("Done, run next cell")

In [None]:
# Lift the pandas limits on the number of displayed columns and on the width
# of each column so the long Report links are printed in full.
for display_option in ('display.max_columns', 'display.max_colwidth'):
    pd.set_option(display_option, None)

# Show the reports for which no trees were counted.
print(rows_with_notrees)

In [None]:
if df['Trees'].eq(0).any():
    # Define a function to fix rows with Count == 0
    def fix_count(row):
        """Return the tree count to store for *row*.

        A row whose 'Trees' value is not 0 is returned unchanged. For a row
        with 0 trees the report link and coordinates are printed and the
        operator is asked for the correct count; the question is repeated
        until a whole number of at least 0 is typed. Entering 0 returns
        None, which marks the row for deletion.
        """
        if row['Trees'] != 0:
            return row['Trees']

        print(f"\nFile '{row['Report']}', Coordinates: ({row['Xcoord']}, {row['Ycoord']})")
        while True:
            answer = input("Please enter the correct number of trees (0 to delete the row): ").strip()
            try:
                new_count = int(answer)
            except ValueError:
                # A typo must not abort the cell and lose the earlier answers.
                print(f"'{answer}' is not a whole number, please try again.")
                continue
            if new_count < 0:
                print("The number of trees cannot be negative, please try again.")
                continue
            break

        if new_count == 0:
            return None
        return new_count

    # Ask about each zero-count row exactly once. An explicit loop is used
    # because the function prompts the operator, which should not depend on
    # how often DataFrame.apply decides to call it.
    rows_to_delete = []
    for row_label in df.index[df['Trees'] == 0]:
        corrected = fix_count(df.loc[row_label])
        if corrected is None:
            rows_to_delete.append(row_label)
        else:
            df.loc[row_label, 'Trees'] = corrected

    # Drop the rows the operator chose to delete and keep the counts as
    # whole numbers (they would otherwise be written as 1.0, 2.0, ...).
    df = df.drop(index=rows_to_delete)
    df['Trees'] = df['Trees'].astype(int)

    # Display updated DataFrame
    print("\nUpdated DataFrame:")
    print(df)
else:
    print("No rows need to be fixed. All counts are non-zero.")

In [None]:
# Write the table without the DataFrame index; otherwise the index is saved
# as an extra unnamed first column and carried into the point feature class.
df.to_csv(csv_path, index=False)

# Turn the CSV rows into points using the Xcoord/Ycoord columns, then append
# those points to the TreeRisk layer.
arcpy.management.XYTableToPoint(csv_path, xy_feature, 'Xcoord', 'Ycoord', None, spatial_ref)
arcpy.management.Append(xy_feature, "TreeRisk", "NO_TEST", update_geometry = "UPDATE_GEOMETRY")