<a href="https://colab.research.google.com/github/jeffblackadar/CRANE-CCAD-maps/blob/main/text_on_map_tiles_transcription_font_size_shared.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

This notebook uses Azure Cognitive Services to transcribe text on an old map.
It then puts the text with its geographic location into a shapefile.
A blog post about it is here: http://jeffblackadar.ca/

In [None]:
# https://towardsdatascience.com/mapping-with-matplotlib-pandas-geopandas-and-basemap-in-python-d11b57ab5dac
# https://stackoverflow.com/questions/54613992/how-to-install-and-use-basemap-on-google-colab
# !apt-get install libgeos-3.6.2 (I am not sure if this is needed)
!apt-get install libgeos-dev
!pip install https://github.com/matplotlib/basemap/archive/master.zip
!pip install geopandas
!pip install contextily
!pip install -U rasterio

In [None]:
!pip install --upgrade azure-cognitiveservices-vision-computervision

In [None]:
# Mount Google Drive at /content/drive so that files kept there (the
# credentials file and the map tiles read further down) can be opened as
# ordinary paths.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
settings_path = "/content/drive/MyDrive/william_white/"

import os
import json
# Set the file we want to use for authenticating an Azure app.
# cv.json holds the credentials for this program so they never appear in the
# notebook itself. It is expected to look like this:
# {
# "COMPUTER_VISION_SUBSCRIPTION_KEY":"___the_COMPUTER_VISION_SUBSCRIPTION_KEY___",
# "COMPUTER_VISION_ENDPOINT":"___the_COMPUTER_VISION_ENDPOINT___"
# }
cv_settings_file = settings_path+'cv.json'
with open(cv_settings_file, "r") as read_file:
    cv_auth_data = json.load(read_file)

# Publish both values as environment variables, key first and endpoint second,
# so later cells can read them from os.environ.
for _cv_setting in ('COMPUTER_VISION_SUBSCRIPTION_KEY', 'COMPUTER_VISION_ENDPOINT'):
    os.environ[_cv_setting] = cv_auth_data[_cv_setting]

In [None]:
# https://docs.microsoft.com/en-us/azure/cognitive-services/computer-vision/quickstarts-sdk/python-sdk
from azure.cognitiveservices.vision.computervision import ComputerVisionClient
from azure.cognitiveservices.vision.computervision.models import OperationStatusCodes
from azure.cognitiveservices.vision.computervision.models import VisualFeatureTypes
from msrest.authentication import CognitiveServicesCredentials
from msrest.authentication import CognitiveServicesCredentials

from array import array
import os
from PIL import Image
import sys
import time
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.basemap import Basemap
import geopandas as gpd
import pandas as pd
import contextily as ctx

In [None]:
# The Computer Vision subscription key must be present in the environment;
# stop with an explanatory message when it is not.
if 'COMPUTER_VISION_SUBSCRIPTION_KEY' not in os.environ:
    print("\nSet the COMPUTER_VISION_SUBSCRIPTION_KEY environment variable.\n**Restart your shell or IDE for changes to take effect.**")
    sys.exit()
subscription_key = os.environ['COMPUTER_VISION_SUBSCRIPTION_KEY']

# The same applies to the Computer Vision endpoint.
if 'COMPUTER_VISION_ENDPOINT' not in os.environ:
    print("\nSet the COMPUTER_VISION_ENDPOINT environment variable.\n**Restart your shell or IDE for changes to take effect.**")
    sys.exit()
endpoint = os.environ['COMPUTER_VISION_ENDPOINT']

In [None]:
# Wrap the subscription key in the credentials object the SDK expects, then
# build the client that the transcription function below talks to.
_cv_credentials = CognitiveServicesCredentials(subscription_key)
computervision_client = ComputerVisionClient(endpoint, _cv_credentials)

# Set this up for a series of images

In [None]:
import math
def recognize_text_in_local_image(local_image_handwritten_path):
    """Transcribe the text in a local image with the Computer Vision Read API.

    The image is submitted through the module-level ``computervision_client``,
    the read operation is polled once a second until it leaves the
    'notStarted'/'running' states, and each recognised line is printed and
    collected.

    Args:
        local_image_handwritten_path: Path of the image file to transcribe.

    Returns:
        A list with one ``[text, bounding_box, letter_length]`` entry per
        recognised line. ``bounding_box`` is passed through unchanged from the
        service; ``letter_length`` is the rounded distance between the box
        points at indices (0, 1) and (4, 5) divided by the number of
        characters in the line. The list is empty when the operation does not
        finish with the ``succeeded`` status.
    """
    # The context manager closes the file handle once the upload call returns,
    # including when the call raises.
    with open(local_image_handwritten_path, "rb") as local_image_handwritten:
        # raw=True exposes the response headers, which carry the operation location.
        recognize_handwriting_results = computervision_client.read_in_stream(local_image_handwritten, raw=True)

    # The operation id is the last path segment of the Operation-Location URL.
    operation_location_remote = recognize_handwriting_results.headers["Operation-Location"]
    operation_id = operation_location_remote.split("/")[-1]

    # Poll until the service reports a final state.
    # NOTE(review): there is no upper bound on the wait; consider a timeout.
    while True:
        get_handw_text_results = computervision_client.get_read_result(operation_id)
        if get_handw_text_results.status not in ['notStarted', 'running']:
            break
        time.sleep(1)

    lines_of_text = []
    if get_handw_text_results.status == OperationStatusCodes.succeeded:
        for text_result in get_handw_text_results.analyze_result.read_results:
            for line in text_result.lines:
                # A line without characters has no per-letter size (and would
                # divide by zero below), so it is left out of the result.
                if not line.text:
                    continue
                print(line.text)
                pts = line.bounding_box
                # Distance between the box points at indices (0, 1) and (4, 5),
                # used as the length of the line of text in pixels.
                xd = abs(pts[4] - pts[0])
                yd = abs(pts[5] - pts[1])
                word_length = math.sqrt((xd ** 2) + (yd ** 2))

                # Average pixels per character: a rough measure of font size.
                letter_length = round(word_length/len(line.text))
                print(letter_length)
                lines_of_text.append([line.text, line.bounding_box, letter_length])
    return(lines_of_text)


In [None]:
import cv2
import matplotlib.pyplot as plt
import gdal
import geopandas as gpd
import pandas as pd
from shapely.geometry import Point, Polygon

# Text-drawing settings expressed with OpenCV constants and tuples.
# Of these, only fontColor is referenced by the code visible in this notebook
# (geocode_labels reads it); the others appear to be left over from an earlier
# step that drew labels onto the image — TODO confirm before removing.
# NOTE(review): the colour tuples are presumably in OpenCV's BGR order — confirm.
font                   = cv2.FONT_HERSHEY_SIMPLEX
bottomLeftCornerOfText = (10,500)
fontScale              = 1
fontColor              = (0,0,200)
fontColor_small        = (200,0,0)
lineType               = 2

def get_point_from_pixel(x,y, xoffset, px_w, rot1, yoffset, px_h, rot2):
    """Convert a pixel position to map coordinates with an affine geotransform.

    Args:
        x: Pixel column.
        y: Pixel row.
        xoffset: Map X added to every result (the transform's X origin).
        px_w: Map-X change per pixel column.
        rot1: Map-X change per pixel row (rotation/shear term).
        yoffset: Map Y added to every result (the transform's Y origin).
        px_h: Map-Y change per pixel row.
        rot2: Map-Y change per pixel column (rotation/shear term).

    Returns:
        A ``(posX, posY)`` tuple of map coordinates, shifted by half a pixel
        width and half a pixel height.
    """
    posX = (px_w * x) + (rot1 * y) + xoffset
    posY = (px_h * y) + (rot2 * x) + yoffset
    # Shift to the centre of the pixel. The shift uses only px_w and px_h, so it
    # is exact only when both rotation terms are zero.
    posX += px_w / 2.0
    posY += px_h / 2.0
    return(posX,posY)

def geocode_labels(local_image_handwritten_path, lines_of_text, max_letter_size=30, geotif_crs=32128):
    """Turn transcribed text boxes on a georeferenced tile into map polygons.

    The tile's geotransform is read with gdal and the four corners of every
    kept bounding box are converted from pixel to map coordinates with
    get_point_from_pixel.

    Args:
        local_image_handwritten_path: Path of the georeferenced image whose
            geotransform places the pixels on the map.
        lines_of_text: Sequence of ``[text, bounding_box, letter_size]``
            entries, as returned by recognize_text_in_local_image.
            ``bounding_box`` is read as four consecutive (x, y) pixel pairs.
        max_letter_size: Only lines whose ``letter_size`` is strictly below
            this value are kept. Defaults to 30, the value that was previously
            hard-coded.
        geotif_crs: EPSG code assigned to the returned frame. Defaults to
            32128, the value that was previously hard-coded.

    Returns:
        A GeoDataFrame with columns ``id`` (position of the line within
        ``lines_of_text``), ``text`` and ``geometry`` (one Polygon per kept
        line). It has no rows when nothing passes the size filter.

    Raises:
        IOError: If gdal cannot open the image.
    """
    img_ds = gdal.Open(local_image_handwritten_path)
    if img_ds is None:
        raise IOError("Could not open georeferenced image: " + str(local_image_handwritten_path))
    xoffset, px_w, rot1, yoffset, rot2, px_h  = img_ds.GetGeoTransform()
    # Dropping the reference releases the GDAL dataset.
    img_ds = None

    # Collect plain lists and build the frame once; growing a frame row by row
    # copies it on every step and relies on DataFrame.append, which newer
    # pandas releases no longer provide.
    ids = []
    texts = []
    polygons = []
    for ln, l in enumerate(lines_of_text):
        pts = l[1]
        letter_size = l[2]
        if letter_size >= max_letter_size:
            continue
        corners = [
            get_point_from_pixel(pts[2 * cn], pts[2 * cn + 1], xoffset, px_w, rot1, yoffset, px_h, rot2)
            for cn in range(4)
        ]
        # ln is the line's position in lines_of_text, so ids are distinct
        # within a tile (previously every row was written with id 0).
        ids.append(ln)
        texts.append(l[0])
        polygons.append(Polygon(corners))

    return gpd.GeoDataFrame({'id': ids, 'text': texts}, geometry=polygons, crs="EPSG:" + str(geotif_crs))

In [None]:
        # Folder that holds the georeferenced map tiles, named rRRcCC.tif
        # (two-digit row, two-digit column).
        tile_dir = '/content/drive/MyDrive/MaskCNNhearths/HopewellFurnaceMapWork/tif_tiles/'

        # Each entry is (row, columns to process in that row).
        # Alternative tile set, labelled "no overlap tiles" when it was
        # disabled; kept so it can be selected again below.
        no_overlap_tile_columns = [
            (0, range(1, 6)),
            (1, range(1, 6)),
            (2, range(0, 6)),
            (3, range(0, 5)),
            (4, range(0, 5)),
            (5, range(0, 4)),
            (6, range(0, 3)),
            (7, range(0, 2)),
        ]
        # Tile set currently being transcribed.
        active_tile_columns = [
            (0, range(3, 6)),
            (1, range(1, 7)),
            (2, range(1, 7)),
            (3, range(0, 6)),
            (4, range(0, 6)),
            (5, range(0, 5)),
            (6, range(0, 4)),
            (7, range(0, 3)),
            (8, range(0, 2)),
        ]

        # Full paths of the tiles to process, row by row, column by column.
        tiles = [
            '{}r{:02d}c{:02d}.tif'.format(tile_dir, tile_row, tile_col)
            for tile_row, tile_cols in active_tile_columns
            for tile_col in tile_cols
        ]
 
# EPSG code assigned to the combined layer.
geotif_crs = 32128

# NOTE(review): the first line of this cell is indented, and IPython is
# understood to strip that leading indent from every line that starts with it.
# Statements below are therefore kept at most one level deep — confirm and
# relax once the cell's indentation is cleaned up.

# Transcribe and geocode every tile, keeping the per-tile frames in a list so
# they can be combined in a single step instead of copying the growing frame
# on every pass of the loop.
tile_frames = []
for t in tiles:
    lines_of_text = recognize_text_in_local_image(t)
    tile_frames.append(geocode_labels(t,lines_of_text))

# Tiles that produced no labels contribute nothing to the combined layer.
tile_frames = [tile_frame for tile_frame in tile_frames if len(tile_frame) > 0]

if tile_frames:
    all_line_text_poly = gpd.GeoDataFrame(pd.concat(tile_frames, ignore_index=True), crs="EPSG:" + str(geotif_crs))
else:
    all_line_text_poly = gpd.GeoDataFrame(geometry=[], crs="EPSG:" + str(geotif_crs))
all_line_text_poly.to_file(os.path.join("/content/drive/MyDrive/MaskCNNhearths/HopewellFurnaceMapWork/","all_line_text.shp"))

In [None]:
# Load the transcribed labels written by the tile-processing cell.
line_text_df=gpd.read_file("/content/drive/MyDrive/MaskCNNhearths/HopewellFurnaceMapWork/all_line_text.shp")

print(line_text_df.crs)
# Change crs to one compatible with basemap
line_text_df = line_text_df.to_crs(epsg=3857) #3857
print(line_text_df.crs)

ax = line_text_df.plot(figsize=(40, 40), alpha=0.5, edgecolor='k')
#ctx.add_basemap(ax)

# Outline the tiles in red on the same axes.
tile_polys_df=gpd.read_file("/content/drive/MyDrive/MaskCNNhearths/HopewellFurnaceMapWork/tif_tiles/tile_polys.shp")
tile_polys_df = tile_polys_df.to_crs(epsg=3857) #3857
tile_polys_df.boundary.plot(ax=ax,color="red",alpha=0.3)

# One (x, y) tuple inside each polygon, used as the anchor of its text label.
# Later cells read this column, so it stays on the frame.
line_text_df['coords'] = [geom.representative_point().coords[0] for geom in line_text_df['geometry']]
for idx, row in line_text_df.iterrows():
    # The label is passed positionally: the keyword for it was renamed from
    # `s` to `text` in newer matplotlib, and positional works with both.
    ax.annotate(row['text'], xy=row['coords'], horizontalalignment='center')


In [None]:
# A label counts as a duplicate when more than this fraction of its polygon
# lies inside another label's polygon that is at least as large.
overlap_threshold = .8

glist = line_text_df['geometry']
tlist = line_text_df['text']
rows_to_remove = []
# Rows already queued for removal, so each row is listed only once.
rows_already_flagged = set()
print(len(glist))
for i in range(len(glist)):
    for j in range(i + 1, len(glist)):
        intersect_poly = glist[i].intersection(glist[j])
        if intersect_poly.is_empty:
            continue

        area_i = glist[i].area
        area_j = glist[j].area
        # The smaller polygon of the pair is the removal candidate; on a tie
        # the later row (j) is the candidate.
        i_is_smaller = area_i < area_j
        smaller_area = area_i if i_is_smaller else area_j
        # A zero-area polygon has no meaningful overlap ratio (and would
        # divide by zero), so the pair is skipped.
        if smaller_area == 0:
            continue
        if intersect_poly.area/smaller_area <= overlap_threshold:
            continue

        if i_is_smaller:
            print("remove i : ", i, j, "{:3.2%}".format(intersect_poly.area/area_i), int(intersect_poly.area), " Remove: ", tlist[i], int(area_i), " Keep: ", tlist[j], int(area_j))
            row_to_remove = i
        else:
            print("remove j : ", i, j, "{:3.2%}".format(intersect_poly.area/area_j), int(intersect_poly.area), " Keep: ", tlist[i], int(area_i),  " Remove: ", tlist[j], int(area_j))
            row_to_remove = j
        if row_to_remove not in rows_already_flagged:
            rows_already_flagged.add(row_to_remove)
            rows_to_remove.append(row_to_remove)

In [None]:
print(rows_to_remove)
# Drop the rows flagged as duplicates; line_text_df itself is left untouched.
line_text_df_deduplicated = line_text_df.drop(rows_to_remove)
print(len(line_text_df_deduplicated['geometry']))
coords_column = line_text_df_deduplicated['coords']
print(coords_column,type(coords_column))
print(line_text_df_deduplicated)
print(line_text_df_deduplicated.columns)

print(type(line_text_df_deduplicated))
# The coords column holds tuples, which .shp files cannot store, so it is
# removed before the frame is written.
line_text_df_deduplicated = line_text_df_deduplicated.drop(columns='coords')
deduplicated_shp_path = os.path.join("/content/drive/MyDrive/MaskCNNhearths/HopewellFurnaceMapWork/","all_line_text_deduplicated.shp")
line_text_df_deduplicated.to_file(deduplicated_shp_path)
print(line_text_df_deduplicated.columns)


In [None]:
# Load the de-duplicated labels written by the previous cell.
line_text_df=gpd.read_file("/content/drive/MyDrive/MaskCNNhearths/HopewellFurnaceMapWork/all_line_text_deduplicated.shp")

print(line_text_df.crs)
# Change crs to one compatible with basemap
line_text_df = line_text_df.to_crs(epsg=3857) #3857
print(line_text_df.crs)

ax = line_text_df.plot(figsize=(20, 20), alpha=0.5, edgecolor='k')
ctx.add_basemap(ax)

# Outline the tiles in red on the same axes.
tile_polys_df=gpd.read_file("/content/drive/MyDrive/MaskCNNhearths/HopewellFurnaceMapWork/tif_tiles/tile_polys.shp")
tile_polys_df = tile_polys_df.to_crs(epsg=3857) #3857
tile_polys_df.boundary.plot(ax=ax,color="red",alpha=0.3)

# One (x, y) tuple inside each polygon, used as the anchor of its text label.
line_text_df['coords'] = [geom.representative_point().coords[0] for geom in line_text_df['geometry']]
for idx, row in line_text_df.iterrows():
    # The label is passed positionally: the keyword for it was renamed from
    # `s` to `text` in newer matplotlib, and positional works with both.
    ax.annotate(row['text'], xy=row['coords'], horizontalalignment='center')




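Download `all_line_text_deduplicated.shp` (together with the companion files saved alongside it, such as `.shx`, `.dbf` and `.prj`) from `/content/drive/MyDrive/MaskCNNhearths/HopewellFurnaceMapWork/` to use it in other mapping software such as ArcGIS.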