# Mapping El Corrido de Gregorio 

### packages required {.unnumbered}

In [3]:
import geopandas as gpd
import pandas as pd
import folium

import re

from geopy.geocoders import OpenCage
import plotly.express as px 

import spacy
from spacy import displacy



### Extracting Name Enities (places) from text document {.unnumbered}

In [5]:
text = "elcorridodegregoriocortez.txt"
with open(text, 'r', encoding='utf-8') as c:
    text = c.read()
    
def clean_text(text):
    cleaned= re.sub(r'[":;,.“”]', "", text)
    return(cleaned)
text = clean_text(text)
#print(text)


TxGPE=[]
nlp = spacy.load("tx_trained_ner")
doc =nlp(text) 
#print(doc)
for ent in doc.ents:
    #print(ent.text, ent.label_)
    if ent.label_ == "GPE":
        TxGPE.append(ent.text)
#print(TxGPE)

ents = [(e.text, e.start_char, e.end_char, e.label_)for e in doc.ents]
entsname=[(e.text) for e in doc.ents]
print(entsname)

['Karnes', 'Gonzales', 'Belmont', 'Laredo', 'Encinal']


### Visulization: Highlighting Name Entities from text {.unnumbered}

In [7]:
displacy.render(doc, style ='ent', jupyter=True, page=True)

### Creating Dataframe {.unnumbered}

In [13]:
df = pd.DataFrame({'NER': entsname})
geolocator = OpenCage(api_key='358a05a403a643ec8cf37150dfed56c5')
geocode = lambda query: geolocator.geocode("%s, Texas" % query)
location = geolocator.geocode("Belmont Site", exactly_one=True)
df['Coordinates'] = df['NER'].apply(geocode)
df

GeocoderUnavailable: HTTPSConnectionPool(host='api.opencagedata.com', port=443): Max retries exceeded with url: /geocode/v1/json?key=358a05a403a643ec8cf37150dfed56c5&q=Belmont+Site (Caused by SSLError(SSLCertVerificationError(1, '[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: unable to get local issuer certificate (_ssl.c:1000)')))

### Creating GeoDataframe {.unnumbered}

In [None]:
gdf = gpd.tools.geocode(df.Coordinates, provider='ArcGIS')
gdf = gpd.GeoDataFrame(gdf, crs="EPSG:4326")
gdf["lat"]=gdf['geometry'].y
gdf ["lon"] = gdf['geometry'].x

gdf
gdf.to_json()

### Creating Mapping Visualization from El Corrido de Gregorio Cortez {.unnumbered}

In [None]:


# Ensure lat/lon columns exist
gdf["lat"] = gdf.geometry.y
gdf["lon"] = gdf.geometry.x

# Create a base map centered around the first coordinate or the mean location
m = folium.Map(location=[gdf["lat"].mean(), gdf["lon"].mean()], zoom_start=9, tiles="OpenStreetMap")

# Create a list of coordinate tuples (lat, lon)
coordinates = list(zip(gdf["lat"], gdf["lon"]))

# Add the line to the map
folium.PolyLine(
    locations=coordinates,
    color="blue",
    weight=3,
    opacity=0.7,
    zoom= 8
).add_to(m)

# Add markers with hover text (optional)
for _, row in gdf.iterrows():
    folium.Marker(
        location=[row["lat"], row["lon"]],
        popup=row["address"],  # Shows address when clicked
        tooltip=row["address"]  # Shows address on hover
    ).add_to(m)

# Display the map (if running in a Jupyter Notebook)
m
