<p>
El Corrido de Gregorio Cortez: 
the largest manhunt in U.S. history from June 14, 1901 to June 22, 1901</p>

<p>Based on lyrics from the Corrido de Gregorio Cortez: Variant X, English translation by Américo Paredes</p>

## Packages and Libraries used


In [None]:
import geopandas as gpd
import pandas as pd
import numpy as np

from geograpy import places
import re

import geopy
from geopy.geocoders import ArcGIS
#from geopy.extra.rate_limiter import RateLimiter

import shapely
from shapely.geometry import Point
from shapely.wkt import loads

import contextily as cx
import matplotlib.pyplot as plt #to make sure there are no errors when plotting a graph
import pyproj
import plotly.express as px 

import spacy
from spacy import displacy

import locationtagger
nlp = spacy.load("en_core_web_sm")

import branca
import jinja2
import requests
import folium

## NER: Finding Places within the lyrics 
<p>We will search for named-entity locations using a customized pipeline trained on data covering Texas municipalities, unincorporated communities, and ghost towns.</p>
<p>The training data (lists of ghost towns, municipalities, and unincorporated places in Texas) can be found at <a href="https://github.com/dianita956/Corridos">dianita956/Corridos</a>.</p>


In [None]:
# Path to the corrido lyrics. A forward slash is used instead of the original
# backslash: "\E" is not a valid escape sequence (SyntaxWarning on recent
# Python versions) and a backslash separator only resolves on Windows, whereas
# "/" is accepted by open() on every platform.
corpus_path = "corrido corpus/ElCorridodeGregorioCortez_X.txt"

# Load the whole file into `text`; later cells read the lyrics from this name.
with open(corpus_path, 'r', encoding='utf-8') as corpus_file:
    text = corpus_file.read()
    
def clean_text(text):
    """Return *text* with quotation marks and basic punctuation removed.

    The characters deleted are the straight double quote, the curly double
    quotes, colon, semicolon, comma and period. Everything else, including
    apostrophes, whitespace and line breaks, is left untouched.
    """
    punctuation = r'[":;,.“”]'
    return re.sub(punctuation, "", text)
# Strip punctuation from the lyrics before running entity recognition.
text = clean_text(text)

# Load the pipeline saved as "tx_trained_ner" (this rebinds the module-level
# `nlp`) and run it over the cleaned lyrics.
nlp = spacy.load("tx_trained_ner")
doc = nlp(text)

# Keep the text of every entity the pipeline labelled as GPE.
TxGPE = [entity.text for entity in doc.ents if entity.label_ == "GPE"]
print(TxGPE)

In [None]:
# Full entity records (text, start offset, end offset, label), printed for
# inspection only.
ents = [
    (span.text, span.start_char, span.end_char, span.label_)
    for span in doc.ents
]
print(ents)

# Rebind `ents` to just the entity strings; this is the list the geoparsing
# cell turns into a DataFrame. Note it holds every entity in `doc.ents`,
# with no filtering by label.
ents = [span.text for span in doc.ents]
print(ents)

In [None]:
# Show the lyrics with the recognized entities highlighted, rendered inline
# in the notebook.
displacy.render(
    doc,
    style='ent',
    jupyter=True,
    page=True,
)

## Geoparsing: Places into geographic identifiers


In [None]:
# ArcGIS geocoder client used for every lookup in this cell.
geolocator = ArcGIS(user_agent='CorridosMap')


def geocode(query):
    """Geocode *query* with ", Texas" appended and return the geocoder's result."""
    return geolocator.geocode("%s, Texas" % query)


# One row per recognized entity; "Location" holds whatever the geocoder
# returned for that place name.
df = pd.DataFrame(ents, columns=["NER_Places"])
df['Location'] = df['NER_Places'].apply(geocode)
df

In [None]:
# Geocode the "Location" column again through geopandas so the result comes
# back as a table with a geometry column (used by the mapping cell below).
# NOTE(review): df["Location"] holds geocoder result objects rather than plain
# address strings, so this presumably relies on their string form; confirm,
# and check how entries with no match are handled.
gdf = gpd.tools.geocode(df["Location"], provider="ArcGIS")
gdf

## Plotly: Mapping Places


In [None]:
# Read the Mapbox access token from the local "mapboxtoken" file. The context
# manager closes the file handle (the original left it open), and strip()
# removes a trailing newline so it is not passed along as part of the token.
with open("mapboxtoken", encoding="utf-8") as token_file:
    px.set_mapbox_access_token(token_file.read().strip())

# Draw the geocoded places as one connected line, in row order, using the
# point coordinates from the GeoDataFrame; hovering shows the matched address.
fig = px.line_mapbox(
    gdf,
    lat=gdf.geometry.y,
    lon=gdf.geometry.x,
    hover_name="address",
)

# NOTE(review): the "stamen-terrain" style may no longer be served in recent
# plotly releases; confirm the basemap still loads.
fig.update_layout(
    mapbox_style="stamen-terrain",
    mapbox_zoom=6,
    mapbox_center_lat=29,
    margin={"r": 0, "t": 0, "l": 0, "b": 0},  # remove the figure margins
)

fig.show()

## Resources {.appendix}
