# El corrido de Gregorio Cortez

## Mapping the manhunt of Rio Grande border folk hero Gregorio Cortez: the largest manhunt in U.S. history.

### June 14, 1901 to June 22, 1901

### Based on lyrics from the Corrido de Gregorio Cortez (Variant X, English translation by Américo Paredes)

#### Project author: Diane Lopez

In [None]:
import voila
import geopandas as gpd

import geograpy
from geograpy import extraction
from geograpy import places
import re
from geograpy.labels import Labels
#from geograpy import geograpy-nltk

import geopy
from geopy.geocoders import Nominatim
import time

import pandas as pd
import numpy as np

import shapely
from shapely.geometry import Point

import contextily as ctx
import matplotlib.pyplot as plt #to make sure there are no errors when plotting a graph
import plotly.express as px
import plotly.graph_objects as go

#from mordecai import Geoparser
import nltk

import spacy
from spacy import displacy
#from spacy.lang.en import English
#from spacy.lang.es import Spanish
#from spacy.pipeline import EntityRuler
#from spacy.tokens import Doc
#from spacy.training import Example
#from spacy.language import Language

import json
import random

In [None]:
# Load the "en_core_web_lg" spaCy pipeline.
# NOTE(review): `nlp` is rebound to the "tx_ner_model" pipeline in the next
# code cell before any document is processed in this notebook — confirm that
# this load is still needed.
nlp = spacy.load("en_core_web_lg")

In [None]:
# Read the corrido lyrics from the corpus folder into `text`.
# A forward slash is used as the path separator: it works on both Windows and
# POSIX, and avoids the invalid "\E" escape sequence that a backslash inside a
# regular (non-raw) string literal produces.
corpus_path = "corrido corpus/ElCorridodeGregorioCortez_X.txt"
with open(corpus_path, 'r', encoding='utf-8') as c:
    text = c.read()
    
def clean_text(text):
    """Return *text* with quotation marks and basic punctuation removed.

    Every occurrence of ``"``, ``:``, ``;``, ``,``, ``.``, ``“`` and ``”`` is
    deleted; all other characters, including whitespace and newlines, are
    left untouched.
    """
    punctuation = re.compile(r'[":;,.“”]')
    return punctuation.sub("", text)
# Strip punctuation from the lyrics before running named-entity recognition.
text = clean_text(text)

# Run the "tx_ner_model" pipeline over the lyrics and keep the text of every
# entity labelled GPE.
nlp = spacy.load("tx_ner_model")
doc = nlp(text)
TxGPE = [entity.text for entity in doc.ents if entity.label_ == "GPE"]
print(TxGPE)

In [None]:
# Every recognised entity with its character offsets and label.
ents = [
    (span.text, span.start_char, span.end_char, span.label_)
    for span in doc.ents
]
print(ents)

# The entity texts only.
ents = [span.text for span in doc.ents]
print(ents)

In [None]:
# Collapse the list of GPE names into a single space-separated string.
TxGPEStr = ' '.join(map(str, TxGPE))
print(TxGPEStr)

In [None]:
# Visualise the recognised entities of `doc` with displaCy.
displacy.render(doc, style="ent")

Geoparsing: Finding places from the corrido lyrics

In [None]:
# Run geograpy's extractor directly on the cleaned lyrics and list the places
# it found.
LOC = extraction.Extractor(text=text)
LOC.find_geoEntities()
print(LOC.places)

In [None]:
# Build a geograpy PlaceContext from the space-joined GPE names, then fill in
# and print countries, regions and cities one after another.
pc = places.PlaceContext(TxGPEStr)

pc.set_countries()
print(pc.countries)  # e.g. ['United States']

pc.set_regions()
print(pc.regions)  # e.g. ['Texas']

pc.set_cities()
print(pc.cities)  # e.g. ['Brownsville']

print(pc.address_strings)  # e.g. ['Brownsville', 'Texas, United States']

Laredo, Tejas

In [None]:
# Nominatim geocoder client, created with the "CorridosMap" user agent.
geolocator = Nominatim(user_agent="CorridosMap")

In [None]:
# Take the first city found by geograpy as the Laredo query string.
# NOTE(review): this relies on the order of pc.cities — confirm that index 0
# is Laredo for the current corpus.
laredo = pc.cities[0]
print(laredo)

Geocoding a bare city name may need the country code (ISO 3166-1 alpha-2): https://en.wikipedia.org/wiki/ISO_3166-1_alpha-2#Current_codes

In [None]:
# Geocode Laredo with Nominatim. The search is restricted to the United States
# (ISO 3166-1 alpha-2 code "us") so the bare city name cannot resolve to a
# same-named place in another country.
laredo_info = geolocator.geocode(laredo, country_codes="us")
if laredo_info is None:
    # geocode() returns None when nothing matches; fail with a clear message
    # instead of an AttributeError on `.latitude` below.
    raise ValueError(f"Nominatim returned no result for {laredo!r}")
laredo_lat = laredo_info.latitude
laredo_long = laredo_info.longitude
print(laredo_info)
print(laredo_long, ',', laredo_lat)

Belmont, Tejas/Texas

In [None]:
# Geocode Belmont from an explicit "place, county, state" query rather than
# from an entry of pc.cities.
belmont = 'Belmont, Gonzales County,Texas'
belmont_info = geolocator.geocode(belmont)
if belmont_info is None:
    # geocode() returns None when nothing matches; fail with a clear message
    # instead of an AttributeError on `.latitude` below.
    raise ValueError(f"Nominatim returned no result for {belmont!r}")
belmont_lat = belmont_info.latitude
belmont_long = belmont_info.longitude
print(belmont_info)
print(belmont_long, ',', belmont_lat)

Gonzales, Tejas

In [None]:

# Geocode Gonzales, using the third city found by geograpy as the query.
# NOTE(review): this relies on the order of pc.cities, and a bare city name
# can be ambiguous — if it resolves outside Texas, query 'Gonzales,Texas'
# instead.
gonzales = pc.cities[2]
gonzales_info = geolocator.geocode(gonzales)
if gonzales_info is None:
    # geocode() returns None when nothing matches; fail with a clear message
    # instead of an AttributeError on `.latitude` below.
    raise ValueError(f"Nominatim returned no result for {gonzales!r}")
gonzales_lat = gonzales_info.latitude
gonzales_long = gonzales_info.longitude
print(gonzales_lat, ',', gonzales_long)
print(gonzales_info)

Build a DataFrame of the geocoded places

In [None]:
# One row per stop (Gonzales, Belmont, Laredo) with its geocoded coordinates.
greg = pd.DataFrame(
    {
        'Place': [gonzales, 'Belmont', laredo],
        'State': ['Texas'] * 3,
        'Latitude': [gonzales_lat, belmont_lat, laredo_lat],
        'Longitude': [gonzales_long, belmont_long, laredo_long],
    }
)

In [None]:
# Preview the first rows of the places table.
greg.head()

Setting geometry: https://geopandas.org/en/stable/docs/reference/api/geopandas.GeoDataFrame.set_geometry.html#

Creating a GeoDataFrame from a DataFrame with coordinates: https://geopandas.org/en/stable/gallery/create_geopandas_from_pandas.html

In [None]:
# Turn the table into a GeoDataFrame: point geometry is built from the
# Longitude (x) and Latitude (y) columns, with CRS EPSG:4326.
greg_gpd = gpd.GeoDataFrame(
    greg,
    geometry=gpd.points_from_xy(x=greg["Longitude"], y=greg["Latitude"]),
    crs="EPSG:4326",
)

TODO: figure out Shapely 2.0 later.

In [None]:
# Print the first rows of the GeoDataFrame, including the geometry column.
print(greg_gpd.head())
# Quick static plot of the points, currently disabled:
#greg_gpd.plot()

In [None]:
#greg_gpd_basemap, greg_gpd_extent = ctx.bounds2img(*greg_gpd.total_bounds, zoom=10,   
                                            #source=ctx.providers.OpenStreetMap.Mapnik )

In [None]:
# Register the Mapbox access token stored in the local "mapboxtoken" file.
# The file is read inside a `with` block so the handle is closed, and the
# token is stripped so a trailing newline does not become part of it.
with open("mapboxtoken", encoding="utf-8") as token_file:
    px.set_mapbox_access_token(token_file.read().strip())

# Scatter map of the stops: one marker per place, coloured and labelled by
# 'Place', sized by the 'Latitude' column. `animation_frame` is given as an
# explicit list of frame labels, one per row of greg_gpd.
fig = px.scatter_mapbox(
    greg_gpd,
    lat=greg_gpd.geometry.y,
    lon=greg_gpd.geometry.x,
    color='Place',
    hover_name='Place',
    size='Latitude',
    animation_group='Place',
    animation_frame=['Gonzales', 'Belmont', 'Laredo'],
    zoom=5,
    width=500,
    height=500,
    title="Gregorio Cortez on the run (June 14 to 22, 1901)",
    mapbox_style='carto-positron',
)

fig.show()

In [None]:
# Show the GeoDataFrame serialised by to_json() as the cell output.
greg_gpd.to_json()

In [None]:
import os
# Export the GeoDataFrame using the GeoJSON driver.
# NOTE(review): the target is an absolute, machine-specific Windows path with
# no file extension — confirm it is meant to be a file rather than a folder,
# and consider a path relative to the notebook. `os` is not used in this cell.
greg_gpd.to_file(r"C:\Users\dmlpz\Corridos", driver="GeoJSON")

In [None]:
# Save the map figure as an HTML file under ../corridos/.
fig.write_html("../corridos/gregmap.html")