In [30]:
import requests
import re
from bs4 import BeautifulSoup
import json
import pandas as pd
from pandas import DataFrame as df

In [2]:
page = requests.get("https://www.golflink.com/golf-courses/va/arlington/")
soup = BeautifulSoup(page.text, 'html.parser')

In [189]:
## TODO: pull address from Regex
# find distnace from apartment
from geopy.distance import geodesic
from geopy.geocoders import Nominatim


class Course:
    def __init__(self, name, num_holes, yardage, course_type, distance, address=None):
        self.name = name
        self.num_holes = num_holes.split(' ')[0]
        self.yardage = yardage.split(' ')[0]
        self.course_type = course_type
        self.distance = distance.split(' ')[0]
        self.address = address
            
    def get_address(self, geolocator):
        states = ["District of Columbia", "Virginia", "Maryland"]
        if self.address:
            for state in states:
                new_addr = self.address + " " + state
                course_location = geolocator.geocode(new_addr)
                if course_location:
                    break
            return course_location
        else:
            return None
    
    @property
    def distance_from_home(self):
        if self.address:
            geolocator = Nominatim(user_agent="testing")
            home = geolocator.geocode("1205 Half St SE Washington, DC")
            course = self.get_address(geolocator)
            if course:
                apartment = (home.latitude, home.longitude)
                course_addr = (course.latitude, course.longitude)
                distance = geodesic(apartment, course_addr).miles
                return distance
            else:
                return 0
        else:
            return 0
    
    def __repr__(self):
        return f'''
        -------------------------------------
        {self.name}
        -
        {self.num_holes} holes
        {self.yardage} yards
        {self.course_type}
        {str(round(self.distance_from_home, 2))} miles from home
        '''
        # {self.distance} miles to the center of Arlington
        
        
class CourseContainer:
    def __init__(self, name):
        self.name = name
        self.courses: List[Course] = []
#         self.df = pd.DataFrame(columns=["Course Name", "# of Holes", "Yardage", "Course Type", "Distance", "Address"])

    def add(self, course: Course) -> None:
        self.courses.append(course)
        
    def run(self):
        for course in self.courses:
            yield course
    
    def save(self, filename: str):
        new_courses = []
        for course in self.courses:
            new_courses.append([course.name, course.num_holes, course.yardage, course.course_type, course.distance_from_home, course.address])
        self.df = pd.DataFrame(new_courses, columns=["Course Name", "# of Holes", "Yardage", "Course Type", "Distance", "Address"])
        self.df.to_csv(filename, index=False, header=True)
            
c = CourseContainer("test")

courses = soup.find_all(class_ = 'resultContainer')
for course in courses[:]:

    course_name = course.h3.text
    container = course.find(class_ = 'textcontainer')
    
    street_address_re = re.compile(r"\d+[ ](?:[A-Za-z0-9.-]+[ ]?)+(?:Avenue|Lane|Road|Boulevard|Drive|Street|Pike|Turn|Ave|Dr|Rd|Blvd|Ln|St|Tpke|Hwy)\.?")
#     city_state_address_re = re.compile(r"\d+[ ](?:[A-Za-z0-9.-]+[ ]?)+(?:Avenue|Lane|Road|Boulevard|Drive|Street|Ave|Dr|Rd|Blvd|Ln|St)(?:.|\n)+(?:VA|MD|DC)\s(?:\d{5})")
    city_state_address_re = re.compile(r"[A-z]+,\s(?:VA|MD|DC)\s(?:\d{5})")
    
    m = re.search(r"([18|9]\sholes)(?:.|\n)+(\d,\d{3} yards)(?:.|\n)+(Public|Municipal|Private)(.|\n)+(\d{1,3}\s+mile)", container.text)
    street_address = re.search(street_address_re, container.text)
    city_state = re.search(city_state_address_re, container.text)
    try:
        print(street_address.group(0) + ", " + city_state.group(0))
    except AttributeError as e:
        print(container.text)
#         print("--- {}".format(city_state))
#         print(e)

    if m:
        num_holes = m.group(1)
        yardage = m.group(2)
        course_type = m.group(3)
        distance = m.group(5).replace("  ", " ") + "s"
        if m.group(4) == "1":
            distance = m.group(4) + distance
        if address:
            test = Course(course_name, num_holes, yardage, course_type, distance, address.group(0))
        else:
            test = Course(course_name, num_holes, yardage, course_type, distance)
        c.add(test)

1700 Army Navy Dr, Arlington, VA 22202
1700 Army Navy Dr, Arlington, VA 22202
1700 Army Navy Dr, Arlington, VA 22202
3017 N Glebe Rd, Arlington, VA 22207
972 Ohio Dr, Washington, DC 20024
972 Ohio Dr, Washington, DC 20024
972 Ohio Dr, Washington, DC 20024
6600 Little River Tpke, Alexandria, VA 22312
6023 Fort Hunt Rd, Alexandria, VA 22307
2600 Benning Rd, Washington, DC 20002
6700 Telegraph Rd, Alexandria, VA 22310
6100 16th St, Washington, DC 20011
5601 River Rd, Bethesda, MD 20816
6100 Connecticut Ave, Chase, MD 20815
7900 Lee Hwy, Church, VA 22042
1641 Tucker Rd, Washington, MD 20744
7900 Connecticut Ave, Chase, MD 20815
8600 Burdette Rd, Bethesda, MD 20817
7900 Telegraph Rd, Alexandria, VA 22315
800 Maple Ave, Vienna, VA 22180
8500 River Rd, Bethesda, MD 20817
8500 River Rd, Bethesda, MD 20817
8301 Old Keene Mill Rd, Springfield, VA 22152
10000 Oaklyn Dr, Potomac, MD 20854
3315 Old Lee Hwy, Fairfax, VA 22030
3315 Old Lee Hwy, Fairfax, VA 22030
7601 Bradley Blvd, Bethesda, MD 20817


In [80]:
for course in c.run():
    print(course.yardage)
    print(course.course_type)
    break

3,317
Private


In [82]:
c.save("courses_v2.csv")

In [28]:
!pip install geopy
import geopy

You should consider upgrading via the '/Users/grwills/Development/projects/golf_scraper/venv/bin/python -m pip install --upgrade pip' command.[0m


In [29]:
from geopy.geocoders import Nominatim
geolocator = Nominatim(user_agent="testing")
location = geolocator.geocode("1515 N Queen St Arlington")
print(location.address)

1515, North Queen Street, Colonial Village, Arlington, Arlington County, Virginia, 22209, United States


In [30]:
print((location.latitude, location.longitude))

(38.89111575440858, -77.07703275229085)


In [31]:
# from geopy.distance import geodesic
# apartment = (location.latitude, location.longitude)
# home_addr = geolocator.geocode("3109 Argent Path")
# print(home_addr)
# home = (home_addr.latitude, home_addr.longitude)
# distance = geodesic(apartment, home).miles

3109, Argent Path, Ellicott City, Howard County, Maryland, 21042, United States


In [153]:
test_address = "1641 Tucker Rd MD 20744"

In [154]:
geolocator = Nominatim(user_agent="testing")
home = geolocator.geocode("1205 Half St SE Washington, DC")
course_location = geolocator.geocode(test_address)

In [155]:
if course_location:
    distance = geodesic((course_location.longitude, course_location.latitude), (home.longitude, home.latitude)).miles
    print(round(distance, 2))
else:
    print("Couldn't find the address for the course.")

2.16
