In [1]:
from requests import get
from requests.exceptions import RequestException
from contextlib import closing
from bs4 import BeautifulSoup
import pandas as pd

In [2]:
import requests
import re

In [3]:
# Landing page of the USA Water Polo club directory. The zone <select> on this
# page is parsed further down to obtain the zone IDs used for the searches.
url = 'https://webpoint.usawaterpolo.com/wp15/Companies/Clubs.wp'
# The timeout (seconds) keeps the cell from hanging forever on an unresponsive
# server; raise_for_status() turns an HTTP error response into an immediate
# failure instead of letting an error page be parsed as the directory.
response = get(url, timeout=30)
response.raise_for_status()

In [4]:
# Peek at the first 500 characters of the raw HTML to confirm the page loaded.
print(response.text[:500])

<!DOCTYPE html>
<html>
<head>
<title>USA Water Polo Clubs</title>
<meta charset="utf-8" />
<meta name="keywords" content="">
<meta name="description" content="">
<meta name="apple-mobile-web-app-capable" content="yes">
<meta name="apple-mobile-web-app-status-bar-style" content="black">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<meta http-equiv="X-UA-Compatible" content="IE=edge" />	
<link rel="stylesheet" type="text/css" media="screen" href="/wp_Media/css/


In [5]:
# Parse the landing page with the built-in 'html.parser' backend so the
# document can be searched by tag and attribute.
html_soup = BeautifulSoup(response.text, 'html.parser')
# Last expression of the cell: displays the type of the parsed object.
type(html_soup)

bs4.BeautifulSoup

In [6]:
# Print the first 20000 characters of the re-indented HTML to inspect the page
# structure by eye. NOTE(review): this is a large text output; consider
# clearing it before sharing the notebook.
print(html_soup.prettify()[:20000])

<!DOCTYPE html>
<html>
 <head>
  <title>
   USA Water Polo Clubs
  </title>
  <meta charset="utf-8"/>
  <meta content="" name="keywords"/>
  <meta content="" name="description"/>
  <meta content="yes" name="apple-mobile-web-app-capable"/>
  <meta content="black" name="apple-mobile-web-app-status-bar-style"/>
  <meta content="width=device-width, initial-scale=1.0" name="viewport"/>
  <meta content="IE=edge" http-equiv="X-UA-Compatible">
   <link href="/wp_Media/css/Normalize.css?version=2017_06_19" media="screen" rel="stylesheet" type="text/css"/>
   <link href="/wp_Media/css/Spinner.css?version=2018_06_09" media="screen" rel="stylesheet" type="text/css"/>
   <link href="/wp_Media/css/jquery-ui-1.8.23.custom.css?version=2018_09_05" media="screen" rel="stylesheet" type="text/css"/>
   <link href="/wp_Media/css/PageElements.css?version=2019_12_6" rel="stylesheet" type="text/css"/>
   <link href="/wp_Media/css/FE_Style.css?version=2019_05_12" media="screen" rel="stylesheet" type="text/css"

In [7]:
# Quick sanity check that the parsed document is the expected page.
print(html_soup.title)

<title>USA Water Polo Clubs</title>


In [8]:
# Locate the zone drop-down (<select id="CompanyParentID">) on the landing
# page, then collect every <option> tag inside it.
options = html_soup.find("select", id="CompanyParentID")
option_list = options.find_all("option")

In [9]:
# Display the <option> tags found in the zone drop-down.
option_list

[<option value="">-Select One-</option>,
 <option value="10008">Central California Zone</option>,
 <option value="10011">Coastal California Zone</option>,
 <option value="17972">International Zone</option>,
 <option value="10003">Midwest Zone</option>,
 <option value="10010">Mountain Zone</option>,
 <option value="1795">Northeast Zone</option>,
 <option value="10005">Pacific Northwest &amp; Hawaii Zone</option>,
 <option value="10009">Pacific Southwest Zone</option>,
 <option value="10006">Pacific Zone</option>,
 <option value="10002">Southeast Zone</option>,
 <option value="10007">Southern Pacific Zone</option>,
 <option value="10004">Southwest Zone</option>]

In [10]:
# Zone IDs are the `value` attributes of the options. The first option is the
# empty "-Select One-" placeholder, so it is skipped.
zones = [opt['value'] for opt in option_list[1:]]

In [11]:
# Display the collected zone IDs (kept as strings, exactly as found in the page).
zones

['10008',
 '10011',
 '17972',
 '10003',
 '10010',
 '1795',
 '10005',
 '10009',
 '10006',
 '10002',
 '10007',
 '10004']

In [12]:
# Scrape the club list of every zone by submitting the directory's search form
# once per zone ID and collecting one dict per club into `clubs`.
clubs = []

for zone in zones:
    # Form fields of the club search: only the zone filter is filled in, the
    # remaining fields are sent empty.
    payload = {
        'CompanyParentID': zone,
        'CompanyName': '',
        'CompanyState': '',
        'geo_Zip': '',
        'geo_Miles': '',
        'CustomCompanyGroup1': '',
        'Submit': 'Go'
    }

    # The timeout (seconds) stops one stalled request from hanging the whole
    # scrape; raise_for_status() fails loudly on an HTTP error instead of
    # parsing an error page.
    r = requests.post(url, data=payload, timeout=30)
    r.raise_for_status()
    r_soup = BeautifulSoup(r.text, 'html.parser')

    club_info = r_soup.find("div", attrs={"class": "mobilelist fe-list-clubs"})
    if club_info is None:
        # No results container in this response; report it and move on rather
        # than crashing with an opaque AttributeError on None.
        print(f"No club list found for zone {zone}; skipping")
        continue

    for li in club_info.find_all("li"):
        # The location is the <p> without a class attribute; look it up once
        # and fall back to "Unknown" when the entry has none.
        loc_tag = li.find("p", attrs={"class": None})
        # Values are stored as the raw tag text, so they still carry the
        # page's surrounding whitespace and labels (e.g. "Club ID:").
        club = {
            'club_name': li.h3.text,
            'club_loc': loc_tag.text if loc_tag is not None else "Unknown",
            'club_id': li.find("div", attrs={"class": "pad-5 org-icons-container"}).text,
            'club_zone': zone,  # the zone's ID string, not its display name
        }
        clubs.append(club)

In [13]:
# Show only the first few records as a sanity check; displaying the full
# scraped list would flood the notebook output.
clubs[:5]

[{'club_name': ' 209 Water Polo Club',
  'club_loc': 'Primary Pool:\r\n\t\t\t\tBuhach Colony High School\r\n\t\t\t\t1800 Buhach RoadAtwater, CA\r\n\t\t\t\t',
  'club_id': '\r\n\t\t\t\t \r\n\t\t\t\t\tClub ID: 31318\n\xa0\r\n\t\t\t\t\r\n\t\t\t',
  'club_zone': '10008'},
 {'club_name': ' A-Town Water Polo Club',
  'club_loc': 'Primary Pool:\r\n\t\t\t\tAtascadero High School\r\n\t\t\t\t1 High School HillAtascadero, CA\xa0\xa093422\r\n\t\t\t\t',
  'club_id': '\r\n\t\t\t\t \r\n\t\t\t\t\tClub ID: 30234\n\xa0\r\n\t\t\t\t\r\n\t\t\t',
  'club_zone': '10008'},
 {'club_name': ' Alumni Water Polo Club',
  'club_loc': 'Primary Pool:\r\n\t\t\t\tGranite Bay High School Pool\r\n\t\t\t\t1 Grizzly WayGranite Bay, CA\xa0\xa095746\r\n\t\t\t\t',
  'club_id': '\r\n\t\t\t\t \r\n\t\t\t\t\tClub ID: 23883\n\xa0\r\n\t\t\t\t\r\n\t\t\t',
  'club_zone': '10008'},
 {'club_name': ' American River Water Polo Club',
  'club_loc': 'Primary Pool:\r\n\t\t\t\tRio Americano High School\r\n\t\t\t\t4540 American River DrSacram

In [14]:
# Build the DataFrame in one step from the list of per-club dicts; each dict
# key (club_name, club_loc, club_id, club_zone) becomes a column.
clubs_df = pd.DataFrame(clubs)

In [15]:
# Preview the first 20 rows of the scraped table.
clubs_df.head(20)

Unnamed: 0,club_name,club_loc,club_id,club_zone
0,209 Water Polo Club,Primary Pool:\r\n\t\t\t\tBuhach Colony High Sc...,\r\n\t\t\t\t \r\n\t\t\t\t\tClub ID: 31318\n \r...,10008
1,A-Town Water Polo Club,Primary Pool:\r\n\t\t\t\tAtascadero High Schoo...,\r\n\t\t\t\t \r\n\t\t\t\t\tClub ID: 30234\n \r...,10008
2,Alumni Water Polo Club,Primary Pool:\r\n\t\t\t\tGranite Bay High Scho...,\r\n\t\t\t\t \r\n\t\t\t\t\tClub ID: 23883\n \r...,10008
3,American River Water Polo Club,Primary Pool:\r\n\t\t\t\tRio Americano High Sc...,\r\n\t\t\t\t \r\n\t\t\t\t\tClub ID: 85\n \r\n\...,10008
4,Arroyo Grande Water Polo,Primary Pool:\r\n\t\t\t\tArroyo Grande High Sc...,\r\n\t\t\t\t \r\n\t\t\t\t\tClub ID: 21063\n \r...,10008
5,Asa Aquatics,Primary Pool:\r\n\t\t\t\tEl Capitan High Schoo...,\r\n\t\t\t\t \r\n\t\t\t\t\tClub ID: 19513\n \r...,10008
6,Bakersfield Water Polo Club,Primary Pool:\r\n\t\t\t\tGarces Memorial High ...,\r\n\t\t\t\t \r\n\t\t\t\t\tClub ID: 16629\n \r...,10008
7,BarDown Water Polo Academy,Primary Pool:\r\n\t\t\t\tCSU Stanislaus\r\n\t\...,\r\n\t\t\t\t \r\n\t\t\t\t\tClub ID: 31674\n \r...,10008
8,Blazer Water Polo Club,Primary Pool:\r\n\t\t\t\tGolden West High Scho...,\r\n\t\t\t\t \r\n\t\t\t\t\tClub ID: 16120\n \r...,10008
9,Bulldog Water Polo Club,Primary Pool:\r\n\t\t\t\tFresno State Aquatics...,\r\n\t\t\t\t \r\n\t\t\t\t\tClub ID: 31612\n \r...,10008


In [16]:
# Persist the scraped table to the working directory. With pandas' default
# settings the row index is written as an extra, unnamed first column.
# NOTE(review): pass index=False if downstream readers do not need it.
clubs_df.to_csv('club_info.csv')