In [1]:
import requests
import csv
import pandas as pd
import re

# imports for webscraping
from bs4 import BeautifulSoup, Comment
import folium
import numpy as np

In [2]:
def parse_regions(soup, breaks):
    spots_list = soup.find('div', attrs={'class':'block spot-list'})
    
    if spots_list:
        spots = spots_list.findAll('a', attrs={'class':'country'})
        for spot in spots:
            url = spot.attrs['href']
            req = requests.get(url)
            local_soup = BeautifulSoup(req.content, 'html.parser')
            found_spots = parse_regions(local_soup, breaks)
            breaks.extend(found_spots)
    else:
        try:
            regional_spots = soup.find('table', attrs={'class':'spotTable'}).findAll('a')
            return regional_spots
        except AttributeError as e:
            return []
    
    return []
    

In [3]:
url = 'https://surfing-waves.com/atlas.html'
req = requests.get(url)   # send request to the starting url

soup = BeautifulSoup(req.content, 'html.parser')

In [4]:
breaks = []
parse_regions(soup, breaks)

[]

In [6]:
data = []
for b in breaks:
    surl = b.attrs['href']
    sreq = requests.get(surl)
    ssoup = BeautifulSoup(sreq.content, 'html.parser')

    country = re.search(r'\/atlas\/\w+\/(\w+)', surl).group(1)
    
    latlong = re.search(r'=(-?\d+\.\d+),(-?\d+\.\d+)', ssoup.find('div', attrs={'class':'map big_map responsive_map'}).iframe.attrs['src'])
    lat = latlong.group(1)
    long = latlong.group(2)
    
    dets = ssoup.find('table', attrs={'class':'spot-details'})
    wave_type = dets.findAll('td')[1].text
    direction = dets.findAll('td')[2].text
    bottom = dets.findAll('td')[3].text
    difficulty = dets.findAll('td')[4].text
    
    data.append([country, b.text, lat, long, wave_type, direction, bottom, difficulty])
    # coords.append((country.text, spot.text, lat, long))

In [18]:
df = pd.DataFrame(np.array(data),
                   columns=['Country', 'Break', 'Lat', 'Long', 'Wave_type', 'Direction', 'Bottom', 'Difficulty'])
df.to_csv('surf_breaks.csv', index=False)

In [16]:
df.head()

Unnamed: 0,Country,Break,Lat,Long,Wave_type,Direction,Bottom,Difficulty
0,ghana,Busua Beach,4.80294370651245,-1.92466735839844,Beach break,Right & left,Sand,Beginners
1,ghana,The Point,5.4937686920166,-0.36616912484169,Point break,Right & left,Sand & Rock,Intermediate surfer
2,ghana,Till's,5.41222715377808,-0.46760559082031,Beach break,Right & left,Sand,Intermediate surfer
3,ghana,White Sands Point,5.41842174530029,-0.46181201934814,Point break,Right,Sand & Rock,Advanced surfer
4,madagascar,Flameballs,-23.64441871643066,43.57177734375,Reef break,Left,Boulders,Advanced surfer


In [14]:
# Make an empty map
m = folium.Map(location=[20,0], tiles="OpenStreetMap", zoom_start=3)

for i in range(len(df)):
    folium.Marker(
        location=[df.iloc[i]['Lat'], df.iloc[i]['Long']],
        popup=df.iloc[i]['Break'],
    ).add_to(m)

# Show the map again
m

In [45]:
surl = spots[0].attrs['href']
sreq = requests.get(surl)
ssoup = BeautifulSoup(sreq.content, 'html.parser')

In [47]:
dets = ssoup.find('table', attrs={'class':'spot-details'})

In [58]:
dets.findAll('td')

[<td>2</td>,
 <td>Beach break</td>,
 <td>Right &amp; left</td>,
 <td>Sand</td>,
 <td>Beginners</td>,
 <td>Empty</td>,
 <td>none</td>]

In [4]:
continents = soup.find('div', attrs={'class':'block spot-list'}).findAll('a', attrs={'class':'continent'})
countries = soup.find('div', attrs={'class':'block spot-list'}).findAll('a', attrs={'class':'country'})
locations = soup.find('div', attrs={'class':'block spot-list'}).findAll('a')

In [41]:
coords = []
# for country in countries:
#     # print(country.text)
#     curl = country.attrs['href']
#     creq = requests.get(curl)
#     csoup = BeautifulSoup(creq.content, 'html.parser')
    
#     spots = []
#     regions = csoup.find('div', attrs={'class':'block spot-list'})
#     # check if the country was broken down into regions
#     if regions:
#         for region in regions.findAll('a', attrs={'class':'country'}):
#             # print(region)
#             rurl = region.attrs['href']
#             rreq = requests.get(rurl)
#             rsoup = BeautifulSoup(rreq.content, 'html.parser')
            
#             # attempt to grab all spots for the region
#             try:
#                 regional_spots = rsoup.find('table', attrs={'class':'spotTable'}).findAll('a')
#                 spots.extend(regional_spots)
#             except AttributeError as e:
#                 print(f'{region.text} has no spots')
#                 continue
#     else:
#         # attempt to grab all spots for the country
#         try:
#             regional_spots = csoup.find('table', attrs={'class':'spotTable'}).findAll('a')
#             spots.extend(regional_spots)
#         except AttributeError as e:
#             print(f'{country.text} has no spots')
#             continue
    
    # find the latitude and longitude of each location by parsing the marker URL
