In [326]:
import os
import re
from datetime import datetime

import pandas as pd
import requests
from bs4 import BeautifulSoup

# Scraping WSL's 2019 events' website

In [327]:

# Download the WSL 2019 events page.
# The timeout stops the cell from hanging forever on a stalled connection, and
# raise_for_status() fails here on an HTTP error instead of letting an error page
# be parsed as if it were the schedule.
r = requests.get('https://www.worldsurfleague.com/events/2019/mct', timeout=30)
r.raise_for_status()

In [328]:
# Peek at the first 500 characters of the response body to confirm that HTML came back.
print(r.text[:500])

<!doctype html>
<!--[if lt IE 7 ]><html lang="en" class="ie ie6 lte10 lte9 lte8 lte7 page-type--not-index page-type--events page-sub-type--events"><![endif]-->
<!--[if IE 7 ]><html lang="en" class="ie ie7 lte10 lte9 lte8 lte7 gt6 page-type--not-index page-type--events page-sub-type--events"><![endif]-->
<!--[if IE 8 ]><html lang="en" class="ie ie8 lte10 lte9 lte8 gt6 gt7 page-type--not-index page-type--events page-sub-type--events"><![endif]-->
<!--[if IE 9 ]><html lang="en" class="ie ie9 lte10 


## Checking the event date tags

In [329]:

# Parse the downloaded page with the 'html.parser' backend.
soup = BeautifulSoup(markup=r.text, features='html.parser')

In [330]:
# Collect the <td> cells whose class attribute is 'event-date-range first'.
results = soup.find_all(name='td', attrs={'class': 'event-date-range first'})

In [331]:
# Display the matched date cells to check what the selector returned.
results

[<td class="event-date-range first">Apr 3 - 13</td>,
 <td class="event-date-range first">Apr 17 - 27</td>,
 <td class="event-date-range first">May 13 - 25</td>,
 <td class="event-date-range first">May 29 - Jun 9</td>,
 <td class="event-date-range first">Jun 20 - 28</td>,
 <td class="event-date-range first">Jul 9 - 22</td>,
 <td class="event-date-range first">Aug 21 - Sep 1</td>,
 <td class="event-date-range first">Sep 19 - 21</td>,
 <td class="event-date-range first">Oct 3 - 13</td>,
 <td class="event-date-range first">Oct 16 - 28</td>,
 <td class="event-date-range first">Dec 8 - 20</td>]

## Looping through the tags to extract dates, event names and event locations

In [332]:
# Build one [start_date, event_name, location] record per event row of the schedule table.
# start_date is formatted as YYYY-MM-DD.
EVENT_YEAR = 2019  # the date cells on the page carry no year, so it is supplied here

events_total = []

for row in soup.find('tbody').find_all('tr'):
    date_cell = row.find('td', {'class': 'event-date-range first'})
    if date_cell is None:
        # Rows without a date cell are skipped.
        continue

    # The cell holds a range such as "Apr 3 - 13" or "May 29 - Jun 9".
    # Only the part before the separator (the first day) is needed, so split on the
    # hyphen / en dash once and keep the left-hand side.
    start_text = re.split(r'\s*[-\u2013]\s*', date_cell.text.strip(), maxsplit=1)[0]
    start_date = datetime.strptime(f'{EVENT_YEAR} {start_text}', '%Y %b %d')

    name = row.find('a', {'class': 'event-schedule-details__event-name'})
    location = row.find('span', {'class': 'event-schedule-details__location'})

    events_total.append([start_date.strftime('%Y-%m-%d'), name.text, location.text])

print(events_total)
        
     
    
                    

[['2019-04-03', 'Quiksilver Pro Gold Coast', 'Gold Coast, Queensland, Australia'], ['2019-04-17', 'Rip Curl Pro Bells Beach', 'Bells Beach, Victoria, Australia'], ['2019-05-13', 'Corona Bali Protected', 'Keramas, Bali, Indonesia'], ['2019-05-29', 'Margaret River Pro', 'Margaret River, Western Australia'], ['2019-06-20', 'Oi Rio Pro', 'Saquarema, Rio de Janeiro, Brazil'], ['2019-07-09', 'Corona Open J-Bay', 'Jeffreys Bay, Eastern Cape, South Africa'], ['2019-08-21', "Tahiti Pro Teahupo'o", 'Teahupoʻo, Tahiti'], ['2019-09-19', 'Freshwater Pro', 'Lemoore, California, United States'], ['2019-10-03', 'Quiksilver Pro France', 'Capbreton, Hossegor, Seignosse, Landes, Nouvelle-Aquitaine, France'], ['2019-10-16', 'MEO Rip Curl Pro Portugal', 'Peniche, Portugal'], ['2019-12-08', 'Billabong Pipe Masters', 'Banzai Pipeline, Oahu, Hawaii']]


## Creating a dataframe with the events' information

In [333]:
# Turn the scraped records into a table with one row per event.
df = pd.DataFrame(data=events_total)


In [334]:
# Label the three positional columns in the order the records were built.
event_columns = ['Starting date', 'Championship', 'Location']
df.columns = event_columns

In [335]:
# Display the events table.
df

Unnamed: 0,Starting date,Championship,Location
0,2019-04-03,Quiksilver Pro Gold Coast,"Gold Coast, Queensland, Australia"
1,2019-04-17,Rip Curl Pro Bells Beach,"Bells Beach, Victoria, Australia"
2,2019-05-13,Corona Bali Protected,"Keramas, Bali, Indonesia"
3,2019-05-29,Margaret River Pro,"Margaret River, Western Australia"
4,2019-06-20,Oi Rio Pro,"Saquarema, Rio de Janeiro, Brazil"
5,2019-07-09,Corona Open J-Bay,"Jeffreys Bay, Eastern Cape, South Africa"
6,2019-08-21,Tahiti Pro Teahupo'o,"Teahupoʻo, Tahiti"
7,2019-09-19,Freshwater Pro,"Lemoore, California, United States"
8,2019-10-03,Quiksilver Pro France,"Capbreton, Hossegor, Seignosse, Landes, Nouvel..."
9,2019-10-16,MEO Rip Curl Pro Portugal,"Peniche, Portugal"


## Saving data into CSV file

In [336]:
# Save the scraped events table as CSV.
# The destination folder defaults to the path originally used by this notebook; set the
# WSL_OUTPUT_DIR environment variable to write elsewhere without editing the cell.
output_dir = os.environ.get(
    'WSL_OUTPUT_DIR',
    '/Users/laurasg/Documents/Ironhack_data/PR03-project-web/output',
)
df.to_csv(os.path.join(output_dir, 'wsl-webscraping.csv'))

# Making an API request to World Weather Online to get the weather conditions on the first day of each competition

In [337]:
# Query the World Weather Online past-weather endpoint once per event, using the event's
# location and start date, and keep four readings per event:
# [maxtempC, mintempC, windspeedKmph, visibilityMiles].
# The two temperatures come from the day-level record; wind speed and visibility come
# from the first entry of that day's 'hourly' list.
WWO_PAST_WEATHER_URL = 'https://api.worldweatheronline.com/premium/v1/past-weather.ashx'

# The API key is read from the environment so that it is not stored in the notebook.
wwo_api_key = os.environ.get('WWO_API_KEY')
if not wwo_api_key:
    raise RuntimeError(
        'Set the WWO_API_KEY environment variable to your World Weather Online API key.'
    )

weather_total = []
for event in events_total:
    # event[0] is the start date and event[2] is the location.
    # Passing the query via `params` lets requests URL-encode the location
    # (spaces, commas, non-ASCII characters).
    response = requests.get(
        WWO_PAST_WEATHER_URL,
        params={
            'key': wwo_api_key,
            'q': event[2],
            'format': 'json',
            'date': event[0],
        },
        timeout=30,
    )
    response.raise_for_status()

    # Decode the JSON body once and reuse it for all four readings.
    day = response.json()['data']['weather'][0]
    first_hour = day['hourly'][0]

    weather_total.append([
        day['maxtempC'],
        day['mintempC'],
        first_hour['windspeedKmph'],
        first_hour['visibilityMiles'],
    ])

print(weather_total)
   


[['25', '22', '25', '5'], ['29', '21', '26', '6'], ['30', '23', '6', '6'], ['21', '12', '17', '6'], ['28', '17', '4', '6'], ['19', '11', '14', '6'], ['25', '11', '9', '6'], ['28', '15', '19', '6'], ['19', '14', '6', '6'], ['20', '13', '10', '6'], ['29', '22', '9', '6']]


## Creating a dataframe with the results

In [338]:
# Turn the per-event weather readings into a table with one row per event.
df_weather = pd.DataFrame(data=weather_total)

In [339]:
# Label the four positional columns in the order the readings were collected.
weather_columns = ['Max. temp.', 'Min. temp.', 'Wind speed Km/h', 'Visibility Miles']
df_weather.columns = weather_columns

In [340]:
# Display the weather table.
df_weather

Unnamed: 0,Max. temp.,Min. temp.,Wind speed Km/h,Visibility Miles
0,25,22,25,5
1,29,21,26,6
2,30,23,6,6
3,21,12,17,6
4,28,17,4,6
5,19,11,14,6
6,25,11,9,6
7,28,15,19,6
8,19,14,6,6
9,20,13,10,6


## Saving the weather data into a CSV file

In [341]:
# Save the weather table as CSV.
# The destination folder defaults to the path originally used by this notebook; set the
# WSL_OUTPUT_DIR environment variable to write elsewhere without editing the cell.
output_dir = os.environ.get(
    'WSL_OUTPUT_DIR',
    '/Users/laurasg/Documents/Ironhack_data/PR03-project-web/output',
)
df_weather.to_csv(os.path.join(output_dir, 'API-results.csv'))

## Adding the weather columns to the events dataframe

In [342]:
# Copy the maximum temperature readings into the events table under a new column,
# then display the updated table.
max_temp_column = df_weather['Max. temp.']
df['Max. temp C'] = max_temp_column
df

Unnamed: 0,Starting date,Championship,Location,Max. temp C
0,2019-04-03,Quiksilver Pro Gold Coast,"Gold Coast, Queensland, Australia",25
1,2019-04-17,Rip Curl Pro Bells Beach,"Bells Beach, Victoria, Australia",29
2,2019-05-13,Corona Bali Protected,"Keramas, Bali, Indonesia",30
3,2019-05-29,Margaret River Pro,"Margaret River, Western Australia",21
4,2019-06-20,Oi Rio Pro,"Saquarema, Rio de Janeiro, Brazil",28
5,2019-07-09,Corona Open J-Bay,"Jeffreys Bay, Eastern Cape, South Africa",19
6,2019-08-21,Tahiti Pro Teahupo'o,"Teahupoʻo, Tahiti",25
7,2019-09-19,Freshwater Pro,"Lemoore, California, United States",28
8,2019-10-03,Quiksilver Pro France,"Capbreton, Hossegor, Seignosse, Landes, Nouvel...",19
9,2019-10-16,MEO Rip Curl Pro Portugal,"Peniche, Portugal",20


In [343]:
# Copy the minimum temperature readings into the events table under a new column,
# then display the updated table.
min_temp_column = df_weather['Min. temp.']
df['Min. temp. C'] = min_temp_column
df

Unnamed: 0,Starting date,Championship,Location,Max. temp C,Min. temp. C
0,2019-04-03,Quiksilver Pro Gold Coast,"Gold Coast, Queensland, Australia",25,22
1,2019-04-17,Rip Curl Pro Bells Beach,"Bells Beach, Victoria, Australia",29,21
2,2019-05-13,Corona Bali Protected,"Keramas, Bali, Indonesia",30,23
3,2019-05-29,Margaret River Pro,"Margaret River, Western Australia",21,12
4,2019-06-20,Oi Rio Pro,"Saquarema, Rio de Janeiro, Brazil",28,17
5,2019-07-09,Corona Open J-Bay,"Jeffreys Bay, Eastern Cape, South Africa",19,11
6,2019-08-21,Tahiti Pro Teahupo'o,"Teahupoʻo, Tahiti",25,11
7,2019-09-19,Freshwater Pro,"Lemoore, California, United States",28,15
8,2019-10-03,Quiksilver Pro France,"Capbreton, Hossegor, Seignosse, Landes, Nouvel...",19,14
9,2019-10-16,MEO Rip Curl Pro Portugal,"Peniche, Portugal",20,13


In [344]:
# Copy the wind speed readings into the events table under the same column name,
# then display the updated table.
wind_speed_column = df_weather['Wind speed Km/h']
df['Wind speed Km/h'] = wind_speed_column
df


Unnamed: 0,Starting date,Championship,Location,Max. temp C,Min. temp. C,Wind speed Km/h
0,2019-04-03,Quiksilver Pro Gold Coast,"Gold Coast, Queensland, Australia",25,22,25
1,2019-04-17,Rip Curl Pro Bells Beach,"Bells Beach, Victoria, Australia",29,21,26
2,2019-05-13,Corona Bali Protected,"Keramas, Bali, Indonesia",30,23,6
3,2019-05-29,Margaret River Pro,"Margaret River, Western Australia",21,12,17
4,2019-06-20,Oi Rio Pro,"Saquarema, Rio de Janeiro, Brazil",28,17,4
5,2019-07-09,Corona Open J-Bay,"Jeffreys Bay, Eastern Cape, South Africa",19,11,14
6,2019-08-21,Tahiti Pro Teahupo'o,"Teahupoʻo, Tahiti",25,11,9
7,2019-09-19,Freshwater Pro,"Lemoore, California, United States",28,15,19
8,2019-10-03,Quiksilver Pro France,"Capbreton, Hossegor, Seignosse, Landes, Nouvel...",19,14,6
9,2019-10-16,MEO Rip Curl Pro Portugal,"Peniche, Portugal",20,13,10


In [345]:
# Copy the visibility readings into the events table under the same column name.
df['Visibility Miles'] = df_weather['Visibility Miles']
# End the cell with the frame so that re-running it actually displays the updated table.
df


Unnamed: 0,Starting date,Championship,Location,Max. temp C,Min. temp. C,Wind speed Km/h,Visibility Miles
0,2019-04-03,Quiksilver Pro Gold Coast,"Gold Coast, Queensland, Australia",25,22,25,5
1,2019-04-17,Rip Curl Pro Bells Beach,"Bells Beach, Victoria, Australia",29,21,26,6
2,2019-05-13,Corona Bali Protected,"Keramas, Bali, Indonesia",30,23,6,6
3,2019-05-29,Margaret River Pro,"Margaret River, Western Australia",21,12,17,6
4,2019-06-20,Oi Rio Pro,"Saquarema, Rio de Janeiro, Brazil",28,17,4,6
5,2019-07-09,Corona Open J-Bay,"Jeffreys Bay, Eastern Cape, South Africa",19,11,14,6
6,2019-08-21,Tahiti Pro Teahupo'o,"Teahupoʻo, Tahiti",25,11,9,6
7,2019-09-19,Freshwater Pro,"Lemoore, California, United States",28,15,19,6
8,2019-10-03,Quiksilver Pro France,"Capbreton, Hossegor, Seignosse, Landes, Nouvel...",19,14,6,6
9,2019-10-16,MEO Rip Curl Pro Portugal,"Peniche, Portugal",20,13,10,6


# Final dataframe with information from the API and Web Scraping

In [347]:
# Display the combined table: scraped event details plus the weather columns.
df

Unnamed: 0,Starting date,Championship,Location,Max. temp C,Min. temp. C,Wind speed Km/h,Visibility Miles
0,2019-04-03,Quiksilver Pro Gold Coast,"Gold Coast, Queensland, Australia",25,22,25,5
1,2019-04-17,Rip Curl Pro Bells Beach,"Bells Beach, Victoria, Australia",29,21,26,6
2,2019-05-13,Corona Bali Protected,"Keramas, Bali, Indonesia",30,23,6,6
3,2019-05-29,Margaret River Pro,"Margaret River, Western Australia",21,12,17,6
4,2019-06-20,Oi Rio Pro,"Saquarema, Rio de Janeiro, Brazil",28,17,4,6
5,2019-07-09,Corona Open J-Bay,"Jeffreys Bay, Eastern Cape, South Africa",19,11,14,6
6,2019-08-21,Tahiti Pro Teahupo'o,"Teahupoʻo, Tahiti",25,11,9,6
7,2019-09-19,Freshwater Pro,"Lemoore, California, United States",28,15,19,6
8,2019-10-03,Quiksilver Pro France,"Capbreton, Hossegor, Seignosse, Landes, Nouvel...",19,14,6,6
9,2019-10-16,MEO Rip Curl Pro Portugal,"Peniche, Portugal",20,13,10,6
