### Required Libraries


In [0]:
!pip install geopandas

In [0]:
# Import the necessary libraries
import os
import re
import time
import urllib.request
import warnings
from getpass import getpass

import geopandas as gpd
import numpy as np
import pandas as pd
from shapely import wkt

# Libraries for web scraping
import requests
from bs4 import BeautifulSoup

# Silence every warning for the rest of the session
warnings.filterwarnings("ignore")

pd.options.display.max_columns = 400 # display at most 400 columns
pd.options.display.max_rows = 400 # display at most 400 rows
pd.options.display.max_colwidth = 200 # display at most 200 characters per cell

### Google Authentication

In [146]:
# Authenticate this Colab session with Google
from google.colab import auth
auth.authenticate_user()
print('Authenticated')

Authenticated


In [147]:
# Mount Google Drive at /content/drive so its files can be read and written as local paths
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [0]:
# Authenticate with Google (repeats the authentication done in the earlier cell)
from google.colab import auth
auth.authenticate_user()

# Build an authorised gspread client for reading Google Sheets
# NOTE(review): `gc` is not used by any cell visible in this part of the notebook - confirm it is still needed.
import gspread
from oauth2client.client import GoogleCredentials
gc = gspread.authorize(GoogleCredentials.get_application_default())

In [0]:
# Output folder on the mounted Drive (used when saving the GeoJSON file).
# NOTE(review): this is a relative path, while Drive is mounted at the absolute path /content/drive,
# so it only resolves when the working directory is /content - confirm.
root_path = 'drive/My Drive/Personal Projects/2020/Sport_CASA/sport/data/'  

In [0]:
# Point the gcloud CLI at the Google Cloud project used by this notebook
project_id = 'nature-casa'
!gcloud config set project {project_id}

### Step1 - Lausanne 2020 Venues Lists


#### Get the names of the Lausanne venues

In [0]:
# Venues page of the Lausanne 2020 website (the next cell assigns the same value again)
url = 'https://www.lausanne2020.sport/en/venues'

In [0]:
# Url of the page to scrape
url = 'https://www.lausanne2020.sport/en/venues'

# Download the page; the timeout stops the cell from hanging forever, and
# raise_for_status() fails here instead of silently parsing an HTTP error page.
r = requests.get(url, timeout=30)
r.raise_for_status()

# Name the parser explicitly so the parse tree does not depend on which
# optional parsers happen to be installed in the environment.
soup = BeautifulSoup(r.content, 'html.parser')

# Find the list of all venues: one <div> with this exact class string per venue
article_content = soup.find_all('div',attrs={'class':'col-12 col-lg-4 grid__item--description'})

# The venue name is the text of the first <h2> inside each venue block
venues_list = [venue_block.find('h2').text for venue_block in article_content]

In [100]:
# Tabulate the scraped venue names; 'merge_venues' holds the same name with
# every space removed (it is the value later sent to the geocoding request).
data = pd.DataFrame(venues_list, columns=['venues']).assign(
    merge_venues=lambda frame: frame['venues'].str.replace(' ','')
)
data

Unnamed: 0,venues,merge_venues
0,Lausanne,Lausanne
1,Les Diablerets,LesDiablerets
2,Leysin,Leysin
3,Villars,Villars
4,Vallée de Joux,ValléedeJoux
5,Les Tuffes,LesTuffes
6,Champéry,Champéry
7,St. Moritz,St.Moritz


#### Get the GeoLocation of the Venues - API Request

In [0]:
import urllib
import requests
import json

# Google Maps API key.
# Never hard-code the key in the notebook: it is read from the GOOGLE_MAPS_API_KEY
# environment variable, with an interactive prompt as fallback so that the secret
# is not stored in the saved file.
# NOTE: the key that used to be written on this line has been exposed and should be
# revoked / rotated in the Google Cloud console.
API_key = os.environ.get('GOOGLE_MAPS_API_KEY') or getpass('Google Maps API key: ')

## Geocoding request with JSON output
## https://developers.google.com/maps/documentation/geocoding
GEOCODE_URL = 'https://maps.googleapis.com/maps/api/geocode/json'

def Get_LatLng(row):
    """Geocode one venue with the Google Geocoding API.

    Parameters
    ----------
    row : mapping (e.g. a DataFrame row)
        Must provide 'merge_venues'; ',Switzerland' is appended to it to form
        the address that is searched.

    Returns
    -------
    dict
        The parsed JSON body of the API response.

    Raises
    ------
    requests.RequestException
        On connection problems, timeout, or a non-success HTTP status.
    """
    # Let requests build the query string so that accents, dots and other
    # special characters in the venue name are URL-encoded correctly.
    params = {
        'address': str(row['merge_venues']) + ',Switzerland',
        'key': str(API_key),
    }
    response = requests.get(GEOCODE_URL, params=params, timeout=30)
    response.raise_for_status()
    return response.json()

In [0]:
# Run the Google API request once per row (venue) and store the parsed response in a new column
data['API_response'] = data.apply(Get_LatLng, axis=1)

In [0]:
# Preview the first rows, now including the 'API_response' column
data.head()

In [0]:
def Get_GeoLocation(dataframe):
    """Extract latitude and longitude from the first geocoding result of each row.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Must contain an 'API_response' column whose values are the parsed
        JSON dictionaries of the geocoding responses.

    Returns
    -------
    (lat_list, lng_list) : tuple of two lists
        One entry per row, in row order. Rows whose response has no result
        get NaN in both lists, so the lists always have one entry per row.
    """
    missing = float('nan')

    # lists storing the geolocation of every row
    lat_list = []
    lng_list = []

    # Iterate over the column values themselves instead of using 0..n-1 as
    # index labels, so a filtered or re-indexed frame works too.
    for api_rep in dataframe['API_response']:
        results = api_rep.get('results') if isinstance(api_rep, dict) else None
        if not results:
            # e.g. a response with an empty 'results' list: keep the row, mark it as missing
            lat_list.append(missing)
            lng_list.append(missing)
            continue

        location = results[0]['geometry']['location']
        lat_list.append(location['lat'])
        lng_list.append(location['lng'])

    return lat_list, lng_list

In [136]:
# Unpack the two returned lists into the 'lat' and 'lng' columns, then preview three rows.
# NOTE(review): the saved output of this cell also shows a 'geolocation' column that no visible
# cell creates - probably left over from a deleted cell; confirm with a fresh kernel run.
data['lat'],data['lng'] = Get_GeoLocation(data)
data.head(3)

Unnamed: 0,venues,merge_venues,API_response,geolocation,lat,lng
0,Lausanne,Lausanne,"{'results': [{'address_components': [{'long_name': 'Lausanne', 'short_name': 'Lausanne', 'types': ['locality', 'political']}, {'long_name': 'Lausanne District', 'short_name': 'Lausanne District', ...","{'lat': 46.5196535, 'lng': 6.6322734}",46.519653,6.632273
1,Les Diablerets,LesDiablerets,"{'results': [{'address_components': [{'long_name': 'Les Diablerets', 'short_name': 'Les Diablerets', 'types': ['locality', 'political']}, {'long_name': 'Aigle District', 'short_name': 'Aigle Distr...","{'lat': 46.351389, 'lng': 7.158055999999999}",46.351389,7.158056
2,Leysin,Leysin,"{'results': [{'address_components': [{'long_name': 'Leysin', 'short_name': 'Leysin', 'types': ['locality', 'political']}, {'long_name': 'Aigle District', 'short_name': 'Aigle District', 'types': [...","{'lat': 46.3435634, 'lng': 7.012033}",46.343563,7.012033


#### Transform df into geopandas - save geojson

In [0]:
## Create a Point Geometry from Lat, Lng - Create GeoDataFrame
## points_from_xy expects x = longitude, y = latitude.
## The coordinates come from the Google geocoder (WGS84 degrees), so the CRS is
## declared explicitly; without it the GeoDataFrame has no CRS and cannot be reprojected.
data_gpd = gpd.GeoDataFrame(
    data,
    geometry=gpd.points_from_xy(data.lng, data.lat),
    crs='EPSG:4326',
)

In [145]:
# Preview the first two rows, now including the 'geometry' column
data_gpd.head(2)

Unnamed: 0,venues,merge_venues,API_response,geolocation,lat,lng,geometry
0,Lausanne,Lausanne,"{'results': [{'address_components': [{'long_name': 'Lausanne', 'short_name': 'Lausanne', 'types': ['locality', 'political']}, {'long_name': 'Lausanne District', 'short_name': 'Lausanne District', ...","{'lat': 46.5196535, 'lng': 6.6322734}",46.519653,6.632273,POINT (6.63227 46.51965)
1,Les Diablerets,LesDiablerets,"{'results': [{'address_components': [{'long_name': 'Les Diablerets', 'short_name': 'Les Diablerets', 'types': ['locality', 'political']}, {'long_name': 'Aigle District', 'short_name': 'Aigle Distr...","{'lat': 46.351389, 'lng': 7.158055999999999}",46.351389,7.158056,POINT (7.15806 46.35139)


In [0]:
# Save the data into a GeoJson File
# Only the venue name and its point geometry are kept; the file is written under root_path
data_tosave = data_gpd[['venues','geometry']]
data_tosave.to_file(root_path+"lausanne2020_venues.geojson", driver='GeoJSON')

In [0]:
# Save the whole `data` table as a CSV file in the current working directory.
# NOTE(review): nothing is uploaded to a bucket here - the gsutil copy below is commented out,
# and it targets gs://nature-london while the project configured earlier is 'nature-casa'.
data.to_csv('lausanne2020_venues.csv')
#!gsutil cp  'lausanne2020_venues.csv' gs://nature-london/lausanne2020_venues.csv

### Step 2 - Get the Sport List


In [34]:
# Reuse the venues page parsed in Step 1 (`soup`): select the <div> blocks that hold the sports
sport_content = soup.find_all('div',attrs={'class':'col-12 col-lg-4 grid__item--sports'})
# Display the first block to inspect its HTML structure
sport_content[0]

<div class="col-12 col-lg-4 grid__item--sports"><div class="pictograms"><a href="https://www.lausanne2020.sport/en/sports/ice_hockey"><figure><img alt="Ice Hockey" src="https://yog2020.s3.eu-west-3.amazonaws.com/assets/img/pictograms/white/ice_hockey.png" title="Ice Hockey"/></figure><span>Ice Hockey</span></a><a href="https://www.lausanne2020.sport/en/sports/figure-skating"><figure><img alt="Figure Skating" src="https://yog2020.s3.eu-west-3.amazonaws.com/assets/img/pictograms/white/figure_skating.png" title="Figure Skating"/></figure><span>Figure Skating</span></a><a href="https://www.lausanne2020.sport/en/sports/short_track_speed_skating"><figure><img alt="Short Track Speed Skating" src="https://yog2020.s3.eu-west-3.amazonaws.com/assets/img/pictograms/white/short_track_speed_skating.png" title="Short Track Speed Skating"/></figure><span>Short Track Speed Skating</span></a><a href="https://www.lausanne2020.sport/en/en-jeux!-festival?venues[]=Lausanne"><figure class="activities"><img a

In [35]:
# Print the text of the first <span> of every sports block.
# find() returns only the first match, so one sport is printed per block.
for sports_block in sport_content:
    first_span = sports_block.find('span')
    print(first_span.text)

Ice Hockey
Alpine Skiing
Freestyle Skiing
Freestyle Skiing
Cross-Country Skiing
Biathlon
Curling
Bobsleigh
