In [1]:
from bs4 import BeautifulSoup
import pandas as pd
import requests
import datetime as dt

In [2]:
# Download the Flamengo 2020 fixtures page and parse it with Beautiful Soup.
# A browser-like User-Agent header is sent with the request.
headers = {
    'User-Agent':
        'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2526.106 Safari/537.36'
}

page = "https://www.worldfootball.net/teams/flamengo-rj/2020/3/"
# requests applies no timeout by default; bound the wait so the kernel cannot hang
pageTree = requests.get(page, headers=headers, timeout=30)
# Fail loudly on a 4xx/5xx answer instead of silently parsing an error page
pageTree.raise_for_status()
pageSoup = BeautifulSoup(pageTree.content, 'html.parser')

In [3]:
# Find every <a> tag whose text contains '/2020' (presumably the match-date links).
# `string=` is the current name of the deprecated `text=` argument of find_all;
# the `t and ...` guard skips tags that have no single text string (t is None).
a_in_2020 = pageSoup.find_all("a", string=lambda t: t and '/2020' in t)

In [4]:
# For each 2020 link, climb two levels up (a.parent.parent) and collect the
# text of every <td> found there, with trailing whitespace stripped and all
# newline / tab / carriage-return characters removed.
all_rounds = [
    [
        cell.text.rstrip().replace('\n', '').replace('\t', '').replace('\r', '')
        for cell in link.parent.parent.find_all("td")
    ]
    for link in a_in_2020
]

In [5]:
# Load the scraped rows into a DataFrame, one named column per table cell
column_names = [
    'Competition', 'Date', 'Time', 'Venue',
    'Temp1', 'Opponent', 'Result', 'Temp2',
]
df_rounds = pd.DataFrame(data=all_rounds, columns=column_names)

In [6]:
# Rows with an empty Time get a placeholder kick-off time of 20:00
times_bool = df_rounds['Time'].eq('')
df_rounds.loc[times_bool, 'Time'] = '20:00'

In [7]:
def convert_date_time(date, time):
    """Combine a date string and a time string into start/end datetimes.

    Args:
        date: Day of the match formatted as ``dd/mm/YYYY``.
        time: Kick-off time formatted as ``HH:MM``.

    Returns:
        A ``(start_time, end_time)`` tuple of naive ``datetime`` objects.
        ``start_time`` is the parsed kick-off with no timezone shift applied;
        ``end_time`` is exactly two hours later.

    Raises:
        ValueError: If the combined text does not match ``%d/%m/%Y %H:%M``.
    """
    kickoff = dt.datetime.strptime(' '.join((date, time)), '%d/%m/%Y %H:%M')
    match_length = dt.timedelta(hours=2)
    return kickoff, kickoff + match_length

def set_home_away(venue, opponent):
    """Build the fixture title with the home side listed first.

    Args:
        venue: Venue marker; ``'A'`` is treated as an away match, any other
            value as a home match.
        opponent: Name of the opposing team.

    Returns:
        ``'<opponent> x FLAMENGO'`` for away matches, otherwise
        ``'FLAMENGO x <opponent>'``.
    """
    return opponent + ' x FLAMENGO' if venue == 'A' else 'FLAMENGO x ' + opponent

In [8]:
# Create the StartTime & EndTime columns from Date & Time.
# The (start, end) pairs are built with a plain comprehension and wrapped in
# one DataFrame aligned on df_rounds.index. This avoids constructing a
# pd.Series per row through a row-wise apply, and it also works when
# df_rounds has no rows.
df_rounds[['StartTime', 'EndTime']] = pd.DataFrame(
    [convert_date_time(date, time)
     for date, time in zip(df_rounds['Date'], df_rounds['Time'])],
    index=df_rounds.index,
    columns=['StartTime', 'EndTime'],
)

In [9]:
# Create the Summary column (event title) from Venue and Opponent.
# A list comprehension yields exactly one title per row, so the assignment
# also works for an empty frame and skips the overhead of a row-wise apply.
df_rounds['Summary'] = [
    set_home_away(venue, opponent)
    for venue, opponent in zip(df_rounds['Venue'], df_rounds['Opponent'])
]

In [10]:
# Drop the scraped columns that are no longer needed; Result and the derived
# StartTime / EndTime / Summary columns are kept
df_rounds = df_rounds.drop(
    columns=['Competition', 'Date', 'Time', 'Venue', 'Temp1', 'Opponent', 'Temp2'])

In [11]:
from icalendar import Calendar, Event
import datetime

In [12]:
# Start an empty calendar and set its product id and version properties
cal = Calendar()

for _prop, _value in (
    ('prodid', '-//My calendar product//mxm.dk//'),
    ('version', '2.0'),
):
    cal.add(_prop, _value)

In [13]:
# Turn every row of df_rounds into one calendar event
for match in df_rounds.itertuples(index=False):
    event = Event()
    event.add('summary', match.Summary)
    # the Result text is stored in the event's location field
    event.add('location', match.Result)
    event.add('dtstart', match.StartTime)
    event.add('dtend', match.EndTime)
    # attach the finished event to the calendar
    cal.add_component(event)

In [14]:
# Serialize the calendar and write the bytes to disk. The context manager
# closes the file even if serialization or the write raises.
with open('ics.html', 'wb') as f:
    f.write(cal.to_ical())

In [15]:
#Flask
#from flask import Flask
#app = Flask(__name__)
#@app.route('/')
#def index():
#    return cal.to_ical()
#if __name__ == '__main__':
#    app.run()