In [83]:
'''  To Dos   '''
# create a search function to search for specific team
# to search for specific time
# to seach specific competition
# Change time to BST

import bs4
import requests
from datetime import datetime, timedelta
import pprint

pp = pprint.PrettyPrinter()

In [3]:
dateToday = datetime.today()

'''
- goal.com uses date to display live score
- but timezone used is IST. so 0:00 to 0:30 in BD it's a new date.
- But in IST it's still previous date.
- So, check if current time is between 0:00 t0 0:30
- If it is then change date to previous date
'''
if dateToday.hour == 0 and dateToday.minute < 30:
    dateToday -= timedelta(1)

date = dateToday.strftime('%Y-%m-%d')

'''
- preferred league names
- dictionary containing name and id of league given by goal.com
'''
leagueName = ('Premier League', 'Primera División', 'Serie A', 'Bundesliga',
              'Ligue 1', 'UEFA Champions League')
leagueId = {'8': 'Premier League', '7': 'Primera División', '9': 'Bundesliga',
            '13': 'Serie A', '16': 'Ligue 1', '10': 'UEFA Champions League'}

'''
- Many Comp has same Name; There is two Premier League in the website
- I only want EPL, 1st One
- So if I encounter Premier League I need to set its flag as 1
'''
leagueFlag = {'Premier League': 0, 'Primera División': 0, 'Serie A': 0,
              'Bundesliga': 0, 'Ligue 1': 0, 'UEFA Champions League': 0}

'''
- url for live score
- url for match fixture
'''
liveUrl = 'http://goal.com/en-in/live-scores'
fixtureUrl = 'http://goal.com/en-in/fixtures/'


In [9]:
def getLiveMatches():
    '''
    - send scores of live matches
    This will send data for live matches only
    So the data for matches which have finished or have not yet started
        won't be sent
    Goal uses 3 formats to show match status
    1. HH:MM (20:30)    Match not yet started
    2. FT               Match finished
    3. MM'              Match is running at MM minute
    '''

    res = requests.get(liveUrl)
    soup = bs4.BeautifulSoup(res.text, 'lxml')
    matchDay = soup.find(
        lambda tag: tag.name == 'section' and
        tag.get('class') == ['matchday'] and tag.get('data-day') == date
        )
    allComp = matchDay.find_all('table', {'class': 'matches'})

    '''
    - matches will be stored in a list where all competition will be in their own list
    - Format:  [ [comp1, (status, Home1, Score, Away1), .... ],
                 [comp2, (status, Home2, Score, Away2), .... ] ]
    '''
    matchList = []

    for allMatch in allComp:
        compList = []

        matches = allMatch.find_all('tbody', {'class': 'match clickable '})
        comp = allMatch.find('span', {'class': 'comp-title'}).string

        if comp not in leagueName:
            # This is out of my preferred league
            continue
        if leagueFlag[comp] == 1:
            # Already got this leagues data. This is duplicate
            continue
        else:
            leagueFlag[comp] = 1

        compList.append(comp)

        for matchData in matches:
            match = matchData.find('tr')

            status = match.find('span').string.split('\n')[1]

            if ':' in status or status == "FT":
                continue

            home = match.find('td', {'class': 'team'})
            away = home.find_next_sibling('td', {'class': 'team'})

            homeTeam = home.find('span').string
            awayTeam = away.find('span').string

            vs = match.find('td', {'class': 'vs'})
            result = vs.find('div').string.split('\n')[1]

            compList.append((status, homeTeam, result, awayTeam))
        matchList.append(compList)

    return matchList        

In [7]:
def getFixture(date):
    '''
    - Send Fixtures of matches played/to be played in "date"

    - Fixture will be stored in a list where all competition will be in their own list
    - Format:  [ [comp1, (status, Home1, Away1), .... ],
                 [comp2, (status, Home2, Away2), .... ] ]
    '''
    fixtureList = []

    url = fixtureUrl + date

    res = requests.get(url)
    soup = bs4.BeautifulSoup(res.text, 'lxml')

    allComp = soup.find_all('table', {'class': 'match-table '})

    for comp in allComp:

        id = comp.get('data-competition-id')

        if id not in leagueId:
            continue

        compList = []
        compList.append(leagueId[id])

        matches = comp.find_all('tr', {'class': 'clickable '})

        for match in matches:

            status = match.find('td', {'class': 'status'}).string.lstrip('\n')

            home = match.find('td', {'class': 'team'})
            away = home.find_next_sibling('td', {'class': 'team'})

            homeTeam = home.find('span').string
            awayTeam = away.find('span').string

            compList.append((status, homeTeam, awayTeam))

        fixtureList.append(compList)

    return fixtureList

In [8]:
def printMatchInfo(matchInfo):
    for matches in matchInfo:
        print(matches[0])
        print('-' * len(matches[0]))

        if(len(matches) == 1):
            print("No matches!")

        for match in matches[1:]:
            for info in match:
                print(info, end=" ")
            print()
        print()

In [10]:
matchInfo = getLiveMatches()
printMatchInfo(matchInfo)

print("Fixture")
fixtureInfo = getFixture('2017-05-08')
printMatchInfo(fixtureInfo)

<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML Basic 1.1//EN" "http://www.w3.org/TR/xhtml-basic/xhtml-basic11.dtd">
<html data-status-code="200" xml:lang="en" xmlns="http://www.w3.org/1999/xhtml"> <head> <meta content="text/html; charset=utf-8" http-equiv="Content-Type"/> <meta content="Get the latest live football scores, results &amp; fixtures from across the world, including Premier League, powered by Goal.com" name="description"/> <meta content="width=device-width, initial-scale=1" name="viewport"/> <title>Live Football Scores, Fixtures &amp; Results | Goal.com</title> <style type="text/css"> body,html{font-family:Arial,Helvetica,Helvetica Neue,sans-serif;font-size:16px;color:#081f2c;padding:0;margin:0}a{text-decoration:none;color:#00a9ce}h1,h2,h3,h4,h5,h6{font-weight:700;color:#081f2c;margin:10px 0}h1{font-size:18px}h2,h3{font-size:16px}h5,h6{font-size:14px}ul{padding-left:10px;margin-left:0}.page-container{padding:2px}.page-header,.widget-headline{font-size:18px}.page-header,.part-titl

AttributeError: 'NoneType' object has no attribute 'find_all'

In [95]:
liveUrl = 'https://www.goal.com/en-in/fixtures/2020-08-07'
res = requests.get(liveUrl)
soup = bs4.BeautifulSoup(res.text, 'lxml')

In [96]:
competitionSectionList = soup.find_all('div', class_='competition-matches')
# print(competitionSectionList)
liveInfo = list()
for competition in competitionSectionList:
#     print(competition)
    competitionTitle = extractionCompetitionTitleFromCompetitionSection(competition)
    competitionUrl = extractCompetitionUrlFromCompetitionSoup(competition)
    matchSoupList = extractAllMatchesFromCompetitionSoup(competition)
    matchInfoList = list()
    for matchSoup in matchSoupList:
        matchInfo = extractMatchInfo(matchSoup)
        matchInfoList.append(matchInfo)
    competitionInfo = dict()
    competitionInfo = {
        'title': competitionTitle,
        'url': competitionUrl,
        'matches': matchInfoList
    }
    liveInfo.append(competitionInfo)
    print(pp.pprint(competitionInfo))

{'matches': [{'score': {'away': '0', 'home': '1'},
              'state': 'FT',
              'teams': {'away': {'title': 'Eintracht Frankfurt',
                                 'url': '/en-in/match/basel-v-eintracht-frankfurt/1e1gxrkqmlrjc9pqu8p6wwd4a'},
                        'home': {'title': 'Basel',
                                 'url': '/en-in/match/basel-v-eintracht-frankfurt/1e1gxrkqmlrjc9pqu8p6wwd4a'}},
              'time': '07/08/20 (12:30 AM IST)'},
             {'score': {'away': '0', 'home': '1'},
              'state': 'FT',
              'teams': {'away': {'title': 'Olympiakos Piraeus',
                                 'url': '/en-in/match/wolverhampton-wanderers-v-olympiakos-piraeus/90x8ybb34ok5g3ai7rbb6a9uy'},
                        'home': {'title': 'Wolverhampton Wanderers',
                                 'url': '/en-in/match/wolverhampton-wanderers-v-olympiakos-piraeus/90x8ybb34ok5g3ai7rbb6a9uy'}},
              'time': '07/08/20 (12:30 AM IST)'}],
 'title': 

In [85]:
def extractionCompetitionTitleFromCompetitionSection(competitionSection):
    competitionTitleTag = competitionSection.find('a', class_='competition-title')
    competitionTitle = competitionTitleTag.text
    competitionTitle = competitionTitle.strip()
    return competitionTitle

def extractCompetitionUrlFromCompetitionSoup(competitionSoup):
    competitionUrlTag = competitionSoup.find('a', class_='competition-title')
    competitionUrl = competitionUrlTag['href']
    return competitionUrl

def extractAllMatchesFromCompetitionSoup(competitionSoup):
    matchTagList = competitionSoup.find_all('div', class_='match-row')
#     print(matchesTagList)
    return matchTagList
    
def extractMatchInfo(matchSoup):
    matchInfo = dict()
    
    matchStateSoup = matchSoup.find('span', class_='match-row__state')
    matchState = matchStateSoup.text if matchStateSoup else None
    matchTimeSoup = matchSoup.find('span', class_='match-row__date')
    matchTime = matchTimeSoup.text
    matchInfo['state'] = matchState
    matchInfo['time'] = matchTime
    
    homeTeamSoup = matchSoup.find('td', class_='match-row__team-home')
    homeTeamTitle = homeTeamSoup.text.strip()
    homeTeamUrl = homeTeamSoup.find('a', class_='match-row__link')['href']
    
    awayTeamSoup = matchSoup.find('td', class_='match-row__team-away')
    awayTeamTitle = awayTeamSoup.text.strip()
    awayTeamUrl = awayTeamSoup.find('a', class_='match-row__link')['href']
    
    scoreSoup = matchSoup.find_all('b', class_='match-row__goals')
    if scoreSoup:
        homeScore = scoreSoup[0].text
        awayScore = scoreSoup[1].text
        matchInfo['score'] = {
            'home': homeScore,
            'away': awayScore
        }
    
    matchInfo['teams'] = dict()
    matchInfo['teams']['home'] = {
        'title': homeTeamTitle,
        'url': homeTeamUrl
    }
    matchInfo['teams']['away'] = {
        'title': awayTeamTitle,
        'url': awayTeamUrl
    }
#     print(matchInfo)  
    return matchInfo

In [91]:
fixtureUrl = 'https://www.goal.com/en-in/fixtures/'
def getFixture(date):
    '''
    - Send Fixtures of matches played/to be played in "date"

    - Fixture will be stored in a list where all competition will be in their own list
    - Format:  [ [comp1, (status, Home1, Away1), .... ],
                 [comp2, (status, Home2, Away2), .... ] ]
    '''
    fixtureList = []

    url = fixtureUrl + date

    res = requests.get(url)
    soup = bs4.BeautifulSoup(res.text, 'lxml')

    allComp = soup.find_all('table', {'class': 'match-table '})
    
    print(allComp)

    for comp in allComp:

        id = comp.get('data-competition-id')

        if id not in leagueId:
            continue

        compList = []
        compList.append(leagueId[id])

        matches = comp.find_all('tr', {'class': 'clickable '})

        for match in matches:

            status = match.find('td', {'class': 'status'}).string.lstrip('\n')

            home = match.find('td', {'class': 'team'})
            away = home.find_next_sibling('td', {'class': 'team'})

            homeTeam = home.find('span').string
            awayTeam = away.find('span').string

            compList.append((status, homeTeam, awayTeam))

        fixtureList.append(compList)

    return fixtureList

In [92]:
getFixture('2020-08-19')

[]


[]