forked from joshknopp/scratchpad
-
Notifications
You must be signed in to change notification settings - Fork 0
/
nfl_scraper.py
93 lines (43 loc) · 1.96 KB
/
nfl_scraper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
import requests
import json
from bs4 import BeautifulSoup
# Scrape the 2023 NFL regular-season schedule from nfl.com, one page per
# week, and print every matchup found as a JSON array on stdout.

# URL pattern for a single week's schedule page; {week} is the week number.
SCHEDULE_URL_TEMPLATE = 'https://www.nfl.com/schedules/2023/reg{week}/'
# Inclusive range of week numbers to request.
FIRST_WEEK = 1
LAST_WEEK = 18
# Seconds to wait on each HTTP request; without a timeout a stalled
# connection would block the script forever.
REQUEST_TIMEOUT = 30


def _parse_games(week_soup, week):
    """Return one dict per matchup container found in a parsed week page.

    Args:
        week_soup: BeautifulSoup document for one week's schedule page.
        week: Week number stored under the 'week' key of each result.

    Returns:
        A list of dicts with keys 'week', 'date', 'team1' and 'team2'.
        'date' is None when the container has no matchup-time element.
        Containers exposing fewer than two team names are skipped, since a
        matchup cannot be described without both sides.
    """
    games = []
    containers = week_soup.find_all(
        'div', class_='nfl-o-matchup-group__container'
    )
    for game_element in containers:
        teams = game_element.find_all('span', class_='nfl-c-matchup-team-name')
        if len(teams) < 2:
            continue

        # find() returns None when nothing matches; guard before reading .text.
        time_element = game_element.find('div', class_='nfl-c-matchup-time')
        date = time_element.text.strip() if time_element is not None else None

        games.append({
            'week': week,
            'date': date,
            'team1': teams[0].text.strip(),
            'team2': teams[1].text.strip(),
        })
    return games


# Accumulates the matchups of every week, in week order.
data = []
for week in range(FIRST_WEEK, LAST_WEEK + 1):
    week_url = SCHEDULE_URL_TEMPLATE.format(week=week)
    week_response = requests.get(week_url, timeout=REQUEST_TIMEOUT)
    # Fail loudly on 4xx/5xx instead of parsing an error page as a schedule.
    week_response.raise_for_status()
    week_soup = BeautifulSoup(week_response.content, 'html.parser')
    data.extend(_parse_games(week_soup, week))

# Output the scraped data as JSON to the console.
print(json.dumps(data, indent=4))