## Grabbing the Data
-----

### Setup:
---

In [12]:
# Import dependencies
from pathlib import Path
import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup


### Grab NFL Game Data from Past Years:
---

In [13]:
def getHTMLValue(input, parent=False):
    """Pull the text out of an HTML element, or out of its parent element.

    Parameters
    ----------
    input : object
        Element exposing ``.text`` (and ``.parent`` when ``parent=True``).
        May be ``None``, e.g. when an earlier lookup found nothing.
    parent : bool, default False
        When True, read ``input.parent.text`` instead of ``input.text``.

    Returns
    -------
    The element's ``.text`` value, or the string ``'N/A'`` if it could not
    be read.
    """
    try:
        element = input.parent if parent else input
        return element.text
    except Exception:
        # Best-effort lookup: a missing element (None) or missing attribute
        # yields the 'N/A' placeholder. Catching Exception instead of using a
        # bare except lets KeyboardInterrupt / SystemExit propagate, so a
        # long scrape can still be interrupted.
        return 'N/A'

In [14]:
def parseLabels(rawData, throwAway):
    """Strip a label (such as ``'Date: '``) out of a scraped string.

    Parameters
    ----------
    rawData : object
        Value to clean; normally a string.
    throwAway : str
        Substring to remove; every occurrence is replaced with ``''``.

    Returns
    -------
    ``rawData`` with ``throwAway`` removed, or ``rawData`` unchanged if the
    replacement could not be performed (for example when it is not a string).
    """
    try:
        return rawData.replace(throwAway, '')
    except Exception:
        # Best-effort cleanup: hand back the original value on failure.
        # Catching Exception instead of using a bare except lets
        # KeyboardInterrupt / SystemExit propagate.
        return rawData

In [15]:
# Seasons to scrape, as a half-open range of years (currently just 2022)
years_list = list(range(2022, 2023))
years_list

[2022]

In [11]:
# Teams to scrape, in alphabetical order. The names are turned into
# Wikipedia season-page URLs by swapping spaces for underscores.
teams_list = [
    'Arizona Cardinals',
    'Atlanta Falcons',
    'Baltimore Ravens',
    'Buffalo Bills',
    'Carolina Panthers',
    'Chicago Bears',
    'Cincinnati Bengals',
    'Cleveland Browns',
    'Dallas Cowboys',
    'Denver Broncos',
    'Detroit Lions',
    'Green Bay Packers',
    'Houston Texans',
    'Indianapolis Colts',
    'Jacksonville Jaguars',
    'Kansas City Chiefs',
    'Las Vegas Raiders',
    'Los Angeles Chargers',
    'Los Angeles Rams',
    'Miami Dolphins',
    'Minnesota Vikings',
    'New England Patriots',
    'New Orleans Saints',
    'New York Giants',
    'New York Jets',
    'Philadelphia Eagles',
    'Pittsburgh Steelers',
    'San Francisco 49ers',
    'Seattle Seahawks',
    'Tampa Bay Buccaneers',
    'Tennessee Titans',
    'Washington Commanders',
]

In [29]:
# Try With a Single Year (2022) First
#
# For every (year, team) pair: read the results table from the team's
# Wikipedia season page, enrich each game row with the weather, kickoff time
# and location scraped from the per-game summaries on the same page, and
# export one CSV per year and team.

# Temperature assumed when no outdoor reading can be parsed
# (domed stadium - assume constant temp)
DOME_TEMP_F = 72
DOME_TEMP_C = 22

# Folder that receives the CSVs; create it up front so to_csv cannot fail
# on a missing directory after the scraping work is already done.
export_dir = Path('../03-Wrangling_Data/Grabbing_Data_Exports')
export_dir.mkdir(parents=True, exist_ok=True)

for year in years_list:
    for team in teams_list:
        team_format = team.replace(' ', '_')
        url = f'https://en.wikipedia.org/wiki/{year}_{team_format}_season'

        wiki_tables = pd.read_html(url, match='Opponent')

        # Reset for every team: without this, a page with no qualifying table
        # would silently reuse (and re-export) the previous team's results.
        results_df = None
        for table in wiki_tables:
            # if the table has more than 5 rows, that's our results table
            # (the last such table on the page wins)
            if len(table) > 5:
                results_df = table

        if results_df is None:
            print(f'No results table found for {year} {team} - skipping ({url})')
            continue

        results_df['team'] = team
        results_df['weather_condition'] = ''
        results_df['temp_f'] = ''
        results_df['temp_c'] = ''
        results_df['time'] = ''
        results_df['city'] = ''
        results_df['state'] = ''

        # screen scrape the game data to get the weather, time, and location
        # A timeout keeps one stalled connection from hanging the whole run,
        # and raise_for_status surfaces HTTP errors instead of parsing an
        # error page.
        team_response = requests.get(url, timeout=30)
        team_response.raise_for_status()
        team_response_html = team_response.content.decode('utf-8')
        # NOTE(review): no parser is named, so BeautifulSoup picks whichever
        # is installed; consider pinning one for reproducible parsing.
        team_response_parsed = BeautifulSoup(team_response_html)

        # Each game summary carries a bold "Date" label; use it as the anchor
        # for locating the sibling weather / time / location entries.
        date_labels = team_response_parsed.find_all('b', string="Date")
        for date_label in date_labels:
            game_block = date_label.parent.parent

            game_date = parseLabels(getHTMLValue(date_label.parent), 'Date: ')
            game_weather = parseLabels(
                getHTMLValue(game_block.find('b', string="Game weather"), parent=True),
                'Game weather: ',
            )

            # Expected shape: "<conditions>, <F> °F (<C> °C)..."
            weather_parts = game_weather.split(',')
            conditions = weather_parts[0]

            try:
                temps = weather_parts[1].split('°')
                # Normalise the Unicode minus sign, which int() does not accept
                temp_f = int(temps[0].replace('\u2212', '-'))
                celsius_part = temps[1].split('(')
                temp_c = int(celsius_part[1].replace('\u2212', '-'))
            except (IndexError, ValueError):
                # No temperature reading (or one we cannot parse):
                # domed stadium - assume constant temp
                temp_f = DOME_TEMP_F
                temp_c = DOME_TEMP_C

            game_time = parseLabels(
                getHTMLValue(game_block.find('b', string="Game time"), parent=True),
                'Game time: ',
            )
            game_location = parseLabels(
                getHTMLValue(game_block.parent.find('p')), 'at '
            ).replace('\n', '')

            # Expected shape: "<stadium>, <city>, <state>"; leave city/state
            # blank rather than crashing when a part is missing.
            game_location_parts = game_location.split(',')
            city = game_location_parts[1].strip() if len(game_location_parts) > 1 else ''
            state = game_location_parts[2].strip() if len(game_location_parts) > 2 else ''

            # Match the scraped game to its row(s) in the results table by date
            game_rows = results_df['Date'] == game_date
            results_df.loc[game_rows, 'weather_condition'] = conditions
            results_df.loc[game_rows, 'temp_f'] = temp_f
            results_df.loc[game_rows, 'temp_c'] = temp_c
            results_df.loc[game_rows, 'time'] = game_time
            results_df.loc[game_rows, 'city'] = city
            results_df.loc[game_rows, 'state'] = state

        # export to csv here - by year and team
        results_df.to_csv(export_dir / f'{year}_{team_format}.csv')


### Export Past Game Data as CSV Files:
---

In [None]:
# CSVs for each year and team are already written by the scraping loop above to ../03-Wrangling_Data/Grabbing_Data_Exports
