In [None]:
# import necessary packages
import requests
from bs4 import BeautifulSoup
import pandas as pd
from sqlalchemy import create_engine
from sqlalchemy.engine import URL
!pip install mysqlclient
from google.colab import userdata

# CBS Sports Web Scrape

In [None]:
# get url and make request to it
url = 'https://www.cbssports.com/nba/teams/UTA/utah-jazz/schedule/'

# timeout (in seconds) stops the cell from hanging forever if the site never answers
extended_request = requests.get(url, timeout=30)

In [None]:
# double check request status code
# (the Response object's repr, shown as the cell output, includes the HTTP status)
extended_request

In [None]:
# see the text response of the request (the raw HTML body as a string)
extended_request.text

In [None]:
# build the parse tree from the response body with Python's built-in HTML parser
soup = BeautifulSoup(markup=extended_request.text, features='html.parser')

In [None]:
# print the parsed document in prettified (indented) form so the tag
# structure is easier to read than the raw response text
print(soup.prettify())

In [None]:
# every game in the schedule table shares the same row class:
# <tr class="TableBase-bodyTr">
# find_all is the current Beautiful Soup 4 name; findAll is the legacy alias
games = soup.find_all('tr', attrs={'class': 'TableBase-bodyTr'})

In [None]:
# checking output of games
# (displays every matched <tr> row as the cell output)
games

In [None]:
# checking type of games
# (i.e. the kind of collection returned by the search that produced `games`)
type(games)

In [None]:
# game dates are held in <span class="CellGameDate">
# find_all is the current Beautiful Soup 4 name; findAll is the legacy alias
dates = soup.find_all('span', attrs={'class': 'CellGameDate'})

In [None]:
# see the scraped dates: print each one with the surrounding <span> markup
# and whitespace stripped, leaving just the date text
for date in dates:
    print("Date:", date.text.strip())

In [None]:
# confirming type for dates
# (i.e. the kind of collection returned by the search that produced `dates`)
type(dates)

In [None]:
# results are held in <span class="CellGame-win"> or <span class="CellGame-lose">
# the lambda accepts any class value containing either marker (and skips
# elements that have no class at all)
# find_all is the current Beautiful Soup 4 name; findAll is the legacy alias
outcomes = soup.find_all('span', class_=lambda x: x and ('CellGame-win' in x or 'CellGame-lose' in x))

In [None]:
# see the scraped outcomes: print each one with the surrounding <span>
# markup and whitespace stripped, leaving just the result text
for outcome in outcomes:
    print("Outcome:", outcome.text.strip())

In [None]:
# confirming type for outcome
# (i.e. the kind of collection returned by the search that produced `outcomes`)
type(outcomes)

In [None]:
# walk every schedule row and echo whichever of the two fields it contains,
# followed by a separator line
for game in games:
    # date lives in <span class="CellGameDate">
    date = game.find('span', class_='CellGameDate')
    # result lives in <span class="CellGame-win"> or <span class="CellGame-lose">
    outcome = game.find(
        'span',
        class_=lambda css: css and any(
            marker in css for marker in ('CellGame-win', 'CellGame-lose')
        ),
    )

    if date is not None:
        print("Date:", date.text.strip())
    if outcome is not None:
        print("Outcome:", outcome.text.strip())

    print('-' * 70)

In [None]:
# Build a column-oriented dictionary with one 'date' and one 'outcome' entry
# per schedule row. A row missing either field gets the 'N/A' placeholder so
# both lists always stay the same length.
game_data = {
    'date': [],
    'outcome': [],
}

for game in games:
    # Date: <span class="CellGameDate">
    date = game.find('span', attrs={'class': 'CellGameDate'})
    # placeholder when the date is not found
    game_data['date'].append(date.text.strip() if date is not None else 'N/A')

    # Outcome: <span class="CellGame-win"> or <span class="CellGame-lose">
    outcome = game.find('span', class_=lambda x: x and ('CellGame-win' in x or 'CellGame-lose' in x))
    # placeholder when the outcome is not found
    game_data['outcome'].append(outcome.text.strip() if outcome is not None else 'N/A')

In [None]:
# checking if dictionary was properly made
# (expect equally long 'date' and 'outcome' lists, one entry per row in games)
game_data

In [None]:
# turn the column-oriented dictionary into a DataFrame
# (each key becomes a column, each list its values)
games_df = pd.DataFrame(data=game_data)

In [None]:
# sanity-check the new DataFrame by previewing its first five rows
# (five is the default for head)
games_df.head()

In [None]:
# write the DataFrame to games_data.csv, leaving out the index column
games_df.to_csv(path_or_buf='games_data.csv', index=False)

In [None]:
# adding secret for connection password
# (read from the Colab userdata store under the key 'RDS_PASSWORD', so the
# password itself never appears in the notebook)
db_password = userdata.get('RDS_PASSWORD')

In [None]:
# create engine to connect with database
# The URL is assembled from its parts with URL.create rather than string
# formatting: a password containing characters such as '@', ':' or '/' would
# otherwise be misread as URL structure and break the connection string.
engine = create_engine(
    URL.create(
        drivername='mysql+mysqldb',
        username='admin',
        password=db_password,
        host='sql-project.ca9jkrwdnacm.us-east-1.rds.amazonaws.com',
        database='JazzData',
    )
)

In [None]:
# load the DataFrame into the GameOutcomes table; an existing table with that
# name is replaced and the DataFrame index is not written
games_df.to_sql(name='GameOutcomes', con=engine, if_exists='replace', index=False)

# NBA Web Scrape

In [None]:
# assigning url as a variable
url = 'https://stats.nba.com/stats/teamgamelog?DateFrom=&DateTo=&LeagueID=00&Season=2023-24&SeasonType=Regular%20Season&TeamID=1610612762'

# request headers sent with the call: a browser-like User-Agent and Referer
# plus the x-nba-stats-* fields
# NOTE(review): presumably needed for stats.nba.com to answer at all - confirm
headers  = {
    'Connection': 'keep-alive',
    'Accept': 'application/json, text/plain, */*',
    'x-nba-stats-token': 'true',
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.130 Safari/537.36',
    'x-nba-stats-origin': 'stats',
    'Sec-Fetch-Site': 'same-origin',
    'Sec-Fetch-Mode': 'cors',
    'Referer': 'https://stats.nba.com/',
    'Accept-Encoding': 'gzip, deflate, br',
    'Accept-Language': 'en-US,en;q=0.9',
}

# making get request
# timeout (in seconds) stops the cell from hanging forever if the server never answers
game_request = requests.get(url, headers=headers, timeout=30)

In [None]:
# checking status code
# (the Response object's repr, shown as the cell output, includes the HTTP status)
game_request

In [None]:
# decode the JSON body of the response already held in game_request instead
# of sending the identical request a second time; raise on an HTTP error
# status first so a failed call is not mistaken for bad JSON
game_request.raise_for_status()
response = game_request.json()

In [None]:
# pull the row data ('rowSet') out of the first entry of 'resultSets' in the decoded JSON
game_results = response['resultSets'][0]['rowSet']

In [None]:
# inspect the raw rows extracted from the response
game_results

In [None]:
# column labels for the game-log rows, grouped by theme; the order matters
# because the labels are matched to row values by position
# NOTE(review): "date" is lower-case unlike the other labels - presumably so it
# lines up with the 'date' column of the CBS data; confirm
columns = [
    # identifiers and game info
    "Team_ID", "Game_ID", "date", "MATCHUP",
    # result and running record
    "WL", "W", "L", "W_PCT",
    # minutes
    "MIN",
    # field goals
    "FGM", "FGA", "FG_PCT",
    # three-pointers
    "FG3M", "FG3A", "FG3_PCT",
    # free throws
    "FTM", "FTA", "FT_PCT",
    # rebounds
    "OREB", "DREB", "REB",
    # remaining box-score totals
    "AST", "STL", "BLK", "TOV", "PF", "PTS",
]

In [None]:
# build the DataFrame from the raw rows, labelling them with the column list
nba_df = pd.DataFrame(data=game_results, columns=columns)

In [None]:
# preview the first five rows of the DataFrame (five is the default for head)
nba_df.head(5)

In [None]:
# write the DataFrame to nba_results.csv, leaving out the index column
nba_df.to_csv(path_or_buf='nba_results.csv', index=False)

In [None]:
# adding secret for connection password
# (read from the Colab userdata store under the key 'RDS_PASSWORD', so the
# password itself never appears in the notebook)
db_password = userdata.get('RDS_PASSWORD')

In [None]:
# create engine to connect with database
# The URL is assembled from its parts with URL.create rather than string
# formatting: a password containing characters such as '@', ':' or '/' would
# otherwise be misread as URL structure and break the connection string.
engine = create_engine(
    URL.create(
        drivername='mysql+mysqldb',
        username='admin',
        password=db_password,
        host='sql-project.ca9jkrwdnacm.us-east-1.rds.amazonaws.com',
        database='JazzData',
    )
)

In [None]:
# load the DataFrame into the TeamGameData table; an existing table with that
# name is replaced and the DataFrame index is not written
nba_df.to_sql(name='TeamGameData', con=engine, if_exists='replace', index=False)