# Imports & setup

In [1]:
# Import Splinter, BeautifulSoup, and Selenium
from splinter import Browser
from bs4 import BeautifulSoup as soup
from selenium.webdriver.chrome.service import Service

In [2]:
# Import Pandas
import pandas as pd

In [3]:
# Import time datetime
import time
import datetime as dt

In [4]:
# Get user input for dates to loop through
start_date_str = input('Enter a start date: ')
end_date_str = input('Enter an end date: ')

Enter a start date:  March 1, 2024
Enter an end date:  March 1, 2024


In [5]:
# Convert user input to datetime objects
start_date = dt.datetime.strptime(start_date_str, '%B %d, %Y').date()
end_date = dt.datetime.strptime(end_date_str, '%B %d, %Y').date()

In [6]:
# Initialize an empty list to hold the CDL data
cdl_data = []

In [7]:
# Helper function to decide winner of map
def return_result(our_score, opp_score):
    if our_score > opp_score:
        return 1
    return 0

In [8]:
# Set up Splinter

# PC
my_service = Service(executable_path = 'C:\\Users\\David\\.wdm\\drivers\\chromedriver\\win64\\121.0.6167.184\\chromedriver-win32\\chromedriver.exe')

# Laptop
# my_service = Service(executable_path = 'C:\\Users\\David Harler Jr\\.wdm\\drivers\\chromedriver\\win64\\121.0.6167.184\\chromedriver-win32\\chromedriver.exe')

browser = Browser('chrome', service=my_service)

# Loop through BP.gg webpages & matches

In [9]:
# Initialize iterator
cur_date = start_date

# Initialize row nums to loop through
row_nums = [1, 2, 3, 4, 6, 7, 8, 9]

# Print the beginning of the logging.
print("-----------------------------")
print("Beginning Data Retrieval     ")
print("-----------------------------")
print("")

# Use while loop to iterate through date range
while cur_date <= end_date:

    # Visit BP.gg matches webpage for the current date
    url = 'https://www.breakingpoint.gg/matches/' + cur_date.strftime('%Y-%m-%d')
    browser.visit(url)

    # Wait for webpage to load
    time.sleep(5)
    
    # Parse the HTML for the current date
    html = browser.html
    bp_soup = soup(html, 'html.parser')

    # Get the matches from the current date
    matches = bp_soup.find('div', class_ = 'mantine-1hiehc6')

    # Check to see if there were any matches on the current date by checking the number of descendants of the matches subtree
    if len(list(matches.descendants)) > 2:

        # Visit the webpage for each match that took place on the current date
        for match in matches.find_all('a'):
            url = 'https://www.breakingpoint.gg' + match['href']
            browser.visit(url)
            
            # Wait for webpage to load
            time.sleep(5)

            # Get the match ID
            match_id = match['href'].split('/')[2]
            
            # Parse the HTML for the current match
            match_html = browser.html
            match_soup = soup(match_html, 'html.parser')
            
            # Get the teams
            teams = match_soup.find('div', class_ = 'mantine-7o6j5m').contents[0].contents
            team_a = teams[0].contents[0].contents[0].string
            team_b = teams[1].contents[-1].contents[-1].contents[0].string

            # Get the series scores
            team_a_series_score = int(teams[1].contents[0].string)
            team_b_series_score = int(teams[1].contents[2].string)
            
            # Get the mapset and the total number of maps played for the current match
            mapset = match_soup.find('div', class_ = 'mantine-g92whd').find_all('div', class_ = 'mantine-155beqj')
            total_maps = len(mapset)
            
            # Get the scoreboards
            scoreboards = match_soup.find_all('div', class_ = 'mantine-Tabs-panel mantine-v1hkmm')
            
            # Test print statement
            print(f"{team_a} vs. {team_b} | Match {match_id} on {cur_date} ")
            
            # Iterate through each map for the current match
            for map_num in range(1, total_maps + 1):
            
                # Get the map name
                map_name = list(mapset[map_num - 1].contents[1].strings)[0]
                
                # Get the gamemode
                gamemode = mapset[map_num - 1].contents[0].string
            
                # Get the team scores
                scores = mapset[map_num - 1].contents[2].contents[1].contents
                team_a_map_score = int(scores[0].string)
                team_b_map_score = int(scores[2].string)
            
                # Test print statements
                print(f"Map {map_num} of {total_maps} | {map_name} {gamemode}")
                # print(f"{team_a} {team_a_map_score} - {team_b_map_score} {team_b}")
            
                # Iterate down the rows of the current scoreboard
                for row_num in row_nums:
            
                    # Get the player stats
                    player = scoreboards[map_num - 1].contents[0].contents[1].contents[row_num].contents[0].string
                    kills = int(scoreboards[map_num - 1].contents[0].contents[1].contents[row_num].contents[1].string)
                    deaths = int(scoreboards[map_num - 1].contents[0].contents[1].contents[row_num].contents[2].string)
                    kd = float(scoreboards[map_num - 1].contents[0].contents[1].contents[row_num].contents[3].string)
                    plus_minus = kills - deaths
                    dmg = int("".join(scoreboards[map_num - 1].contents[0].contents[1].contents[row_num].contents[5].string.split(',')))
            
                    if row_num < 5:
                        # Append data to our cdl_list 
                        cdl_data.append({"Match": match_id, "Date": cur_date, "Day": cur_date.strftime('%A'),
                                         "Player": player, "Team": team_a,
                                         "Map #": map_num, "Map": map_name, "Gamemode": gamemode, "Kills": kills, 
                                         "Deaths": deaths, "K/D": kd, "+/-": plus_minus, "Damage": dmg, 
                                         "Team Score": team_a_map_score, 
                                         "Map Result": return_result(team_a_map_score, team_b_map_score), 
                                         "Series Result": return_result(team_a_series_score, team_b_series_score)})
                    else:
                        # Append data to our cdl_list 
                        cdl_data.append({"Match": match_id, "Date": cur_date, "Day": cur_date.strftime('%A'),
                                         "Player": player, "Team": team_b,
                                         "Map #": map_num, "Map": map_name, "Gamemode": gamemode, "Kills": kills, 
                                         "Deaths": deaths, "K/D": kd, "+/-": plus_minus, "Damage": dmg, 
                                         "Team Score": team_b_map_score, 
                                         "Map Result": return_result(team_b_map_score, team_a_map_score), 
                                         "Series Result": return_result(team_b_series_score, team_a_series_score)})

            
            # Test print statement
            print(f"Series Final: {team_a} {team_a_series_score} - {team_b_series_score} {team_b}")
            print("")
    
    # Move to next date
    cur_date += dt.timedelta(days=1)

    # Indicate that Data Loading is complete.
print("-----------------------------")
print("Data Retrieval Complete      ")
print("-----------------------------")

-----------------------------
Beginning Data Retrieval     
-----------------------------



AttributeError: 'NoneType' object has no attribute 'descendants'

In [10]:
# Convert our list of cdl dictionaries to a dataframe
cdl_df = pd.DataFrame(cdl_data)
cdl_df

Unnamed: 0,Match,Date,Day,Player,Team,Map #,Map,Gamemode,Kills,Deaths,K/D,+/-,Damage,Team Score,Map Result,Series Result
0,27193,2023-12-08,Friday,aBeZy,Atlanta FaZe,1,Karachi,Hardpoint,24,24,1.00,0,4659,250,1,1
1,27193,2023-12-08,Friday,Cellium,Atlanta FaZe,1,Karachi,Hardpoint,21,19,1.11,2,4032,250,1,1
2,27193,2023-12-08,Friday,Drazah,Atlanta FaZe,1,Karachi,Hardpoint,25,22,1.14,3,4296,250,1,1
3,27193,2023-12-08,Friday,Simp,Atlanta FaZe,1,Karachi,Hardpoint,33,23,1.43,10,5188,250,1,1
4,27193,2023-12-08,Friday,Capsidal,Boston Breach,1,Karachi,Hardpoint,24,26,0.92,-2,3731,238,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2435,27251,2024-02-25,Sunday,Simp,Atlanta FaZe,5,Invasion,Search & Destroy,7,9,0.78,-2,1158,4,0,0
2436,27251,2024-02-25,Sunday,HyDra,New York Subliners,5,Invasion,Search & Destroy,11,7,1.57,4,2104,6,1,1
2437,27251,2024-02-25,Sunday,KiSMET,New York Subliners,5,Invasion,Search & Destroy,10,5,2.00,5,1560,6,1,1
2438,27251,2024-02-25,Sunday,Sib,New York Subliners,5,Invasion,Search & Destroy,3,8,0.38,-5,1014,6,1,1


# Export data

In [11]:
from sqlalchemy import create_engine
from config import db_password

In [12]:
# Connect to SQL db
db_string = f"postgresql://postgres:{db_password}@127.0.0.1:5432/cdl_db"
engine = create_engine(db_string)
cdl_df.to_sql(name = 'cdl_data', con = engine, if_exists = 'replace')

440