# Imports & setup

In [1]:
# Import os, Splinter, BeautifulSoup, and Selenium
import os

from splinter import Browser
from bs4 import BeautifulSoup as soup
from selenium.webdriver.chrome.service import Service

In [2]:
# Import Pandas
import pandas as pd

In [3]:
# Import time and datetime
import time
import datetime as dt

In [4]:
# Get user input for dates to loop through.
# Both values are kept as raw strings here and parsed later with the
# '%B %d, %Y' format, so they should be typed like "March 1, 2024".
start_date_str = input('Enter a start date: ')
end_date_str = input('Enter an end date: ')

Enter a start date:  March 1, 2024
Enter an end date:  March 3, 2024


In [5]:
# Convert user input to datetime.date objects.
# Inputs are expected in "Month day, year" form (e.g. "March 1, 2024").
# Surrounding whitespace is stripped first because strptime raises ValueError
# on leading or trailing blanks that the format string does not account for.
DATE_INPUT_FORMAT = '%B %d, %Y'
start_date = dt.datetime.strptime(start_date_str.strip(), DATE_INPUT_FORMAT).date()
end_date = dt.datetime.strptime(end_date_str.strip(), DATE_INPUT_FORMAT).date()

In [6]:
# Accumulator for the scraped CDL rows; starts out as an empty list
cdl_data = list()

In [7]:
# Helper function to decide winner of map
def return_result(our_score, opp_score):
    """Return 1 when our_score is strictly greater than opp_score, otherwise 0.

    A tie counts as 0, the same as a loss.
    """
    return 1 if our_score > opp_score else 0

In [8]:
# Set up Splinter

# Default chromedriver location (PC). To run on another machine, set the
# CHROMEDRIVER_PATH environment variable rather than editing this cell.
DEFAULT_CHROMEDRIVER_PATH = 'C:\\Users\\David\\.wdm\\drivers\\chromedriver\\win64\\121.0.6167.184\\chromedriver-win32\\chromedriver.exe'

# Laptop value for CHROMEDRIVER_PATH, kept for reference:
# C:\Users\David Harler Jr\.wdm\drivers\chromedriver\win64\121.0.6167.184\chromedriver-win32\chromedriver.exe

# Fall back to the PC default when no override is set
chromedriver_path = os.environ.get('CHROMEDRIVER_PATH', DEFAULT_CHROMEDRIVER_PATH)
my_service = Service(executable_path = chromedriver_path)

browser = Browser('chrome', service=my_service)

In [31]:
# Open the Breaking Point match listing page
url = 'https://www.breakingpoint.gg/matches/'
browser.visit(url)

# Pause for a fixed 5 seconds so the page has time to load
page_load_wait_seconds = 5
time.sleep(page_load_wait_seconds)

In [32]:
# Grab the browser's current HTML and parse it with the built-in html.parser
html = browser.html
bp_soup = soup(html, features = 'html.parser')

In [33]:
# Find the current match container on the listing page
cur_match = bp_soup.find('div', class_ = 'mantine-vdx6qn')

# Its second child carries the link to the match; the ID is the third
# '/'-separated piece of that href (presumably "/match/<id>" -- TODO confirm)
match_link = cur_match.contents[1]
href_parts = match_link['href'].split('/')
cur_match_ID = int(href_parts[2])

In [34]:
# Open the match page for the current match ID
url = f'https://www.breakingpoint.gg/match/{cur_match_ID}'
browser.visit(url)

# Pause for a fixed 5 seconds so the page has time to load
page_load_wait_seconds = 5
time.sleep(page_load_wait_seconds)

In [35]:
# Grab the match page's HTML and parse it with the built-in html.parser
match_html = browser.html
match_soup = soup(match_html, features = 'html.parser')

In [36]:
# Locate the element holding the match timestamp
looking_for_date = match_soup.find('div', class_ = 'mantine-7c77qh')

# Parse the full timestamp, then keep only the calendar date as the current date
match_timestamp = dt.datetime.strptime(looking_for_date.string, '%m/%d/%Y, %H:%M:%S %p')
cur_date = match_timestamp.date()

# Loop through BP.gg match webpages

In [37]:
# Scoreboard rows that hold player stats. Rows with an index below 5 are
# attributed to team A and the remaining ones to team B. Index 5 is skipped
# (presumably a non-player divider row between the teams -- TODO confirm
# against the page markup).
scoreboard_row_nums = [1, 2, 3, 4, 6, 7, 8, 9]

# Print the beginning of the logging.
print("-----------------------------")
print("Beginning Data Retrieval     ")
print("-----------------------------")
print("")

# Walk match IDs downward (newest to oldest) until a match dated before
# start_date is reached. The ID is decremented before each visit, so the match
# that cur_match_ID points to on entry is itself never scraped.
while cur_date >= start_date:

    # Step back to the previous match ID
    cur_match_ID -= 1

    # Visit bp match page for current match
    url = 'https://www.breakingpoint.gg/match/' + str(cur_match_ID)
    browser.visit(url)

    # Wait for webpage to load
    time.sleep(5)

    # Parse the HTML
    match_html = browser.html
    match_soup = soup(match_html, 'html.parser')

    # Get the match date and set as current date.
    # NOTE(review): '%H' is combined with '%p' here; per the strptime docs '%p'
    # only affects the hour when '%I' is used. Only the date part is kept, so
    # the format is left untouched.
    looking_for_date = match_soup.find('div', class_ = 'mantine-7c77qh')
    cur_date = dt.datetime.strptime(looking_for_date.string, '%m/%d/%Y, %H:%M:%S %p').date()

    # Skip matches outside the requested range; the while condition decides
    # whether to keep walking backwards.
    if not (start_date <= cur_date <= end_date):
        continue

    # Get the teams
    teams = match_soup.find('div', class_ = 'mantine-7o6j5m').contents[0].contents
    team_a = teams[0].contents[0].contents[0].string
    team_b = teams[1].contents[-1].contents[-1].contents[0].string

    # Get the series scores
    team_a_series_score = int(teams[1].contents[0].string)
    team_b_series_score = int(teams[1].contents[2].string)

    # Get the mapset and the total number of maps played
    mapset = match_soup.find('div', class_ = 'mantine-g92whd').find_all('div', class_ = 'mantine-155beqj')
    total_maps = len(mapset)

    # Get the scoreboards (indexed in the same order as the mapset entries)
    scoreboards = match_soup.find_all('div', class_ = 'mantine-Tabs-panel mantine-v1hkmm')

    # Test print statement
    print(f"{team_a} vs. {team_b} | Match {cur_match_ID} on {cur_date} ")

    # Iterate through each map for the current match (map numbers start at 1)
    for map_num, map_div in enumerate(mapset, start=1):

        # Get the map name
        map_name = list(map_div.contents[1].strings)[0]

        # Get the gamemode
        gamemode = map_div.contents[0].string

        # Get the team scores
        scores = map_div.contents[2].contents[1].contents
        team_a_map_score = int(scores[0].string)
        team_b_map_score = int(scores[2].string)

        # Test print statements
        print(f"Map {map_num} of {total_maps} | {map_name} {gamemode}")
        # print(f"{team_a} {team_a_map_score} - {team_b_map_score} {team_b}")

        # Rows of the scoreboard that belongs to this map
        scoreboard_rows = scoreboards[map_num - 1].contents[0].contents[1].contents

        # Iterate down the rows of the current scoreboard
        for row_num in scoreboard_row_nums:

            # Get the player stats
            cells = scoreboard_rows[row_num].contents
            player = cells[0].string
            kills = int(cells[1].string)
            deaths = int(cells[2].string)
            kd = float(cells[3].string)
            plus_minus = kills - deaths
            # Damage is rendered with thousands separators, e.g. "4,092"
            dmg = int(cells[5].string.replace(',', ''))

            # Pick the perspective (own team vs. opponent) for this row
            if row_num < 5:
                team = team_a
                team_score, opp_score = team_a_map_score, team_b_map_score
                series_score, opp_series_score = team_a_series_score, team_b_series_score
            else:
                team = team_b
                team_score, opp_score = team_b_map_score, team_a_map_score
                series_score, opp_series_score = team_b_series_score, team_a_series_score

            # Append data to our cdl_list. Both teams use the same keys so the
            # resulting dataframe has a single "gamemode" column.
            cdl_data.append({"match_id": cur_match_ID, "match_date": cur_date, "match_day": cur_date.strftime('%A'),
                             "player": player, "team": team,
                             "map_num": map_num, "map_name": map_name, "gamemode": gamemode, "kills": kills,
                             "deaths": deaths, "kd": kd, "plus_minus": plus_minus, "dmg": dmg,
                             "team_score": team_score,
                             "map_result": return_result(team_score, opp_score),
                             "series_result": return_result(series_score, opp_series_score)})

    # Test print statement
    print(f"Series Final: {team_a} {team_a_series_score} - {team_b_series_score} {team_b}")
    print("")

# Indicate that Data Loading is complete.
print("-----------------------------")
print("Data Retrieval Complete      ")
print("-----------------------------")

-----------------------------
Beginning Data Retrieval     
-----------------------------

Minnesota RØKKR vs. Los Angeles Thieves | Match 27259 on 2024-03-03 
Map 1 of 5 | Invasion Hardpoint
Map 2 of 5 | Invasion Search & Destroy
Map 3 of 5 | Invasion Control
Map 4 of 5 | Skidrow Hardpoint
Map 5 of 5 | Terminal Search & Destroy
Series Final: Minnesota RØKKR 2 - 3 Los Angeles Thieves

Atlanta FaZe vs. Toronto Ultra | Match 27258 on 2024-03-03 
Map 1 of 3 | Karachi Hardpoint
Map 2 of 3 | Rio Search & Destroy
Map 3 of 3 | Highrise Control
Series Final: Atlanta FaZe 3 - 0 Toronto Ultra

Miami Heretics vs. New York Subliners | Match 27257 on 2024-03-03 
Map 1 of 3 | Sub Base Hardpoint
Map 2 of 3 | Invasion Search & Destroy
Map 3 of 3 | Invasion Control
Series Final: Miami Heretics 0 - 3 New York Subliners

Los Angeles Guerrillas vs. Seattle Surge | Match 27256 on 2024-03-02 
Map 1 of 3 | Karachi Hardpoint
Map 2 of 3 | Invasion Search & Destroy
Map 3 of 3 | Karachi Control
Series Final: Los

# Export data

In [38]:
# Build a dataframe from the scraped dictionaries (one row per dict), then display it
cdl_df = pd.DataFrame(data = cdl_data)
cdl_df

Unnamed: 0,Match,Date,Day,Player,Team,Map #,Map,Gamemode,Kills,Deaths,K/D,+/-,Damage,Team Score,Map Result,Series Result
0,27259,2024-03-03,Sunday,Accuracy,Minnesota RØKKR,1,Invasion,Hardpoint,22,25,0.88,-3,4092,205,0,0
1,27259,2024-03-03,Sunday,Lyynnz,Minnesota RØKKR,1,Invasion,Hardpoint,27,24,1.13,3,5396,205,0,0
2,27259,2024-03-03,Sunday,Owakening,Minnesota RØKKR,1,Invasion,Hardpoint,21,25,0.84,-4,4730,205,0,0
3,27259,2024-03-03,Sunday,Vivid,Minnesota RØKKR,1,Invasion,Hardpoint,28,28,1.00,0,5699,205,0,0
4,27259,2024-03-03,Sunday,Afro,Los Angeles Thieves,1,Invasion,Hardpoint,27,24,1.13,3,5148,250,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
403,27252,2024-03-01,Friday,Vivid,Minnesota RØKKR,3,Invasion,Control,18,29,0.62,-11,5050,2,0,0
404,27252,2024-03-01,Friday,aBeZy,Atlanta FaZe,3,Invasion,Control,36,22,1.64,14,5881,3,1,1
405,27252,2024-03-01,Friday,Cellium,Atlanta FaZe,3,Invasion,Control,24,22,1.09,2,5831,3,1,1
406,27252,2024-03-01,Friday,Drazah,Atlanta FaZe,3,Invasion,Control,29,26,1.12,3,5604,3,1,1


In [62]:
# Clean Minnesota RØKKR team name.
# DataFrame.replace returns a new frame rather than modifying cdl_df, so the
# result is assigned back; otherwise the cleaned name is only displayed and
# the original spelling is what gets used afterwards.
cdl_df = cdl_df.replace("Minnesota RØKKR", "Minnesota ROKKR")
cdl_df

Unnamed: 0,Match,Date,Day,Player,Team,Map #,Map,Gamemode,Kills,Deaths,K/D,+/-,Damage,Team Score,Map Result,Series Result
192,27259,2024-03-03,Sunday,Accuracy,Minnesota ROKKR,1,Invasion,Hardpoint,22,25,0.88,-3,4092,205,0,0
193,27259,2024-03-03,Sunday,Lyynnz,Minnesota ROKKR,1,Invasion,Hardpoint,27,24,1.13,3,5396,205,0,0
194,27259,2024-03-03,Sunday,Owakening,Minnesota ROKKR,1,Invasion,Hardpoint,21,25,0.84,-4,4730,205,0,0
195,27259,2024-03-03,Sunday,Vivid,Minnesota ROKKR,1,Invasion,Hardpoint,28,28,1.00,0,5699,205,0,0
196,27259,2024-03-03,Sunday,Afro,Los Angeles Thieves,1,Invasion,Hardpoint,27,24,1.13,3,5148,250,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
403,27252,2024-03-01,Friday,Vivid,Minnesota ROKKR,3,Invasion,Control,18,29,0.62,-11,5050,2,0,0
404,27252,2024-03-01,Friday,aBeZy,Atlanta FaZe,3,Invasion,Control,36,22,1.64,14,5881,3,1,1
405,27252,2024-03-01,Friday,Cellium,Atlanta FaZe,3,Invasion,Control,24,22,1.09,2,5831,3,1,1
406,27252,2024-03-01,Friday,Drazah,Atlanta FaZe,3,Invasion,Control,29,26,1.12,3,5604,3,1,1


In [42]:
from sqlalchemy import create_engine
from config import db_password

In [63]:
# Connect to SQL db
db_string = f"postgresql://postgres:{db_password}@127.0.0.1:5433/cdl_db"
engine = create_engine(db_string)

# Write the dataframe to a table named after today's date
# (cdl_data_YYYY_MM_DD), replacing any existing table with that name
table_name = 'cdl_data_' + dt.date.today().strftime('%Y_%m_%d')
cdl_df.to_sql(name = table_name, con = engine, if_exists = 'replace')

216