## Import libraries
Use BeautifulSoup to scrape operator data from Ubisoft's official website and write the data to a .csv file.

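Note: To get your user agent, search "What is my user agent" and copy the string shown on the results page, or visit [whatismybrowser.com](https://www.whatismybrowser.com/detect/what-is-my-user-agent/). Make the string available as the `USER_AGENT` environment variable, for example with a `USER_AGENT=...` line in a `.env` file, which `load_dotenv()` reads in the cell below.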

In [12]:
import os
import pandas as pd
import requests
from bs4 import BeautifulSoup
from dotenv import load_dotenv
# Load variables from a .env file (python-dotenv) into the process environment.
load_dotenv()

# User-Agent string sent with every page request; None when the variable is unset.
USER_AGENT = os.environ.get("USER_AGENT")

List the operator names; each name is used to build the URL of that operator's description page.

In [13]:

# Names of all operators in the game, in the order their pages are scraped.
# Each entry is inserted into the operator page URL by scrape_operator.
operator_names = [
    "lion", "amaru", "goyo", "nokk", "warden", "mozzie",
    "gridlock", "nomad", "kaid", "clash", "maverick", "maestro",
    "alibi", "finka", "vigil", "dokkaebi", "zofia", "ela",
    "ying", "lesion", "mira", "jackal", "hibana", "echo",
    "caveira", "capitao", "blackbeard", "valkyrie", "buck", "frost",
    "mute", "sledge", "smoke", "thatcher", "ash", "castle",
    "pulse", "thermite", "montagne", "twitch", "doc", "rook",
    "jager", "bandit", "blitz", "iq", "fuze", "glaz",
    "tachanka", "kapkan", "kali", "wamai", "iana", "oryx",
    "ace", "melusi", "zero", "aruni", "flores", "thunderbird",
    "osa", "thorn", "azami", "sens", "grim", "solis",
    "brava", "fenrir", "ram", "tubarao", "deimos", "striker",
    "sentry", "skopos",
]

## Create functions to get operator properties from page
Each operator info page has the same page structure so we can use class naming conventions and tags to get specific operator data from the respective operator page (Name, Side, Stats, and Loadout). 

In [14]:
def getName(soup):
    """Return the operator's name in upper case.

    Finds the first ``div`` with class ``operator__header__icons__names``
    in ``soup`` and reads the text of the first ``h1`` inside it.
    """
    names_block = soup.find("div", attrs={"class": "operator__header__icons__names"})
    heading = names_block.find("h1")
    return heading.text.upper()

In [15]:
def getSide(soup):
    """Return the operator's side in upper case.

    Finds the first ``div`` with class ``operator__header__side__detail``
    in ``soup`` and reads the text of the first ``span`` inside it.
    """
    side_block = soup.find("div", attrs={"class": "operator__header__side__detail"})
    label = side_block.find("span")
    return label.text.upper()

In [16]:
def getStats(soup):
    """Return the operator's stat ratings as ``{stat_title: filled_points}``.

    Each direct child tag of the ``operator__header__stats`` container is
    treated as one stat. Its title is the lower-cased text of the ``span``
    inside the ``operator__header__stat__title`` div, and its value is the
    number of ``div`` elements inside the ``react-rater`` div whose class
    attribute is ``react-rater-star is-disabled is-active``.

    Raises:
        ValueError: if the stats container is not present in ``soup``.
    """
    stats_container = soup.find("div", attrs={"class": "operator__header__stats"})
    if stats_container is None:
        raise ValueError("operator stats section not found on page")

    stats = {}
    # Iterating a Tag directly also yields text nodes (e.g. whitespace between
    # elements), which cannot be searched. Ask for direct child tags only.
    for stat in stats_container.find_all(True, recursive=False):
        title_div = stat.find("div", attrs={"class": "operator__header__stat__title"})
        stat_title = title_div.find("span").text.lower()

        rater = stat.find("div", attrs={"class": "react-rater"})
        filled_points = rater.find_all(
            "div", attrs={"class": "react-rater-star is-disabled is-active"}
        )
        stats[stat_title] = len(filled_points)
    return stats

In [17]:
def getLoadout(soup):
    """Return the operator's loadout as a flat ``{"<category>_<n>": item}`` dict.

    Every ``operator__loadout__category`` div is one category. Its key prefix
    is the category title lower-cased with spaces replaced by underscores.
    Items are the ``operator__loadout__weapon`` divs inside the category,
    numbered from 1, with the upper-cased text of their first ``p`` as value.

    The categories ``primary_weapon``, ``secondary_weapon`` and ``gadget``
    always get slots 1-3, filled with ``None`` when the page lists fewer
    items, so those keys exist for every operator.
    """
    padded_categories = ("primary_weapon", "secondary_weapon", "gadget")

    loadout = {}
    for category in soup.find_all("div", attrs={"class": "operator__loadout__category"}):
        title_span = category.find(
            "h2", attrs={"class": "operator__loadout__category__title"}
        ).find("span")
        loadout_title = title_span.text.lower().replace(" ", "_")

        # Reserve the three base slots first so real items overwrite the
        # placeholders and the key order stays slot 1, 2, 3, ...
        if loadout_title in padded_categories:
            for slot in range(1, 4):
                loadout[f"{loadout_title}_{slot}"] = None

        items = category.find_all("div", attrs={"class": "operator__loadout__weapon"})
        for slot, item in enumerate(items, start=1):
            loadout[f"{loadout_title}_{slot}"] = item.find("p").text.upper()
    return loadout

### Function to request page and return operator information

In [18]:
def scrape_operator(operator_name, timeout=10):
    """Download one operator's page and return its data as a flat dict.

    Args:
        operator_name: Operator identifier appended to the operators URL
            (an entry of ``operator_names``).
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block the notebook forever.

    Returns:
        dict with ``"name"`` and ``"side"``, followed by the entries
        returned by ``getStats`` and ``getLoadout`` for the page.

    Raises:
        requests.HTTPError: if the server responds with a 4xx/5xx status.
        requests.Timeout: if the server does not respond within ``timeout``.
    """
    url = f"https://www.ubisoft.com/en-ca/game/rainbow-six/siege/game-info/operators/{operator_name}"
    headers = {
        "User-Agent": USER_AGENT,
        # Language ranges must be comma-separated; the q-value ranks the fallback.
        "Accept-Language": "en-US,en;q=0.5",
    }
    response = requests.get(url, headers=headers, timeout=timeout)
    # Fail here on an error page instead of later inside the parsers.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "html.parser")

    operator_information = {"name": getName(soup), "side": getSide(soup)}
    operator_information.update(getStats(soup))
    operator_information.update(getLoadout(soup))
    return operator_information

Iterate over the operator names and collect the information extracted from each operator's web page in a list.

In [19]:
# Scrape each operator page in list order; the result holds one dict per operator.
operator_data = [scrape_operator(name) for name in operator_names]
operator_data

[{'name': 'LION',
  'side': 'ATTACKER',
  'health': 2,
  'speed': 2,
  'difficulty': 1,
  'primary_weapon_1': '417',
  'primary_weapon_2': 'SG-CQB',
  'primary_weapon_3': 'V308',
  'secondary_weapon_1': 'LFP586',
  'secondary_weapon_2': 'P9',
  'secondary_weapon_3': None,
  'gadget_1': 'CLAYMORE',
  'gadget_2': 'FRAG GRENADE',
  'gadget_3': 'STUN GRENADE',
  'unique_ability_1': 'EE-ONE-D'},
 {'name': 'AMARU',
  'side': 'ATTACKER',
  'health': 2,
  'speed': 2,
  'difficulty': 2,
  'primary_weapon_1': 'G8A1',
  'primary_weapon_2': 'SUPERNOVA',
  'primary_weapon_3': None,
  'secondary_weapon_1': 'GONNE-6',
  'secondary_weapon_2': 'SMG-11',
  'secondary_weapon_3': 'ITA12S',
  'gadget_1': 'STUN GRENADE',
  'gadget_2': 'HARD BREACH CHARGE',
  'gadget_3': None,
  'unique_ability_1': 'GARRA HOOK'},
 {'name': 'GOYO',
  'side': 'DEFENDER',
  'health': 2,
  'speed': 2,
  'difficulty': 2,
  'primary_weapon_1': 'VECTOR .45 ACP',
  'primary_weapon_2': 'TCSG12',
  'primary_weapon_3': None,
  'seconda

Convert list of data to a dataframe

In [20]:
# One row per scraped operator dict; keys missing from a dict become empty cells.
df = pd.DataFrame(data=operator_data)
df

Unnamed: 0,name,side,health,speed,difficulty,primary_weapon_1,primary_weapon_2,primary_weapon_3,secondary_weapon_1,secondary_weapon_2,secondary_weapon_3,gadget_1,gadget_2,gadget_3,unique_ability_1,gadget_4,gadget_5,gadget_6,gadget_7
0,LION,ATTACKER,2,2,1,417,SG-CQB,V308,LFP586,P9,,CLAYMORE,FRAG GRENADE,STUN GRENADE,EE-ONE-D,,,,
1,AMARU,ATTACKER,2,2,2,G8A1,SUPERNOVA,,GONNE-6,SMG-11,ITA12S,STUN GRENADE,HARD BREACH CHARGE,,GARRA HOOK,,,,
2,GOYO,DEFENDER,2,2,2,VECTOR .45 ACP,TCSG12,,P229,,,PROXIMITY ALARM,BULLETPROOF CAMERA,IMPACT GRENADE,VOLCÁN CANISTER,,,,
3,NØKK,ATTACKER,2,2,3,FMG-9,SIX12 SD,,5.7 USG,D-50,,HARD BREACH CHARGE,FRAG GRENADE,IMPACT EMP GRENADE,HEL PRESENCE REDUCTION,,,,
4,WARDEN,DEFENDER,3,1,2,M590A1,MPX,,P-10C,SMG-12,,DEPLOYABLE SHIELD,NITRO CELL,OBSERVATION BLOCKER,GLANCE SMART GLASSES,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
69,TUBARÃO,DEFENDER,2,2,2,MPX,AR-15.50,,P226 MK 25,,,NITRO CELL,PROXIMITY ALARM,,ZOTO CANISTER,,,,
70,DEIMOS,ATTACKER,2,2,2,AK-74M,M590A1,,.44 VENDETTA,,,FRAG GRENADE,HARD BREACH CHARGE,,DEATHMARK TRACKER,,,,
71,STRIKER,ATTACKER,2,2,1,M4,M249,,5.7 USG,ITA12S,,BREACH CHARGE,CLAYMORE,FRAG GRENADE,GADGET KIT,HARD BREACH CHARGE,SMOKE GRENADE,STUN GRENADE,IMPACT EMP GRENADE
72,SENTRY,DEFENDER,2,2,1,COMMANDO 9,M870,,C75 AUTO,SUPER SHORTY,,BARBED WIRE,BULLETPROOF CAMERA,DEPLOYABLE SHIELD,GADGET KIT,OBSERVATION BLOCKER,IMPACT GRENADE,NITRO CELL,PROXIMITY ALARM


Convert dataframe to .csv file

In [11]:
# index=True is the pandas default: the row index is written as an unnamed first column.
df.to_csv("r6_operator_data.csv", encoding="utf-8", index=True)