# Team Fortress 2 Unusual Data Analysis

In [1]:
%matplotlib inline
import numpy as np
import pandas as pd
# Cap DataFrame previews at 10 rows so displayed tables stay short.
pd.options.display.max_rows = 10

## Scraping the TF2 Wiki

In [2]:
import requests

# Download the wiki's unusual-quality template page.
# The timeout keeps a stalled connection from hanging the kernel, and
# raise_for_status() stops here on an HTTP error status instead of letting an
# error page be parsed by the cells below.
resp = requests.get(
    "https://wiki.teamfortress.com/wiki/Template:Unusual_quality_table",
    timeout=30,
)
resp.raise_for_status()

In [3]:
from bs4 import BeautifulSoup
# Parse the downloaded page bytes using the "html.parser" backend.
soup = BeautifulSoup(resp.content, "html.parser")

In [4]:
# Class labels looked for in each scraped table row.
tf2_classes = [
    "Scout",
    "Soldier",
    "Pyro",
    "Demoman",
    "Heavy",
    "Medic",
    "Spy",
    "Sniper",
    "Engineer",
    "All classes",
]

# Equip-region labels looked for in each scraped table row.
relevant_equip_regions = [
    "Primary",
    "Secondary",
    "Cosmetic",
    "Taunt",
    "Melee",
]

In [5]:
# Flatten every <tr> of every <table> on the page into a list of text fields,
# obtained by splitting the row's stripped text on double newlines.
tables = soup.find_all("table")
rows = [
    tr.text.strip().split("\n\n")
    for tbl in tables
    for tr in tbl.find_all("tr")
]

In [6]:
# The first and last scraped rows are excluded from the walk.
class_info = rows[1:-1]

# Labels carried forward from the most recent header-like row.
current_class = "Scout"
current_equip = "Cosmetic"

results = []
last_index = len(class_info) - 1

for position, fields in enumerate(class_info):
    is_header = False

    # A row containing a class label starts a new section: its first field
    # becomes the class and its second field, minus one leading character,
    # becomes the equip type.
    if any(label in fields for label in tf2_classes):
        current_class = fields[0]
        current_equip = fields[1][1:]
        is_header = True

    # A row containing an equip-region label switches the equip type to its
    # first field. The very last row is exempt from this check.
    if position < last_index and any(label in fields for label in relevant_equip_regions):
        current_equip = fields[0]
        is_header = True

    # Every other row is data, tagged with the labels currently in effect.
    if not is_header:
        results.append([current_class, current_equip, fields])

In [7]:
final_results = []
for tf_class, item_type, names in results:
    # `and` binds tighter than `or`, so taunt rows always pass while cosmetic
    # rows pass only when their first field is non-empty.
    # NOTE(review): if the emptiness check was meant to apply to taunts too,
    # the grouping needs to change - confirm the intent.
    keep = item_type == "Taunt" or (item_type == "Cosmetic" and len(names[0]) > 0)
    if not keep:
        continue

    for raw_name in names:
        # Remove one leading whitespace character when there is one.
        if raw_name and raw_name[0].isspace():
            cleaned_name = raw_name[1:]
        else:
            cleaned_name = raw_name
        final_results.append([cleaned_name, tf_class, item_type])

In [8]:
# Give the three positional columns of the scraped records readable names.
scraped_column_names = {0: "Name", 1: "Class", 2: "Item Type"}
class_scraped_df = pd.DataFrame(final_results).rename(columns=scraped_column_names)

class_scraped_df

Unnamed: 0,Name,Class,Item Type
0,Batter's Helmet,Scout,Cosmetic
1,Baseball Bill's Sports Shine,Scout,Cosmetic
2,Bonk Helm,Scout,Cosmetic
3,Ye Olde Baker Boy,Scout,Cosmetic
4,Troublemaker's Tossle Cap,Scout,Cosmetic
...,...,...,...
491,Zoomin' Broom,All classes,Taunt
492,Second Rate Sorcery,All classes,Taunt
493,Victory Lap,All classes,Taunt
494,Yeti Punch,All classes,Taunt


In [9]:
# Download and parse the wiki's "Unusual" page.
# The timeout bounds how long a stalled connection can block the kernel, and
# raise_for_status() fails here on an HTTP error status rather than letting an
# error page be parsed as if it were the article.
resp2 = requests.get("https://wiki.teamfortress.com/wiki/Unusual", timeout=30)
resp2.raise_for_status()
soup2 = BeautifulSoup(resp2.content, "html.parser")

In [10]:
# Phrase searched for in a paragraph -> generation label recorded for the
# effect names that follow it.
dict_of_gen_names = {
    "Series #1": "First Gen",
    "Series #26": "Second Gen",
    "Series #59": "Third Gen",
    "Very Scary Halloween Special": "Halloween 2011",
    "Spectral Halloween Special": "Halloween 2012",
    "Scream Fortress 2013": "Halloween 2013",
    "Scream Fortress 2014": "Halloween 2014",
    "Scream Fortress 2015": "Halloween 2015",
    "Scream Fortress 2016": "Halloween 2016",
    "Scream Fortress 2018": "Halloween 2018",
    "Robotic Boogaloo": "Robotic Boogaloo",
    "Love & War": "Love & War",
    "End of the Line": "End of the Line",
    "Gun Mettle": "Gun Mettle",
    "Invasion": "Invasion",
}
current_gen = "First Gen"

gens_result_list = []

# The first five <p> elements are skipped.
for paragraph in soup2.find_all("p")[5:]:
    line_2 = paragraph.text.strip()
    # The first empty paragraph ends the scan.
    if not line_2:
        break

    word_count = len(line_2.split(" "))
    if word_count <= 5:
        # Short paragraphs are recorded as effect names under the generation
        # currently in effect.
        gens_result_list.append([line_2, current_gen])
    elif word_count < 50:
        # Mid-length paragraphs may announce a new generation. When several
        # phrases match, the last match in dictionary order wins.
        for phrase, generation in dict_of_gen_names.items():
            if phrase in line_2:
                current_gen = generation


In [11]:
# Build the effect/generation table and drop the repeated rows at labels 22
# and 103.
gens = (
    pd.DataFrame(gens_result_list)
    .rename(columns={0: "Effect", 1: "Generation"})
    .drop([22, 103])
)

# Overwrite the effect name stored at each of these row labels.
effect_name_overrides = {
    23: "Aces High",
    104: "Showstopper",
    52: "The Ooze",
    49: "Ghastly Ghosts Jr",
}
for row_label, effect_name in effect_name_overrides.items():
    gens.at[row_label, "Effect"] = effect_name

gens_df = gens

In [12]:
# Display the effect/generation table.
gens_df

Unnamed: 0,Effect,Generation
0,Burning Flames,First Gen
1,Circling Heart,First Gen
2,Circling Peace Sign,First Gen
3,Circling TF Logo,First Gen
4,Green Confetti,First Gen
...,...,...
119,Magnetic Hat Protector,Invasion
120,Voltaic Hat Protector,Invasion
121,Galactic Codex,Invasion
122,Ancient Codex,Invasion


In [13]:
# Download and parse backpack.tf's particle list.
# The timeout bounds how long a stalled connection can block the kernel, and
# raise_for_status() fails here on an HTTP error status rather than letting an
# error page produce an empty id table further down.
resp3 = requests.get("https://backpack.tf/developer/particles", timeout=30)
resp3.raise_for_status()
soup3 = BeautifulSoup(resp3.content, "html.parser")

In [14]:
id_array = []

tables = soup3.find_all("table")
for table in tables:
    # The first <tr> of each table is skipped (presumably the header row).
    for row in table.find_all("tr")[1:]:
        items = row.text.strip().split()
        if not items:
            # A row without any text has no id token to read.
            continue
        # The first token is the id with its leading character removed. It is
        # stored under a name that does not shadow the builtin `id`.
        particle_id = items[0][1:]
        # The effect name is every token after the id except the last three.
        name = " ".join(items[1:-3])
        id_array.append([name, particle_id])

id_df = pd.DataFrame(id_array).rename(columns={0: "Effect", 1: "Id"})
id_df

Unnamed: 0,Effect,Id
0,Community Sparkle,4
1,Holy Glow,5
2,Green Confetti,6
3,Purple Confetti,7
4,Haunted Ghosts,8
...,...,...
120,Flammable Bubbles of Attraction,3018
121,Poisonous Bubbles of Regret,3019
122,Roaring Rockets,3020
123,Spooky Night,3021


In [15]:
# Outer-join effect names to particle ids, then discard rows that have no
# Generation value.
effects_df = pd.merge(gens_df, id_df, on="Effect", how="outer")
has_generation = effects_df["Generation"].notna()
effects_df = effects_df[has_generation]

In [16]:
# Write the merged effect table to CSV, then display it.
effects_df.to_csv("Full Effect Data.csv")
effects_df

Unnamed: 0,Effect,Generation,Id
0,Burning Flames,First Gen,13
1,Circling Heart,First Gen,19
2,Circling Peace Sign,First Gen,18
3,Circling TF Logo,First Gen,11
4,Green Confetti,First Gen,6
...,...,...,...
117,Magnetic Hat Protector,Invasion,95
118,Voltaic Hat Protector,Invasion,96
119,Galactic Codex,Invasion,97
120,Ancient Codex,Invasion,98


In [17]:
# Download and parse the Steam Community guide listing crate contents.
# The timeout bounds how long a stalled connection can block the kernel, and
# raise_for_status() fails here on an HTTP error status rather than letting an
# error page be parsed as if it were the guide.
resp4 = requests.get(
    "https://steamcommunity.com/sharedfiles/filedetails/?id=731640447",
    timeout=30,
)
resp4.raise_for_status()
soup4 = BeautifulSoup(resp4.content, "html.parser")

In [18]:
# Section titles, minus the first and last matches, are used as crate names.
title_divs = soup4.find_all("div", {"class": "subSectionTitle"})[1:-1]
crates = [title_div.text.strip() for title_div in title_divs]

In [19]:
# Display the scraped crate titles.
crates

['The Scout Crate',
 'The Soldier Crate',
 'The Pyro Crate',
 'The Demoman Crate',
 'The Heavy Crate',
 'The Engineer Crate',
 'The Medic Crate',
 'The Sniper Crate',
 'The Spy Crate',
 'The Multi/All Crate']

In [20]:
# Text substitutions applied, in this order, to each section description.
# Headings for the "won't find" list become a "%" separator; headings for the
# "can find" list, asterisks, tabs and newlines are removed.
marker_replacements = [
    ("*", ""),
    ("Items you won't find", "%"),
    ("Items you won't  find", "%"),
    ("Items you can find", ""),
    ("Things you won't find", "%"),
    ("Things you can find", ""),
    ("\t", ""),
    ("\n", ""),
]

final_arr = []

# The first and last description blocks are skipped, matching the slice used
# for the section titles in `crates`.
section_divs = soup4.find_all("div", {"class": "subSectionDesc"})[1:-1]
for counter, line in enumerate(section_divs):
    text = line.text
    for marker, replacement in marker_replacements:
        text = text.replace(marker, replacement)

    # Everything before the first "%" is the comma-separated list of items the
    # crate can drop. The list after it was never used, so it is no longer
    # indexed: a section without a "won't find" heading used to raise
    # IndexError at that point.
    find = text.split("%")[0].split(",")

    # Drop the first four characters of the title (the leading "The " in the
    # titles listed above).
    crate_name = crates[counter][4:]
    for item in find:
        final_arr.append([item.strip(), crate_name])

In [21]:
# Name the two positional columns of the (item, crate) records.
crate_column_names = {0: "Name", 1: "Crate Name"}
unlocked_crates = pd.DataFrame(final_arr).rename(columns=crate_column_names)
unlocked_crates

Unnamed: 0,Name,Crate Name
0,Argyle Ace,Scout Crate
1,Backwards Ballcap,Scout Crate
2,Baseball Bill's Sports Shine,Scout Crate
3,Big Elfin Deal,Scout Crate
4,Bigg Mann on Campus,Scout Crate
...,...,...
446,Virtual Viewfinder,Multi/All Crate
447,Vive La France,Multi/All Crate
448,Weight Room Warmer,Multi/All Crate
449,Well-Rounded Rifleman,Multi/All Crate


In [22]:
# Item names as a NumPy array; the next cell iterates over it to query each
# item's wiki page.
cosmetics = np.array(class_scraped_df["Name"])

In [23]:
import time
import re
from urllib.parse import quote

results = []

for cosmetic in cosmetics:

    # Build the page path from the item name: spaces become underscores and
    # every other unsafe character is percent-encoded. This covers the
    # "Defragmenting Hard Hat 17%" case (-> "Defragmenting_Hard_Hat_17%25")
    # without a per-item special case.
    to_search = quote(cosmetic.replace(" ", "_"))

    # Loop-local names are used so the `resp` / `soup` objects created by the
    # earlier cells are not overwritten. The timeout keeps one stalled request
    # from hanging the whole scrape.
    item_resp = requests.get("https://wiki.teamfortress.com/wiki/" + to_search, timeout=30)
    item_soup = BeautifulSoup(item_resp.content, "html.parser")

    # Defaults recorded when the page does not provide a value.
    equip_region = np.nan
    crate = np.nan
    grade = np.nan

    # Only the first two tables of the page are inspected.
    for table in item_soup.find_all("table")[:2]:
        for row in table.find_all("tr"):
            line = row.text.strip()
            words = line.split()

            if "Equip region:" in line:
                # Text after the first 13 characters (the length of
                # "Equip region:").
                temp = line[13:]
                # Only regions mentioning none of Hat/Head/Whole are labelled;
                # everything else keeps the NaN default.
                if "Hat" not in temp and "Head" not in temp and "Whole" not in temp:
                    equip_region = "Misc"

            if "Availability:" in line:
                # Keep only the digits of the availability line.
                crate = re.sub('[^0-9]', '', line)
                if len(crate) == 0:
                    crate = np.nan

            if "Grade" in words:
                # The grade is the word immediately before "Grade". When
                # "Grade" is the first word there is no such word, and index
                # -1 would wrongly pick the last word of the line.
                grade_loc = words.index("Grade") - 1
                if grade_loc >= 0:
                    grade = words[grade_loc]

    results.append([cosmetic, equip_region, crate, grade])
    # Short pause between requests.
    time.sleep(0.1)

In [24]:
# One row per scraped item: [name, equip region, crate digits, grade].
# Columns are still numbered 0-3 at this point.
hat_misc_grade = pd.DataFrame(results)

In [25]:
# Display the raw per-item scrape results.
hat_misc_grade

Unnamed: 0,0,1,2,3
0,Batter's Helmet,,,
1,Baseball Bill's Sports Shine,,,
2,Bonk Helm,,,
3,Ye Olde Baker Boy,,23,
4,Troublemaker's Tossle Cap,,3,
...,...,...,...,...
491,Zoomin' Broom,,,
492,Second Rate Sorcery,,,
493,Victory Lap,,,
494,Yeti Punch,,,


In [26]:
# Replace the positional column labels with readable names.
hat_misc_grade = hat_misc_grade.rename(
    columns={0: "Name", 1: "Equip Region", 2: "Crate Number", 3: "Grade"}
)

# Fill in a default label wherever the scrape left a value missing.
column_defaults = (("Equip Region", "Regular"), ("Grade", "No Grade"))
for column_name, default_label in column_defaults:
    hat_misc_grade[column_name] = hat_misc_grade[column_name].fillna(default_label)

hat_misc_grade

Unnamed: 0,Name,Equip Region,Crate Number,Grade
0,Batter's Helmet,Regular,,No Grade
1,Baseball Bill's Sports Shine,Regular,,No Grade
2,Bonk Helm,Regular,,No Grade
3,Ye Olde Baker Boy,Regular,23,No Grade
4,Troublemaker's Tossle Cap,Regular,3,No Grade
...,...,...,...,...
491,Zoomin' Broom,Regular,,No Grade
492,Second Rate Sorcery,Regular,,No Grade
493,Victory Lap,Regular,,No Grade
494,Yeti Punch,Regular,,No Grade


In [27]:
# (crate name, crate number) pairs. The numbers are kept as strings so they
# can be joined against the digit strings in the "Crate Number" column.
_case_pairs = (
    ("Winter 2018 Cosmetic", "122"),
    ("Unleash the Beast Cosmetic", "108"),
    ("Abominable Cosmetic", "107"),
    ("Rainy Day Cosmetic", "106"),
    ("Unlocked Winter 2016 Cosmetic", "2016"),
    ("Mayflower Cosmetic", "102"),
    ("Tough Break Cosmetic", "101"),
    ("Violet Vermin", "120"),
    ("Blue Moon Cosmetic", "119"),
    ("Winter 2017 Cosmetic", "117"),
    ("Creepy Crawly", "104"),
    ("Gargoyle", "98"),
    ("Confidential Collection", "97"),
    ("Quarantined Collection", "96"),
    ("Gun Mettle Cosmetic", "95"),
)
relevant_cases = [[crate_name, crate_number] for crate_name, crate_number in _case_pairs]

case_column_names = {0: "Crate Name", 1: "Crate Number"}
non_unlocked_cases = pd.DataFrame(relevant_cases).rename(columns=case_column_names)
non_unlocked_cases

Unnamed: 0,Crate Name,Crate Number
0,Winter 2018 Cosmetic,122
1,Unleash the Beast Cosmetic,108
2,Abominable Cosmetic,107
3,Rainy Day Cosmetic,106
4,Unlocked Winter 2016 Cosmetic,2016
...,...,...
10,Creepy Crawly,104
11,Gargoyle,98
12,Confidential Collection,97
13,Quarantined Collection,96


In [28]:
# Display the (item, unlocked crate) table again before merging it.
unlocked_crates

Unnamed: 0,Name,Crate Name
0,Argyle Ace,Scout Crate
1,Backwards Ballcap,Scout Crate
2,Baseball Bill's Sports Shine,Scout Crate
3,Big Elfin Deal,Scout Crate
4,Bigg Mann on Campus,Scout Crate
...,...,...
446,Virtual Viewfinder,Multi/All Crate
447,Vive La France,Multi/All Crate
448,Weight Room Warmer,Multi/All Crate
449,Well-Rounded Rifleman,Multi/All Crate


In [29]:
# Left join on "Crate Number": every scraped item is kept, and items whose
# crate number is not listed in non_unlocked_cases get a missing "Crate Name".
combined_cases = hat_misc_grade.merge(non_unlocked_cases, on = "Crate Number", how = "left")

In [30]:
# Attach the unlocked-crate name by item name. Both inputs carry a
# "Crate Name" column, so the merge yields "Crate Name_x" (from the
# crate-number lookup) and "Crate Name_y" (from the unlocked crates).
all_cases = combined_cases.merge(unlocked_crates, on="Name", how="left")

# Concatenate the two sources, treating a missing value as an empty string.
name_from_number = all_cases["Crate Name_x"].fillna('')
name_from_unlocked = all_cases["Crate Name_y"].fillna('')
all_cases["Crate Name"] = name_from_number + name_from_unlocked

# Turn empty strings back into NaN (applied to the whole frame), then label
# the items that matched neither source.
# NOTE(review): regex=True is kept as written; with an empty pattern pandas
# appears to fall back to literal matching - confirm.
all_cases = all_cases.replace("", np.nan, regex=True)
all_cases["Crate Name"] = all_cases["Crate Name"].fillna("Old Series")

# Drop the two intermediate merge columns.
final_cases = all_cases.drop(columns=["Crate Name_x", "Crate Name_y"])
final_cases

Unnamed: 0,Name,Equip Region,Crate Number,Grade,Crate Name
0,Batter's Helmet,Regular,,No Grade,Old Series
1,Baseball Bill's Sports Shine,Regular,,No Grade,Scout Crate
2,Bonk Helm,Regular,,No Grade,Scout Crate
3,Ye Olde Baker Boy,Regular,23,No Grade,Scout Crate
4,Troublemaker's Tossle Cap,Regular,3,No Grade,Scout Crate
...,...,...,...,...,...
491,Zoomin' Broom,Regular,,No Grade,Old Series
492,Second Rate Sorcery,Regular,,No Grade,Old Series
493,Victory Lap,Regular,,No Grade,Old Series
494,Yeti Punch,Regular,,No Grade,Old Series


In [31]:
# pd.concat(axis=1) pairs rows by index label. If a merge upstream added or
# reordered rows, items would be silently paired with another item's crate
# data, so check the alignment before concatenating.
assert final_cases.index.equals(class_scraped_df.index), (
    "final_cases and class_scraped_df are not row-aligned; "
    "check the merges for duplicated rows"
)
combined = pd.concat([class_scraped_df, final_cases.drop(columns="Name")], axis=1)

# Taunts that fell through to "Old Series" are relabelled "Unusualifier";
# every other row keeps its crate name.
is_old_series_taunt = (combined["Crate Name"] == "Old Series") & (combined["Item Type"] == "Taunt")
combined["Unboxed From"] = combined["Crate Name"].mask(is_old_series_taunt, "Unusualifier")

# Items in the "Misc" equip region are marked as not unboxable, overriding
# the label assigned above.
is_misc = combined["Equip Region"] == "Misc"
combined["Unboxed From"] = combined["Unboxed From"].mask(is_misc, "Not Unboxable")

In [32]:
# Items whose class label is overridden with "Multi-Class".
multi_class = [
    "Team Captain",
    "Hat With No Name",
    "War Eagle",
    "HazMat Headcase",
    "Powdered Practitioner",
    "Toy Soldier",
    "Pestering Jester",
    "Trucker's Topper",
    "Deep Cover Operator",
    "Nuke",
]

# Rows whose name is in the list get the new label; all others keep theirs.
is_multi_class = combined["Name"].isin(multi_class)
combined["Class"] = combined["Class"].mask(is_multi_class, "Multi-Class")

In [33]:
# Items whose equip region is overridden with "Misc".
misc = [
    "Polar Pullover",
    "Large Lunchadore",
    "Master's Yellow Belt",
    "Soldered Sensei",
    "Le Party Phantom",
]

# Rows whose name is in the list get the new label; all others keep theirs.
is_listed_misc = combined["Name"].isin(misc)
combined["Equip Region"] = combined["Equip Region"].mask(is_listed_misc, "Misc")

In [34]:
# Download and parse the wiki's Robotic Boogaloo page.
# The timeout bounds how long a stalled connection can block the kernel, and
# raise_for_status() fails here on an HTTP error status rather than letting an
# error page be parsed as if it were the article.
resp5 = requests.get("https://wiki.teamfortress.com/wiki/Robotic_Boogaloo", timeout=30)
resp5.raise_for_status()
soup5 = BeautifulSoup(resp5.content, "html.parser")

In [35]:
robos = []
tables = soup5.find_all("table")
# Only the second table on the page is read.
for table in tables[1:2]:
    for row in table.find_all("tr"):
        # Remove newlines and turn each double space into a "%" marker.
        text = row.text.strip().replace("\n", "").replace("  ", "%")
        # Keep what follows the first "%"; rows without one are kept whole.
        before, marker, after = text.partition("%")
        robos.append(after if marker else before)

# Flag each item whose name appears among the scraped row texts.
combined["Robo"] = combined["Name"].isin(robos)

In [36]:
# Remove the two intermediate crate columns and the row labelled 487.
# NOTE(review): 487 is a hard-coded row label whose meaning is not visible
# here, and it depends on the scrape order. Re-running this cell raises
# KeyError because the columns and the row are already gone - confirm.
combined = combined.drop(columns=["Crate Name", "Crate Number"]).drop(index=487)

In [37]:
# Keep only the first row for each item name.
combined = combined.drop_duplicates("Name")

In [38]:
# Write the final item table to CSV.
combined.to_csv("Full Hat Data.csv")