In [55]:
import pandas as pd
import numpy as np
from bs4 import BeautifulSoup
from sklearn.linear_model import LinearRegression
from matplotlib import pyplot


In [62]:
# Load the raw listings and discard the unwanted "Unnamed: 0" column in one step.
data = pd.read_csv("dataset.csv").drop(columns="Unnamed: 0")
data.head()

Unnamed: 0,gun,skin,rarity,stattrak,quality,price,quantity
0,AK-47,Point Disarray,Classified,0,Factory New,45.05,44.0
1,AK-47,Point Disarray,Classified,0,Minimal Wear,31.35,81.0
2,AK-47,Point Disarray,Classified,0,Field-Tested,19.59,122.0
3,AK-47,Point Disarray,Classified,0,Well-Worn,22.82,17.0
4,AK-47,Point Disarray,Classified,0,Battle-Scarred,16.42,40.0


In [63]:
# List the distinct rarity labels so we know which strings need a numeric mapping.
pd.unique(data["rarity"])

array(['Classified', 'Restricted', 'Covert', 'Mil-Spec', 'Industrial',
       'Consumer', 'Contraband'], dtype=object)

In [64]:
# Rarity labels listed from lowest (1) to highest (7); the position in this tuple
# becomes the numeric code for the label.
rarity_map = {
    label: rank
    for rank, label in enumerate(
        ("Consumer", "Industrial", "Mil-Spec", "Restricted",
         "Classified", "Covert", "Contraband"),
        start=1,
    )
}

# Replace each rarity label with its numeric code; labels missing from the map become NaN.
data["rarity"] = data["rarity"].map(rarity_map)
data.head()


Unnamed: 0,gun,skin,rarity,stattrak,quality,price,quantity
0,AK-47,Point Disarray,5,0,Factory New,45.05,44.0
1,AK-47,Point Disarray,5,0,Minimal Wear,31.35,81.0
2,AK-47,Point Disarray,5,0,Field-Tested,19.59,122.0
3,AK-47,Point Disarray,5,0,Well-Worn,22.82,17.0
4,AK-47,Point Disarray,5,0,Battle-Scarred,16.42,40.0


In [65]:
# Remember the row count so we can report how many rows dropna() removes.
original_length = data.shape[0]
data = data.dropna()  # discard every row that has a NaN in any column
print(f"{original_length - data.shape[0]} NaN items dropped.")

342 NaN items dropped.


In [66]:
# Coerce the price column to a numeric dtype, downcast to the smallest float type that fits.
data["price"] = pd.to_numeric(
    data["price"],
    downcast="float",
)

In [68]:
# Build a new dataframe with one row per (gun, skin, stattrak) combination:
# the weapon, its skin, that skin's rarity level, the stattrak flag, and the
# mean price across the rows for that combination.
#
# Rows are collected in a list and turned into a DataFrame once at the end.
# DataFrame.append was removed in pandas 2.0, and calling it per row copied the
# whole frame every time. Grouping compares gun names by exact equality, whereas
# str.match treats the name as a regular expression anchored only at the start
# and so can also select rows of other guns. sort=False keeps every level in
# order of first appearance in `data`.
output_columns = ['gun', 'skin', 'rarity', 'stattrak', 'avg-price']
records = []

for gun, gun_rows in data.groupby("gun", sort=False):
    for skin, skin_rows in gun_rows.groupby("skin", sort=False):
        for stattrak, variant_rows in skin_rows.groupby("stattrak", sort=False):
            records.append({
                'gun': gun,
                'skin': skin,
                # Mean of the numeric rarity codes over this combination's rows.
                'rarity': variant_rows['rarity'].mean(),
                'stattrak': stattrak,
                'avg-price': variant_rows['price'].mean(),
            })

# Passing columns= fixes the column order and still yields the expected
# (empty) frame when there are no records.
new_data = pd.DataFrame(records, columns=output_columns)


In [69]:
# Convert the averaged rarity values to numeric, downcast to the smallest integer type
# when the values allow it.
new_data["rarity"] = pd.to_numeric(
    new_data["rarity"],
    downcast="integer",
)

In [70]:
# Eyeball the first five aggregated rows as a sanity check before saving to .csv.
new_data.head(n=5)

Unnamed: 0,gun,skin,rarity,stattrak,avg-price
0,AK-47,Point Disarray,5,0,27.046001
1,AK-47,Point Disarray,5,1,69.606003
2,AK-47,Blue Laminate,4,0,8.3225
3,AK-47,Blue Laminate,4,1,25.054998
4,AK-47,Vulcan,6,0,83.848


In [71]:
# Weapon names belonging to each category; used to label every row of new_data.
pistols = ['CZ75-Auto', 'Desert Eagle', 'Dual Berettas', 'Five-SeveN', 'Glock-18', 'P2000', 'P250', 'R8 Revolver', 'Tec-9', 'USP-S']
rifles = ['AK-47', 'AUG', 'AWP', 'FAMAS', 'G3SG1', 'Galil AR', 'M4A1-S', 'M4A4', 'SCAR-20', 'SG 553', 'SSG 08']
smgs = ['MAC-10', 'MP5-SD', 'MP7', 'MP9', 'PP-Bizon', 'P90', 'UMP-45']  # duplicate 'P90' entry removed
heavies = ['MAG-7', 'Nova', 'Sawed-Off', 'XM1014', 'M249', 'Negev']
knives = ['★ Nomad Knife', '★ Skeleton Knife', '★ Survival Knife', '★ Paracord Knife', '★ Classic Knife', '★ Navaja Knife', '★ Stiletto Knife', '★ Talon Knife', '★ Ursus Knife', '★ Bayonet', '★ Bowie Knife', '★ Butterfly Knife', '★ Falchion Knife', '★ Flip Knife', '★ Gut Knife', '★ Huntsman Knife', '★ Karambit', '★ M9 Bayonet', '★ Shadow Daggers']

# Flatten the lists into a single weapon -> category lookup. setdefault keeps the
# first category recorded for a name, so precedence is pistol, rifle, smg, heavy, knife.
category_by_weapon = {}
for category, weapons in (('pistol', pistols), ('rifle', rifles), ('smg', smgs),
                          ('heavy', heavies), ('knife', knives)):
    for weapon in weapons:
        category_by_weapon.setdefault(weapon, category)

# Strip trailing whitespace so gun names compare equal to the entries above.
# NOTE(review): this runs after the prices were averaged per gun; if the raw data
# mixes e.g. 'X' and 'X ', those were aggregated as separate guns -- confirm.
new_data['gun'] = new_data['gun'].str.rstrip()

# One category label per row; any weapon not present in the lookup is labelled 'none'.
categories = [category_by_weapon.get(weapon, 'none') for weapon in new_data['gun']]

new_data['category'] = categories
new_data.head()

Unnamed: 0,gun,skin,rarity,stattrak,avg-price,category
0,AK-47,Point Disarray,5,0,27.046001,rifle
1,AK-47,Point Disarray,5,1,69.606003,rifle
2,AK-47,Blue Laminate,4,0,8.3225,rifle
3,AK-47,Blue Laminate,4,1,25.054998,rifle
4,AK-47,Vulcan,6,0,83.848,rifle


In [72]:
# Persist the cleaned, aggregated table. index=True is the pandas default: the row
# index is written as a leading column with an empty header.
new_data.to_csv('average-price-data-clean.csv', index=True)