In [1]:
import pandas as pd
import numpy as np
import seaborn as sns

import matplotlib.pylab as plt
from matplotlib.pyplot import imread
%matplotlib inline


#---naive bayes
from sklearn.naive_bayes import GaussianNB
from sklearn.naive_bayes import MultinomialNB
#---feature selection
from sklearn.datasets import make_classification
from sklearn.ensemble import ExtraTreesClassifier

from sklearn import preprocessing
from sklearn.preprocessing import KBinsDiscretizer
from sklearn.model_selection import train_test_split

#suppress warnings - necessary due to data set
#import warnings
#warnings.filterwarnings("ignore")

## Importing and Cleaning Data for Use

In addition to cleaning up some of the data in order to use it, prior to uploading to GitHub, it was necessary to cull a multitude of rows and columns in order to meet with the 100MB upload requirement, and the total 1GB repository size. Over half of the data set (featuring over 6 million rows in some cases) has to be dropped so it was workable, not only to fit file size limitations, but also to ensure a realistic computational speed.

Specifically, for cleaning this set of data, we:
- Narrowed the number of maps to reflect the ones most commonly played.
- Removed and replaced team names to encourage consistency.

In [2]:
duel_df = pd.read_csv('data/mm_master_demos.csv',index_col = 0)
nade_df = pd.read_csv('data/mm_grenades_demos.csv',index_col = 0)
map_boundaries= pd.read_csv('data/map_data.csv',index_col = 0)

def dropUnusedMaps(df):
    main_maps = ['de_cache', 'de_cbble', 'de_dust2', 'de_inferno', 'de_mirage', 'de_overpass', 'de_train']
    df = df[df['map'].isin(main_maps)]
    df = df.reset_index(drop=True)

#cleaning the data so we only see the maps which are commonly played
dropUnusedMaps(duel_df)
dropUnusedMaps(nade_df)
    


  interactivity=interactivity, compiler=compiler, result=result)


## Naive Bayes

In [3]:
duel_nb = duel_df



duel_nb = duel_nb.drop(columns=['date','tick','seconds',
                                'att_team','vic_team','att_side','vic_side','hp_dmg','arm_dmg','bomb_site',
                               'hitbox','wp','wp_type','award','att_id','att_rank','vic_id','vic_rank',
                               'att_pos_x','att_pos_y','vic_pos_x','vic_pos_y'])

duel_nb.shape
duel_nb.head()

Unnamed: 0,file,map,round,is_bomb_planted,winner_team,winner_side,round_type,ct_eq_val,t_eq_val,avg_match_rank
0.0,003201673717864202280_0171883906.dem,de_dust2,1.0,False,Team 1,CounterTerrorist,PISTOL_ROUND,2950.0,3850.0,16.0
1.0,003201673717864202280_0171883906.dem,de_dust2,1.0,False,Team 1,CounterTerrorist,PISTOL_ROUND,2950.0,3850.0,16.0
2.0,003201673717864202280_0171883906.dem,de_dust2,1.0,False,Team 1,CounterTerrorist,PISTOL_ROUND,2950.0,3850.0,16.0
3.0,003201673717864202280_0171883906.dem,de_dust2,1.0,False,Team 1,CounterTerrorist,PISTOL_ROUND,2950.0,3850.0,16.0
4.0,003201673717864202280_0171883906.dem,de_dust2,1.0,False,Team 1,CounterTerrorist,PISTOL_ROUND,2950.0,3850.0,16.0


In [4]:
duel_nb = duel_nb.drop_duplicates(['file','round'],keep='last')

duel_nb.shape
duel_nb.head()

Unnamed: 0,file,map,round,is_bomb_planted,winner_team,winner_side,round_type,ct_eq_val,t_eq_val,avg_match_rank
27.0,003201673717864202280_0171883906.dem,de_dust2,1.0,False,Team 1,CounterTerrorist,PISTOL_ROUND,2950.0,3850.0,16.0
53.0,003201673717864202280_0171883906.dem,de_dust2,2.0,True,Team 2,Terrorist,ECO,12400.0,4700.0,16.0
81.0,003201673717864202280_0171883906.dem,de_dust2,3.0,True,Team 2,Terrorist,ECO,4700.0,21050.0,16.0
102.0,003201673717864202280_0171883906.dem,de_dust2,4.0,True,Team 2,Terrorist,SEMI_ECO,9200.0,24250.0,16.0
127.0,003201673717864202280_0171883906.dem,de_dust2,5.0,True,Team 2,Terrorist,SEMI_ECO,10700.0,25750.0,16.0


In [5]:
le = preprocessing.LabelEncoder()

#col = ['map','att_team','vic_team','att_side','vic_side',
#       'is_bomb_planted','hitbox','wp','wp_type',
#       'round_type']

#le.fit(duel_nb['map'])
#duel_nb['map']=le.transform(duel_nb['map'])

#le.fit(duel_nb['is_bomb_planted'])
#duel_nb['is_bomb_planted']=le.transform(duel_nb['is_bomb_planted'])

#le.fit(duel_nb['round_type'])
#duel_nb['round_type']=le.transform(duel_nb['round_type'])

duel_nb["map"] = duel_nb["map"].astype('category')
duel_nb["map"] = duel_nb["map"].cat.codes 

duel_nb["is_bomb_planted"] = duel_nb["is_bomb_planted"].astype('category')
duel_nb["is_bomb_planted"] = duel_nb["is_bomb_planted"].cat.codes 

duel_nb["round_type"] = duel_nb["round_type"].astype('category')
duel_nb["round_type"] = duel_nb["round_type"].cat.codes 

duel_nb.head()

Unnamed: 0,file,map,round,is_bomb_planted,winner_team,winner_side,round_type,ct_eq_val,t_eq_val,avg_match_rank
27.0,003201673717864202280_0171883906.dem,8,1.0,0,Team 1,CounterTerrorist,3,2950.0,3850.0,16.0
53.0,003201673717864202280_0171883906.dem,8,2.0,1,Team 2,Terrorist,0,12400.0,4700.0,16.0
81.0,003201673717864202280_0171883906.dem,8,3.0,1,Team 2,Terrorist,0,4700.0,21050.0,16.0
102.0,003201673717864202280_0171883906.dem,8,4.0,1,Team 2,Terrorist,4,9200.0,24250.0,16.0
127.0,003201673717864202280_0171883906.dem,8,5.0,1,Team 2,Terrorist,4,10700.0,25750.0,16.0


In [6]:
#function to perform naive bayes analysis given a random_state seed

duel_nb = duel_nb.reset_index(drop=True)
duel_nb.dropna(how='any', inplace=True)

target_team = duel_nb['winner_side']
target_side = duel_nb['winner_team']
duel_nb = duel_nb.drop(columns=['file','winner_side','winner_team'])

def naive_bayes_analysis_team(seed, data):
    X_train= data.sample(frac = 0.7, replace = False, random_state=seed)
    X_test = data.drop(X_train.index)
    y_train = target_team.sample(frac = 0.7, replace = False, random_state=seed)
    y_test = target_team.drop(y_train.index)
    
    #multinomial
    mnb = MultinomialNB()
    mnb.fit(X_train, y_train)
    y_pred = mnb.predict(X_test)
    print("Multinomial: ", np.mean(y_pred==y_test))
    
    #encoded
    enc = KBinsDiscretizer(n_bins=5, encode='ordinal')
    X_bin = enc.fit_transform(X_train)
    X_bin_test = enc.fit_transform(X_test)
    encmnb = MultinomialNB()
    encmnb.fit(X_bin, y_train)
    y_pred2 = encmnb.predict(X_bin_test)
    print ("Encoded:     ",np.mean(y_pred2==y_test))
    
def naive_bayes_analysis_side(seed, data):
    X_train= data.sample(frac = 0.7, replace = False, random_state=seed)
    X_test = data.drop(X_train.index)
    y_train = target_side.sample(frac = 0.7, replace = False, random_state=seed)
    y_test = target_side.drop(y_train.index)
    
    #multinomial
    mnb = MultinomialNB()
    mnb.fit(X_train, y_train)
    y_pred = mnb.predict(X_test)
    print("Multinomial: ", np.mean(y_pred==y_test))
    
    #encoded
    enc = KBinsDiscretizer(n_bins=5, encode='ordinal')
    X_bin = enc.fit_transform(X_train)
    X_bin_test = enc.fit_transform(X_test)
    encmnb = MultinomialNB()
    encmnb.fit(X_bin, y_train)
    y_pred2 = encmnb.predict(X_bin_test)
    print ("Encoded:     ",np.mean(y_pred2==y_test))

In [7]:
duel_nb

Unnamed: 0,map,round,is_bomb_planted,round_type,ct_eq_val,t_eq_val,avg_match_rank
0,8,1.0,0,3,2950.0,3850.0,16.0
1,8,2.0,1,0,12400.0,4700.0,16.0
2,8,3.0,1,0,4700.0,21050.0,16.0
3,8,4.0,1,4,9200.0,24250.0,16.0
4,8,5.0,1,4,10700.0,25750.0,16.0
5,8,6.0,0,2,20000.0,26750.0,16.0
6,8,7.0,1,1,13000.0,24600.0,16.0
7,8,8.0,0,2,19450.0,21100.0,16.0
8,8,9.0,0,2,21200.0,20400.0,16.0
9,8,10.0,0,0,24600.0,7700.0,16.0


In [8]:
#loop to print all different naive bayes analysis with differnt seeds
for i in range (10, 110, 10):
    print("Seed of:", i)
    naive_bayes_analysis_team(i,duel_nb)
    print()

Seed of: 10
Multinomial:  0.6608391608391608
Encoded:      0.6998834498834499

Seed of: 20
Multinomial:  0.6343240093240093
Encoded:      0.6794871794871795

Seed of: 30
Multinomial:  0.6497668997668997
Encoded:      0.7004662004662005

Seed of: 40
Multinomial:  0.6451048951048951
Encoded:      0.6794871794871795

Seed of: 50
Multinomial:  0.6497668997668997
Encoded:      0.6925990675990676

Seed of: 60
Multinomial:  0.6378205128205128
Encoded:      0.6783216783216783

Seed of: 70
Multinomial:  0.6381118881118881
Encoded:      0.6826923076923077

Seed of: 80
Multinomial:  0.6448135198135199
Encoded:      0.6934731934731935

Seed of: 90
Multinomial:  0.6468531468531469
Encoded:      0.6832750582750583

Seed of: 100
Multinomial:  0.6372377622377622
Encoded:      0.6797785547785548



In [9]:
for i in range (10, 110, 10):
    print("Seed of:", i)
    naive_bayes_analysis_side(i,duel_nb)
    print()

Seed of: 10
Multinomial:  0.08974358974358974
Encoded:      0.5448717948717948

Seed of: 20
Multinomial:  0.06497668997668998
Encoded:      0.5157342657342657

Seed of: 30
Multinomial:  0.057692307692307696
Encoded:      0.5268065268065268

Seed of: 40
Multinomial:  0.08945221445221445
Encoded:      0.5244755244755245

Seed of: 50
Multinomial:  0.06235431235431235
Encoded:      0.5236013986013986

Seed of: 60
Multinomial:  0.06351981351981352
Encoded:      0.5358391608391608

Seed of: 70
Multinomial:  0.060023310023310024
Encoded:      0.5291375291375291

Seed of: 80
Multinomial:  0.06177156177156177
Encoded:      0.5291375291375291

Seed of: 90
Multinomial:  0.0673076923076923
Encoded:      0.5163170163170163

Seed of: 100
Multinomial:  0.06963869463869464
Encoded:      0.5305944055944056



In [10]:
def rfe(target):
    #declaring train and test variables again
    X_train= duel_nb.sample(frac = 0.7, replace = False, random_state=0)
    X_test = duel_nb.drop(X_train.index)
    y_train = target.sample(frac = 0.7, replace = False, random_state=0)
    y_test = target.drop(y_train.index)

    #algorithm to find the highest ranking features
    #obtained from: http://scikit-learn.org/stable/auto_examples/ensemble/plot_forest_importances.html#sphx-glr-auto-examples-ensemble-plot-forest-importances-py

    forest = ExtraTreesClassifier(n_estimators=250,
                              random_state=0)
    forest.fit(X_train,y_train)

    importances = forest.feature_importances_
    std = np.std([tree.feature_importances_ for tree in forest.estimators_],
             axis=0)
    indices = np.argsort(importances)[::-1]
    # Print the feature ranking
    print("Feature ranking:")

    for f in range(X_train.shape[1]):
        print("%d. feature %d (%f)" % (f + 1, indices[f], importances[indices[f]]))
    

In [11]:
print("target = team")
rfe(target_team)

print("target = side")
rfe(target_side)

target = team
Feature ranking:
1. feature 2 (0.249616)
2. feature 4 (0.218266)
3. feature 5 (0.202453)
4. feature 1 (0.121838)
5. feature 6 (0.094949)
6. feature 0 (0.081985)
7. feature 3 (0.030893)
target = side
Feature ranking:
1. feature 1 (0.310294)
2. feature 4 (0.207131)
3. feature 5 (0.205525)
4. feature 6 (0.100809)
5. feature 0 (0.092768)
6. feature 2 (0.050086)
7. feature 3 (0.033387)


In [12]:
duel_nb.head()

Unnamed: 0,map,round,is_bomb_planted,round_type,ct_eq_val,t_eq_val,avg_match_rank
0,8,1.0,0,3,2950.0,3850.0,16.0
1,8,2.0,1,0,12400.0,4700.0,16.0
2,8,3.0,1,0,4700.0,21050.0,16.0
3,8,4.0,1,4,9200.0,24250.0,16.0
4,8,5.0,1,4,10700.0,25750.0,16.0


## Smoke Spots

Smoke grenades are an essential tactical tool in CS:GO. They have the power to obscure the vision of enemy players, and facilitate skilled tactical play. Here, we will create a heat map of the most popular smoke spots on each of the 6 procured map arenas.

In [13]:
from sklearn.model_selection import train_test_split
from sklearn import linear_model
from sklearn.metrics import r2_score

In [15]:
#UNFINISHED REGRESSION
new_df = duel_df.dropna()
new_df = new_df.reset_index(drop=True)
new_df["hitbox"] = new_df["hitbox"].astype('category')
new_df["hitbox_cat"] = new_df["hitbox"].cat.codes 

new_df["winner_side"] = new_df["winner_side"].astype('category')
new_df["winner_side_cat"] = new_df["winner_side"].cat.codes 

In [16]:
reg = linear_model.LinearRegression()
x = new_df[['att_rank']]
y = new_df['hitbox_cat']
reg.fit(x, y)
print("y = x *", reg.coef_, "+", reg.intercept_)

y = x * [0.01602153] + 2.4636507082511367


In [17]:
predicted_pw = 1.4 * reg.coef_ + reg.intercept_
predicted_pw

array([2.48608086])

In [18]:
predicted = reg.predict(x)
mse = ((np.array(y)-predicted)**2).sum()/len(y)
r2 = r2_score(y, predicted)
print("MSE:", mse)
print("R Squared:", r2)

MSE: 7.90622134391043
R Squared: 0.0004332661873110988


In [None]:
ranks = new_df['att_rank']
ranks.hist(histtype = 'stepfilled', bins = 20)

In [None]:
reg = linear_model.LinearRegression()

eq_diff_ct = []

for i in range(0, len(new_df)):
    diff = (new_df['ct_eq_val'][i] - new_df['t_eq_val'][i])
    eq_diff_ct.append(diff)
        
new_df['eq_diff_ct'] = eq_diff_ct

x = new_df[['eq_diff_ct']]
y = new_df['winner_side_cat']
reg.fit(x, y)
print("y = x *", reg.coef_, "+", reg.intercept_)

predicted_pw = 1.4 * reg.coef_ + reg.intercept_
predicted_pw

predicted = reg.predict(x)
mse = ((np.array(y)-predicted)**2).sum()/len(y)
r2 = r2_score(y, predicted)
print("MSE:", mse)
print("R Squared:", r2)

In [None]:
eq_diff_t = []

for i in range(0, len(new_df)):
    diff = (new_df['ct_eq_val'][i] - new_df['t_eq_val'][i])
    eq_diff_t.append(diff)
        
new_df['eq_diff_t'] = eq_diff_t

x = new_df[['eq_diff_t']]
y = new_df['winner_side_cat']
reg.fit(x, y)
print("y = x *", reg.coef_, "+", reg.intercept_)

predicted_pw = 1.4 * reg.coef_ + reg.intercept_
predicted_pw

predicted = reg.predict(x)
mse = ((np.array(y)-predicted)**2).sum()/len(y)
r2 = r2_score(y, predicted)
print("MSE:", mse)
print("R Squared:", r2)

In [None]:
duel_df.dropna()
duel_df = duel_df.reset_index(drop=True)

duel_df['hitbox'].value_counts()

In [None]:
from sklearn import preprocessing
#le = preprocessing.LabelEncoder()

#le.fit(duel_df['hitbox'])
#duel_df['hitbox']=le.transform(duel_df['hitbox'])

duel_df["hitbox"] = duel_df["hitbox"].astype('category')
duel_df["hitbox_cat"] = duel_df["hitbox"].cat.codes 

print(duel_df['hitbox'].value_counts())
print(duel_df['hitbox_cat'].value_counts())

In [None]:
plot1 = duel_df['hitbox'].value_counts().plot(kind='bar',figsize=(14,8))