In [119]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder

from sklearn.linear_model import LogisticRegression

#### Fetching the data

In [110]:
# Load the dataset from data.csv (relative path, resolved against the
# kernel's working directory).
df = pd.read_csv("data.csv")

In [112]:
# Features treated as irrelevant for this analysis, grouped by feature type.
# The type (dict key) and the name (list entry) are combined with a side
# prefix below to form the actual column labels.
irr_fts = {
    "weapon": ["ak47", "ssg08", "fiveseven", "p250", "usps", "aug", "awp", "galilar", "glock", "cz75auto", "bizon", "elite", "famas", "g3sg1", "m249", "m4a1s", "m4a4", "mac10", "mag7", "mp5sd", "mp7", "mp9", "negev", "nova", "p90", "r8revolver", "sawedoff", "scar20", "sg553", "ump45", "xm1014", "deagle", "tec9", "p2000"],
    "grenade": ["hegrenade", "decoygrenade", "flashbang", "smokegrenade", "incendiarygrenade", "molotovgrenade"],
}

# Expand every (type, name) pair into one label per side, e.g.
# "ct_weapon_ak47" followed by "t_weapon_ak47". The comprehension yields the
# labels in the same order as nested loops would, without leaving loop
# temporaries behind in the notebook's global namespace.
irr_cols = [
    f"{side}_{ft_type}_{ft_name}"
    for ft_type, ft_names in irr_fts.items()
    for ft_name in ft_names
    for side in ("ct", "t")
]

#### Remove irrelevant columns

In [113]:
# Rebind `df` to the reduced frame rather than mutating it in place, and skip
# labels that are no longer present, so re-running this cell does not raise
# KeyError.
# NOTE: errors="ignore" also silences misspelled labels in irr_cols.
df = df.drop(columns=irr_cols, errors="ignore")

#### Convert features to suitable data types

In [129]:
# One encoder per column: refitting a single shared encoder on round_winner
# would overwrite the classes learned for map, leaving the map codes with no
# fitted encoder to decode them through inverse_transform.
map_le = LabelEncoder()
df["map"] = map_le.fit_transform(df["map"])

# `le` ends up fitted on round_winner, so le.classes_ and
# le.inverse_transform map the integer labels back to the original values.
le = LabelEncoder()
df["round_winner"] = le.fit_transform(df["round_winner"])

# Cast bomb_planted to a 16-bit integer column.
df["bomb_planted"] = df["bomb_planted"].astype(np.int16)

In [133]:
# Display the frame as it stands after the column drop and the encoding step.
df

Unnamed: 0,time_left,ct_score,t_score,map,bomb_planted,ct_health,t_health,ct_armor,t_armor,ct_money,t_money,ct_helmets,t_helmets,ct_defuse_kits,ct_players_alive,t_players_alive,round_winner
0,175.00,0.0,0.0,1,0,500.0,500.0,0.0,0.0,4000.0,4000.0,0.0,0.0,0.0,5.0,5.0,0
1,156.03,0.0,0.0,1,0,500.0,500.0,400.0,300.0,600.0,650.0,0.0,0.0,1.0,5.0,5.0,0
2,96.03,0.0,0.0,1,0,391.0,400.0,294.0,200.0,750.0,500.0,0.0,0.0,1.0,4.0,4.0,0
3,76.03,0.0,0.0,1,0,391.0,400.0,294.0,200.0,750.0,500.0,0.0,0.0,1.0,4.0,4.0,0
4,174.97,1.0,0.0,1,0,500.0,500.0,192.0,0.0,18350.0,10750.0,0.0,0.0,1.0,5.0,5.0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
122405,15.41,11.0,14.0,6,1,200.0,242.0,195.0,359.0,100.0,5950.0,2.0,4.0,1.0,2.0,4.0,1
122406,174.93,11.0,15.0,6,0,500.0,500.0,95.0,175.0,11500.0,23900.0,1.0,2.0,1.0,5.0,5.0,1
122407,114.93,11.0,15.0,6,0,500.0,500.0,495.0,475.0,1200.0,6700.0,3.0,5.0,1.0,5.0,5.0,1
122408,94.93,11.0,15.0,6,0,500.0,500.0,495.0,475.0,1200.0,6700.0,3.0,5.0,1.0,5.0,5.0,1


#### Prepare the data for the models

In [139]:
# Predictor columns handed to the models
features = [
    "ct_score",
    "t_score",
    "ct_armor",
    "t_armor",
]

# X holds the selected predictors; y is the round_winner label (1 or 0)
X = df[features]
y = df["round_winner"]