<a href="https://colab.research.google.com/github/curtislo93/CS2-betting-model/blob/main/CS2_Data_Model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
import numpy as np
from google.colab import drive
import re
import warnings
import statsmodels.formula.api as smf
import matplotlib.pyplot as plt
import seaborn as sns

warnings.filterwarnings("ignore")

drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


# Functions

In [2]:
def calculate_round_winner(cs2, max_round):
    for round_number in range(1, max_round + 1):
        conditions = []
        choices = ['Favorite', 'Underdog', 'Favorite', 'Underdog']

        # Define conditions based on round number
        if round_number == 1:
            conditions = [
                (cs2['Moneyline A'] < cs2['Moneyline B']) & (cs2['R1 Score (A)'] > cs2['R1 Score (B)']),
                (cs2['Moneyline A'] > cs2['Moneyline B']) & (cs2['R1 Score (A)'] > cs2['R1 Score (B)']),
                (cs2['Moneyline B'] < cs2['Moneyline A']) & (cs2['R1 Score (B)'] > cs2['R1 Score (A)']),
                (cs2['Moneyline B'] > cs2['Moneyline A']) & (cs2['R1 Score (B)'] > cs2['R1 Score (A)'])
            ]
        elif round_number == 2:
            conditions = [
                (cs2['Moneyline A'] < cs2['Moneyline B']) & (cs2['R2 Score (A)'] > cs2['R2 Score (B)']),
                (cs2['Moneyline A'] > cs2['Moneyline B']) & (cs2['R2 Score (A)'] > cs2['R2 Score (B)']),
                (cs2['Moneyline B'] < cs2['Moneyline A']) & (cs2['R2 Score (B)'] > cs2['R2 Score (A)']),
                (cs2['Moneyline B'] > cs2['Moneyline A']) & (cs2['R2 Score (B)'] > cs2['R2 Score (A)'])
            ]
        elif round_number == 3:
            # Check if the 'R3 Score' columns exist
            if 'R3 Score (A)' in cs2.columns and 'R3 Score (B)' in cs2.columns:
                conditions = [
                    (cs2['Moneyline A'] < cs2['Moneyline B']) & (cs2['R3 Score (A)'] > cs2['R3 Score (B)']),
                    (cs2['Moneyline A'] > cs2['Moneyline B']) & (cs2['R3 Score (A)'] > cs2['R3 Score (B)']),
                    (cs2['Moneyline B'] < cs2['Moneyline A']) & (cs2['R3 Score (B)'] > cs2['R3 Score (A)']),
                    (cs2['Moneyline B'] > cs2['Moneyline A']) & (cs2['R3 Score (B)'] > cs2['R3 Score (A)'])
                ]

        # Apply conditions and choices
        if conditions:
            cs2[f'Round {round_number} Winner'] = np.select(conditions, choices, default='')
        else:
            cs2[f'Round {round_number} Winner'] = ''

    return cs2

# CS Data

In [3]:
fp = '/content/drive/My Drive/Sports Betting/CS2 - Data.xlsx'

cs2 = pd.read_excel(fp)

In [4]:
cs2

Unnamed: 0,Date,Type,League,Best Of,Stars,Team A,Team B,Odds Source,Moneyline A,Moneyline B,...,R2 Score (A),R2 Score (B),R3 Score (A),R3 Score (B),R4 Score (A),R4 Score (B),R5 Score (A),R5 Score (B),Score Count (A),Score Count (B)
0,2023-10-16,Intl. LAN,IEM Sydney 2023,3,2,Complexity,G2,Oddsportal,2.64,1.45,...,7.0,13.0,,,,,,,,
1,2023-10-16,Intl. LAN,IEM Sydney 2023,3,1,Monte,Grayhound,Oddsportal,1.32,3.19,...,13.0,11.0,,,,,,,,
2,2023-10-16,Intl. LAN,IEM Sydney 2023,3,1,BetBoom,GamerLegion,Oddsportal,1.65,2.18,...,13.0,6.0,,,,,,,,
3,2023-10-16,Intl. LAN,IEM Sydney 2023,3,4,Natus Vincere,MOUZ,Oddsportal,4.36,1.21,...,13.0,4.0,6.0,13.0,,,,,,
4,2023-10-16,Intl. LAN,IEM Sydney 2023,3,0,Apeks,VERTEX,Oddsportal,1.20,4.01,...,13.0,11.0,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
305,2024-04-10,Online,Skyesports Masters 2024,3,1,ENCE,OG,Oddspedia,1.54,2.38,...,10.0,13.0,11.0,13.0,,,,,,
306,2024-04-09,Online,Skyesports Masters 2024,3,0,Ninjas in Pyjamas,Gods Reign,Oddspedia,1.01,12.29,...,13.0,5.0,,,,,,,,
307,2024-04-09,Online,Skyesports Masters 2024,3,0,BIG,BetBoom,Oddspedia,1.63,2.18,...,6.0,13.0,7.0,13.0,,,,,,
308,2024-04-08,Online,Skyesports Masters 2024,3,0,Aurora,OG,Oddspedia,1.52,2.44,...,13.0,9.0,,,,,,,,


In [5]:
cs2.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 310 entries, 0 to 309
Data columns (total 22 columns):
 #   Column           Non-Null Count  Dtype         
---  ------           --------------  -----         
 0   Date             310 non-null    datetime64[ns]
 1   Type             310 non-null    object        
 2   League           310 non-null    object        
 3   Best Of          310 non-null    int64         
 4   Stars            310 non-null    int64         
 5   Team A           310 non-null    object        
 6   Team B           310 non-null    object        
 7   Odds Source      310 non-null    object        
 8   Moneyline A      309 non-null    float64       
 9   Moneyline B      309 non-null    float64       
 10  R1 Score (A)     289 non-null    float64       
 11  R1 Score (B)     289 non-null    float64       
 12  R2 Score (A)     289 non-null    float64       
 13  R2 Score (B)     289 non-null    float64       
 14  R3 Score (A)     129 non-null    float64  

In [6]:
cs2['Score Count (A)'] = (
    np.where((cs2['Best Of'] == 3) & (cs2['R1 Score (A)'] > cs2['R1 Score (B)']), 1, 0) +
    np.where((cs2['Best Of'] == 3) & (cs2['R2 Score (A)'] > cs2['R2 Score (B)']), 1, 0) +
    np.where((cs2['Best Of'] == 3) & (cs2['R3 Score (A)'] != 0) & (cs2['R3 Score (A)'] > cs2['R3 Score (B)']), 1, 0) +
    np.where((cs2['Best Of'] == 5) & (cs2['R1 Score (A)'] > cs2['R1 Score (B)']), 1, 0) +
    np.where((cs2['Best Of'] == 5) & (cs2['R2 Score (A)'] > cs2['R2 Score (B)']), 1, 0) +
    np.where((cs2['Best Of'] == 5) & (cs2['R3 Score (A)'] > cs2['R3 Score (B)']), 1, 0) +
    np.where((cs2['Best Of'] == 5) & (cs2['R4 Score (A)'] > cs2['R4 Score (B)']), 1, 0) +
    np.where((cs2['Best Of'] == 5) & (cs2['R5 Score (A)'] != 0) & (cs2['R5 Score (A)'] > cs2['R5 Score (B)']), 1, 0)
)

In [7]:
cs2['Score Count (B)'] = (
    np.where((cs2['Best Of'] == 3) & (cs2['R1 Score (B)'] > cs2['R1 Score (A)']), 1, 0) +
    np.where((cs2['Best Of'] == 3) & (cs2['R2 Score (B)'] > cs2['R2 Score (A)']), 1, 0) +
    np.where((cs2['Best Of'] == 3) & (cs2['R3 Score (A)'] != 0) & (cs2['R3 Score (B)'] > cs2['R3 Score (A)']), 1, 0) +
    np.where((cs2['Best Of'] == 5) & (cs2['R1 Score (B)'] > cs2['R1 Score (A)']), 1, 0) +
    np.where((cs2['Best Of'] == 5) & (cs2['R2 Score (B)'] > cs2['R2 Score (A)']), 1, 0) +
    np.where((cs2['Best Of'] == 5) & (cs2['R3 Score (B)'] > cs2['R3 Score (A)']), 1, 0) +
    np.where((cs2['Best Of'] == 5) & (cs2['R4 Score (B)'] > cs2['R4 Score (A)']), 1, 0) +
    np.where((cs2['Best Of'] == 5) & (cs2['R5 Score (A)'] != 0) & (cs2['R5 Score (B)'] > cs2['R5 Score (A)']), 1, 0)
)


In [8]:
cs2

Unnamed: 0,Date,Type,League,Best Of,Stars,Team A,Team B,Odds Source,Moneyline A,Moneyline B,...,R2 Score (A),R2 Score (B),R3 Score (A),R3 Score (B),R4 Score (A),R4 Score (B),R5 Score (A),R5 Score (B),Score Count (A),Score Count (B)
0,2023-10-16,Intl. LAN,IEM Sydney 2023,3,2,Complexity,G2,Oddsportal,2.64,1.45,...,7.0,13.0,,,,,,,0,2
1,2023-10-16,Intl. LAN,IEM Sydney 2023,3,1,Monte,Grayhound,Oddsportal,1.32,3.19,...,13.0,11.0,,,,,,,2,0
2,2023-10-16,Intl. LAN,IEM Sydney 2023,3,1,BetBoom,GamerLegion,Oddsportal,1.65,2.18,...,13.0,6.0,,,,,,,2,0
3,2023-10-16,Intl. LAN,IEM Sydney 2023,3,4,Natus Vincere,MOUZ,Oddsportal,4.36,1.21,...,13.0,4.0,6.0,13.0,,,,,1,2
4,2023-10-16,Intl. LAN,IEM Sydney 2023,3,0,Apeks,VERTEX,Oddsportal,1.20,4.01,...,13.0,11.0,,,,,,,2,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
305,2024-04-10,Online,Skyesports Masters 2024,3,1,ENCE,OG,Oddspedia,1.54,2.38,...,10.0,13.0,11.0,13.0,,,,,1,2
306,2024-04-09,Online,Skyesports Masters 2024,3,0,Ninjas in Pyjamas,Gods Reign,Oddspedia,1.01,12.29,...,13.0,5.0,,,,,,,2,0
307,2024-04-09,Online,Skyesports Masters 2024,3,0,BIG,BetBoom,Oddspedia,1.63,2.18,...,6.0,13.0,7.0,13.0,,,,,1,2
308,2024-04-08,Online,Skyesports Masters 2024,3,0,Aurora,OG,Oddspedia,1.52,2.44,...,13.0,9.0,,,,,,,2,0


In [9]:
cs2['Number of Rounds'] = cs2['Score Count (A)'] + cs2['Score Count (B)']

In [10]:
# Initialize the Moneyline Result column
cs2['Moneyline Result'] = np.where(cs2['Score Count (A)'] > cs2['Score Count (B)'],
                                   cs2['Moneyline A'],
                                   np.where(cs2['Score Count (B)'] > cs2['Score Count (A)'],
                                            cs2['Moneyline B'],
                                            np.nan))

In [11]:
conditions = [
    (cs2['Moneyline Result'] == cs2['Moneyline A']) & (cs2['Moneyline A'] < cs2['Moneyline B']),
    (cs2['Moneyline Result'] == cs2['Moneyline B']) & (cs2['Moneyline B'] < cs2['Moneyline A']),
    (cs2['Moneyline Result'] == cs2['Moneyline B']) & (cs2['Moneyline B'] > cs2['Moneyline A']),
    (cs2['Moneyline Result'] == cs2['Moneyline A']) & (cs2['Moneyline A'] > cs2['Moneyline B'])
]

# Results
results = ['Favorite', 'Favorite', 'Underdog', 'Underdog']

# Add Winner column based on Moneyline comparison
cs2['Winner'] = np.select(conditions, results, default=np.nan)

In [12]:
cs2['Winning Team'] = np.where(
    cs2['Moneyline Result'] == cs2['Moneyline A'],
    cs2['Team A'],
    np.where(
        cs2['Moneyline Result'] == cs2['Moneyline B'],
        cs2['Team B'],
        np.nan
    )
)

In [13]:
cs2 = cs2[cs2['Best Of'] == 3]

In [14]:
columns_to_drop = ['R4 Score (A)', 'R4 Score (B)', 'R5 Score (A)', 'R5 Score (B)']

cs2 = cs2.drop(columns=columns_to_drop, axis=1)

In [15]:
cs2.head()

Unnamed: 0,Date,Type,League,Best Of,Stars,Team A,Team B,Odds Source,Moneyline A,Moneyline B,...,R2 Score (A),R2 Score (B),R3 Score (A),R3 Score (B),Score Count (A),Score Count (B),Number of Rounds,Moneyline Result,Winner,Winning Team
0,2023-10-16,Intl. LAN,IEM Sydney 2023,3,2,Complexity,G2,Oddsportal,2.64,1.45,...,7.0,13.0,,,0,2,2,1.45,Favorite,G2
1,2023-10-16,Intl. LAN,IEM Sydney 2023,3,1,Monte,Grayhound,Oddsportal,1.32,3.19,...,13.0,11.0,,,2,0,2,1.32,Favorite,Monte
2,2023-10-16,Intl. LAN,IEM Sydney 2023,3,1,BetBoom,GamerLegion,Oddsportal,1.65,2.18,...,13.0,6.0,,,2,0,2,1.65,Favorite,BetBoom
3,2023-10-16,Intl. LAN,IEM Sydney 2023,3,4,Natus Vincere,MOUZ,Oddsportal,4.36,1.21,...,13.0,4.0,6.0,13.0,1,2,3,1.21,Favorite,MOUZ
4,2023-10-16,Intl. LAN,IEM Sydney 2023,3,0,Apeks,VERTEX,Oddsportal,1.2,4.01,...,13.0,11.0,,,2,0,2,1.2,Favorite,Apeks


### Round Winners

In [16]:
cs2 = calculate_round_winner(cs2, 3)
cs2

Unnamed: 0,Date,Type,League,Best Of,Stars,Team A,Team B,Odds Source,Moneyline A,Moneyline B,...,R3 Score (B),Score Count (A),Score Count (B),Number of Rounds,Moneyline Result,Winner,Winning Team,Round 1 Winner,Round 2 Winner,Round 3 Winner
0,2023-10-16,Intl. LAN,IEM Sydney 2023,3,2,Complexity,G2,Oddsportal,2.64,1.45,...,,0,2,2,1.45,Favorite,G2,Favorite,Favorite,
1,2023-10-16,Intl. LAN,IEM Sydney 2023,3,1,Monte,Grayhound,Oddsportal,1.32,3.19,...,,2,0,2,1.32,Favorite,Monte,Favorite,Favorite,
2,2023-10-16,Intl. LAN,IEM Sydney 2023,3,1,BetBoom,GamerLegion,Oddsportal,1.65,2.18,...,,2,0,2,1.65,Favorite,BetBoom,Favorite,Favorite,
3,2023-10-16,Intl. LAN,IEM Sydney 2023,3,4,Natus Vincere,MOUZ,Oddsportal,4.36,1.21,...,13.0,1,2,3,1.21,Favorite,MOUZ,Favorite,Underdog,Favorite
4,2023-10-16,Intl. LAN,IEM Sydney 2023,3,0,Apeks,VERTEX,Oddsportal,1.20,4.01,...,,2,0,2,1.20,Favorite,Apeks,Favorite,Favorite,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
305,2024-04-10,Online,Skyesports Masters 2024,3,1,ENCE,OG,Oddspedia,1.54,2.38,...,13.0,1,2,3,2.38,Underdog,OG,Favorite,Underdog,Underdog
306,2024-04-09,Online,Skyesports Masters 2024,3,0,Ninjas in Pyjamas,Gods Reign,Oddspedia,1.01,12.29,...,,2,0,2,1.01,Favorite,Ninjas in Pyjamas,Favorite,Favorite,
307,2024-04-09,Online,Skyesports Masters 2024,3,0,BIG,BetBoom,Oddspedia,1.63,2.18,...,13.0,1,2,3,2.18,Underdog,BetBoom,Favorite,Underdog,Underdog
308,2024-04-08,Online,Skyesports Masters 2024,3,0,Aurora,OG,Oddspedia,1.52,2.44,...,,2,0,2,1.52,Favorite,Aurora,Favorite,Favorite,


In [17]:
cs2['Underdog Comeback'] = np.where(
    (cs2['Round 2 Winner'] == 'Underdog') & (cs2['Round 3 Winner'] == 'Underdog'),
    'Yes',
    'No'
)


In [18]:
cs2.head()

Unnamed: 0,Date,Type,League,Best Of,Stars,Team A,Team B,Odds Source,Moneyline A,Moneyline B,...,Score Count (A),Score Count (B),Number of Rounds,Moneyline Result,Winner,Winning Team,Round 1 Winner,Round 2 Winner,Round 3 Winner,Underdog Comeback
0,2023-10-16,Intl. LAN,IEM Sydney 2023,3,2,Complexity,G2,Oddsportal,2.64,1.45,...,0,2,2,1.45,Favorite,G2,Favorite,Favorite,,No
1,2023-10-16,Intl. LAN,IEM Sydney 2023,3,1,Monte,Grayhound,Oddsportal,1.32,3.19,...,2,0,2,1.32,Favorite,Monte,Favorite,Favorite,,No
2,2023-10-16,Intl. LAN,IEM Sydney 2023,3,1,BetBoom,GamerLegion,Oddsportal,1.65,2.18,...,2,0,2,1.65,Favorite,BetBoom,Favorite,Favorite,,No
3,2023-10-16,Intl. LAN,IEM Sydney 2023,3,4,Natus Vincere,MOUZ,Oddsportal,4.36,1.21,...,1,2,3,1.21,Favorite,MOUZ,Favorite,Underdog,Favorite,No
4,2023-10-16,Intl. LAN,IEM Sydney 2023,3,0,Apeks,VERTEX,Oddsportal,1.2,4.01,...,2,0,2,1.2,Favorite,Apeks,Favorite,Favorite,,No


# Analysis

## Winner overall

In [19]:
ud_win_overall = cs2['Winner'].eq("Underdog").sum()
ud_win_overall

96

In [20]:
# Total matches
total_matches = cs2['Winner'].count()

In [21]:
ud_win_pct = ud_win_overall / total_matches
ud_win_pct

0.31475409836065577

## Filter for strategy (map 3)

In [22]:
cs2 = cs2[cs2['Number of Rounds'] == 3]
cs2

Unnamed: 0,Date,Type,League,Best Of,Stars,Team A,Team B,Odds Source,Moneyline A,Moneyline B,...,Score Count (A),Score Count (B),Number of Rounds,Moneyline Result,Winner,Winning Team,Round 1 Winner,Round 2 Winner,Round 3 Winner,Underdog Comeback
3,2023-10-16,Intl. LAN,IEM Sydney 2023,3,4,Natus Vincere,MOUZ,Oddsportal,4.36,1.21,...,1,2,3,1.21,Favorite,MOUZ,Favorite,Underdog,Favorite,No
5,2023-10-16,Intl. LAN,IEM Sydney 2023,3,3,Vitality,FaZe,Oddsportal,1.55,2.40,...,1,2,3,2.40,Underdog,FaZe,Favorite,Underdog,Underdog,Yes
6,2023-10-17,Intl. LAN,IEM Sydney 2023,3,3,Natus Vincere,FaZe,Oddsportal,4.22,1.20,...,1,2,3,1.20,Favorite,FaZe,Favorite,Underdog,Favorite,No
8,2023-10-17,Intl. LAN,IEM Sydney 2023,3,2,Monte,fnatic,Oddsportal,1.92,1.82,...,2,1,3,1.92,Underdog,Monte,Favorite,Underdog,Underdog,Yes
9,2023-10-17,Intl. LAN,IEM Sydney 2023,3,2,ENCE,fnatic,Oddsportal,1.42,2.73,...,2,1,3,1.42,Favorite,ENCE,Underdog,Favorite,Favorite,No
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
299,2024-04-13,Online,Skyesports Masters 2024,3,0,OG,BIG,Oddspedia,2.29,1.58,...,2,1,3,2.29,Underdog,OG,Underdog,Favorite,Underdog,No
300,2024-04-12,Online,Skyesports Masters 2024,3,0,BIG,FORZE,Oddspedia,1.30,3.37,...,2,1,3,1.30,Favorite,BIG,Underdog,Favorite,Favorite,No
305,2024-04-10,Online,Skyesports Masters 2024,3,1,ENCE,OG,Oddspedia,1.54,2.38,...,1,2,3,2.38,Underdog,OG,Favorite,Underdog,Underdog,Yes
307,2024-04-09,Online,Skyesports Masters 2024,3,0,BIG,BetBoom,Oddspedia,1.63,2.18,...,1,2,3,2.18,Underdog,BetBoom,Favorite,Underdog,Underdog,Yes


In [23]:
# Round 3 Underdog Winner
r3_ud = cs2['Round 3 Winner'].eq("Underdog").sum()
r3_ud

54

In [24]:
# Round 3 Underdog Winner divided by Total 3 Round Matches
r3_ud_win = r3_ud / cs2['Round 3 Winner'].count()

In [25]:
r3_ud_win.round(5)

0.43548

In [26]:
underdog_comeback = ((cs2['Round 2 Winner'] == "Underdog") & (cs2['Round 3 Winner'] == "Underdog")).sum()
underdog_comeback

24

In [27]:
favorite_comeback = ((cs2['Round 2 Winner'] == "Favorite") & (cs2['Round 3 Winner'] == "Favorite")).sum()
favorite_comeback

33

In [28]:
ud_cb = underdog_comeback / cs2['Round 3 Winner'].count()
ud_cb

0.1935483870967742

In [29]:
fv_cb = favorite_comeback / cs2['Round 3 Winner'].count()
fv_cb

0.2661290322580645

# Results

In [30]:
print("Underdogs win", round((ud_win_pct)*100,5), "% of the time overall.")
print("Fair price =", round(1/ud_win_pct, 5),".")
print()
print("Underdogs win", round(r3_ud_win * 100, 5), "% of the time in map 3.")
print("Fair price =", round(1/r3_ud_win, 5),".")
print()
print("Underdogs comeback at", round(ud_cb * 100, 5), "% of the time.")
print("Fair price =", round(1/ud_cb, 5),".")

Underdogs win 31.47541 % of the time overall.
Fair price = 3.17708 .

Underdogs win 43.54839 % of the time in map 3.
Fair price = 2.2963 .

Underdogs comeback at 19.35484 % of the time.
Fair price = 5.16667 .


In [31]:
print("Favorites win", round((1-(ud_win_pct))*100,5), "% of the time overall.")
print("Fair price =", round(1/(1- ud_win_pct), 5),".")
print()
print("Favorites win", round((1 - r3_ud_win) * 100, 5), "% of the time in map 3.")
print("Fair price =", round(1/(1 - r3_ud_win), 5),".")
print()
print("Favorites comeback at", round(fv_cb * 100, 5), "% of the time.")
print("Fair price =", round(1/fv_cb, 5),".")

Favorites win 68.52459 % of the time overall.
Fair price = 1.45933 .

Favorites win 56.45161 % of the time in map 3.
Fair price = 1.77143 .

Favorites comeback at 26.6129 % of the time.
Fair price = 3.75758 .
