In [3]:
import numpy as np
import cbbpy.mens_scraper as s

In [4]:
games = s.get_games_season(2024) # Get Game Data

No games on 05/01/24: 100%|██████████| 183 of 183 days scraped in 679.9 sec          


In [132]:
print(games[0].iloc[-100]) # Game Entry

game_id                     401641104
home_team         Cincinnati Bearcats
home_id                          2132
home_rank                         NaN
home_record                     21-14
home_score                         73
away_team          San Francisco Dons
away_id                          2539
away_rank                         NaN
away_record                     23-11
away_score                         72
home_win                         True
num_ots                             1
is_conference                   False
is_neutral                      False
is_postseason                    True
tournament            NIT - 1st Round
game_day               March 20, 2024
game_time                06:00 PM PDT
game_loc               Cincinnati, OH
arena               Fifth Third Arena
arena_capacity                    NaN
attendance                       3899
tv_network                      ESPN+
referee_1                Craig Murley
referee_2                Shane Staggs
referee_3   

In [105]:
# Create and Enumerate List of Keys (Team Names)

teams = []

with open('2023.txt') as f:
    lines = f.readlines()

for l in lines:
    d = l.split(",") # "year","month","day","team","opponent","location","teamscore","oppscore","canceled","postponed","OT","D1"
    
    t1 = d[3]
    t2 = d[4]

    # Add Teams that are not in the Network
    if t1 not in teams:
        teams.append(t1)

    if t2 not in teams:
        teams.append(t2)

num_teams = len(teams) # Number of Teams
team_index = {}
index_team = {}
n = np.zeros((num_teams,num_teams)) # Initialize Adjacency Matrix - Used Later On

## Enumerate
for count, team in enumerate(teams):
    team_index[team]=count
    index_team[count] = team

In [155]:
# Print 10 Keys, Values from within team_index
for key in list(team_index.keys())[:10]:
    print(key + ": " + str(team_index[key]))

"A&M-Corpus Christi": 0
"Mississippi St.": 1
"UTSA": 2
"Trinity (TX)": 3
"UTRGV": 4
"CSU Bakersfield": 5
"Alcorn": 6
"UTEP": 7
"Texas Lutheran": 8
"Arizona": 9


In [133]:
with open('2023.txt') as f:
    lines = f.readlines()

for l in lines:
    d = l.split(",")
    t1 = d[3]
    t2 = d[4]

    # Team Indices in Adjacency Matrix
    i1 = team_index[t1]
    i2 = team_index[t2]

    # Compute Value
    try:
        if d[6] > d[7]:
            n[i1][i2] += max(float(d[6]) / float(d[7]) - 1, 1) # Max Value of 1
        else:
            n[i2][i1] += max(float(d[7]) / float(d[6]) - 1, 1) # Max Value of 1
    except Exception:
        continue

In [146]:
# Degree Centrality
def degree_centrality(n):
    d = [sum(n[i]) for i in range(num_teams)]
    sort_index = np.argsort(d)
    return sort_index

# Power Series Iteration Centrality
def pr(n, alpha=0.85):
    d = [sum(n.T[i]) for i in range(num_teams)] # degree centrality
    D = np.identity(num_teams) # Identity Matrix

    # Normalize Each Row i of D for the total sum of Row i
    for i in range(num_teams):
        if d[i] != 0: D[i][i] = 1/d[i]
    
    P = n@D
    e = np.ones(num_teams)/num_teams # ones vector
    x = np.zeros(num_teams) # zero vector
    P = alpha*P
    P1 = np.copy(P)

    # Iterate - Series Converges at 5 Terms
    for k in range(5):
        P = P1 @ P
        x += P@e

    sort_index = np.argsort(x) # Sort Arguments by Value
    return sort_index

In [147]:
# alpha = 0.9
s = pr(n, alpha=0.9)
for i in range(25):
    print(str(1+i) + ": " + str(index_team[s[-i]]))

1: "Guilford"
2: "Kansas"
3: "Purdue"
4: "Tennessee"
5: "Alabama"
6: "Texas"
7: "UCLA"
8: "Kansas St."
9: "Houston"
10: "Baylor"
11: "Texas A&M"
12: "UConn"
13: "Saint Mary's (CA)"
14: "Duke"
15: "Iowa St."
16: "Arizona"
17: "Marquette"
18: "Indiana"
19: "Kentucky"
20: "San Diego St."
21: "Miami (FL)"
22: "Arkansas"
23: "Boise St."
24: "VCU"
25: "Maryland"


In [148]:
# alpha = 0.85
s = pr(n)
for i in range(25):
    print(str(1+i) + ": " + str(index_team[s[-i]]))

1: "Guilford"
2: "Kansas"
3: "Purdue"
4: "Tennessee"
5: "Alabama"
6: "Texas"
7: "UCLA"
8: "Kansas St."
9: "Houston"
10: "Baylor"
11: "Texas A&M"
12: "UConn"
13: "Saint Mary's (CA)"
14: "Duke"
15: "Iowa St."
16: "Arizona"
17: "Marquette"
18: "Indiana"
19: "Kentucky"
20: "San Diego St."
21: "Miami (FL)"
22: "Arkansas"
23: "Boise St."
24: "VCU"
25: "Maryland"


In [143]:
# alpha = 0.65
s = pr(n, alpha=0.65)
for i in range(25):
    print(str(1+i) + ": " + str(index_team[s[-i]]))

1: "Ouachita Baptist"
2: "Alabama"
3: "Saint Mary's (CA)"
4: "Houston"
5: "Kansas"
6: "Tennessee"
7: "San Diego St."
8: "Purdue"
9: "Gonzaga"
10: "Utah St."
11: "Boise St."
12: "Arizona"
13: "Nevada"
14: "Arkansas"
15: "New Mexico"
16: "Baylor"
17: "Indiana"
18: "Texas"
19: "Hofstra"
20: "Fla. Atlantic"
21: "UCLA"
22: "Iowa St."
23: "Kentucky"
24: "UConn"
25: "Kansas St."


In [144]:
# 0.25
s = pr(n, alpha=0.25)
for i in range(20):
    print(str(i+1) + ": " + str(index_team[s[-i]]))

1: "Miami-Hamilton"
2: "Saint Mary's (CA)"
3: "Houston"
4: "Alabama"
5: "Utah St."
6: "San Diego St."
7: "Gonzaga"
8: "Fla. Atlantic"
9: "Hofstra"
10: "New Mexico"
11: "Kansas"
12: "Tennessee"
13: "Purdue"
14: "Nevada"
15: "Boise St."
16: "Arkansas"
17: "Indiana"
18: "Arizona"
19: "Liberty"
20: "Louisiana"


In [145]:
s = degree_centrality(n)
for i in range(20):
    print(str(i+1) + ": " + str(index_team[s[-i]]))

1: "Academy of Art"
2: "Houston"
3: "Col. of Charleston"
4: "Tennessee"
5: "UCLA"
6: "Texas"
7: "Saint Mary's (CA)"
8: "Marquette"
9: "Purdue"
10: "Drake"
11: "Iona"
12: "Bradley"
13: "North Texas"
14: "Duke"
15: "Kansas"
16: "Fla. Atlantic"
17: "UConn"
18: "Liberty"
19: "VCU"
20: "Hofstra"
