In [None]:
library()

In [None]:
install.packages("zoo")
install.packages("hoopR")
install.packages("kernlab")
install.packages("tidymodels")
install.packages("AzureStor")

In [None]:

library(tidyverse)
library(zoo)
library(hoopR)
library(tidymodels)
library(kernlab)
library(AzureStor)


In [None]:
# Fix the random seed so the random train/test split made later with
# initial_split() gives the same partition on every run.
set.seed(1234)

In [None]:
# Load the three reference tables that are joined onto the game logs below:
#   kenpom      - local file kenpom_2023.csv
#   namekey     - name table downloaded from mattwaite.github.io
#   simplestats - local file simplestats.csv
# The name table is fetched over HTTPS rather than plain HTTP so the download
# cannot be altered in transit.
kenpom <- read_csv("kenpom_2023.csv")
namekey <- read_csv("https://mattwaite.github.io/sportsdatafiles/nametable.csv")
simplestats <- read_csv("simplestats.csv")

In [None]:
# Preview the first rows of the 2018 season in the KenPom table.
kenpom %>%
  filter(Season == 2018) %>%
  head()

In [None]:
# Preview the first rows of the name table.
namekey %>%
  head()

In [None]:
# Preview the first rows of the simple-stats table.
simplestats %>%
  head()

In [None]:
# Seasons to request from hoopR (2020 is not in the list).
years <- c(2018, 2019, 2021, 2022, 2023)

# Download the team box scores, then spell out three abbreviated school names
# for seasons after 2021 -- presumably so they line up with the names used in
# the joins further down. The replacements run in order inside one mutate().
teamgames <- load_mbb_team_box(seasons = years) %>%
  mutate(
    team_short_display_name = ifelse(
      team_short_display_name == "Kansas St" & season > 2021,
      "Kansas State",
      team_short_display_name
    ),
    team_short_display_name = ifelse(
      team_short_display_name == "San Diego St" & season > 2021,
      "San Diego State",
      team_short_display_name
    ),
    team_short_display_name = ifelse(
      team_short_display_name == "Michigan St" & season > 2021,
      "Michigan State",
      team_short_display_name
    )
  )

# Sanity check on the renaming above: show (up to 50) rows from 2022 onward
# that still carry the abbreviated "San Diego St" name.
teamgames %>%
  filter(season >= 2022, team_short_display_name == "San Diego St") %>%
  select(team_short_display_name) %>%
  head(n = 50)
# Split the combined made/attempted box-score columns into separate columns,
# convert the stat columns to numeric, and attach the name table, the KenPom
# table, and the simple-stats table.
teamgames <- teamgames %>%
  separate(
    field_goals_made_field_goals_attempted,
    into = c("field_goals_made", "field_goals_attempted")
  ) %>%
  separate(
    three_point_field_goals_made_three_point_field_goals_attempted,
    into = c("three_point_field_goals_made", "three_point_field_goals_attempted")
  ) %>%
  separate(
    free_throws_made_free_throws_attempted,
    into = c("free_throws_made", "free_throws_attempted")
  ) %>%
  # across() replaces the superseded mutate_at(). Columns 12:34 are picked by
  # position, so this still depends on the column order hoopR returns.
  mutate(
    across(12:34, as.numeric),
    team_id = as.numeric(team_id)
  ) %>%
  # No `by` is given here, so this joins on every column name shared with
  # namekey. NOTE(review): the `School` key used in the last join presumably
  # comes from namekey -- confirm.
  left_join(namekey) %>%
  left_join(kenpom, by = c("team_short_display_name" = "TeamName", "season" = "Season")) %>%
  left_join(simplestats, by = c("School" = "School", "season" = "Season"))

In [None]:
# Per-game offensive metrics plus lagged season-to-date averages per team.
# lag() shifts each running mean down one game, so a row only sees the average
# of that team's earlier games in the season; the first game gets 0.
teamstats <- teamgames %>%
  arrange(game_date) %>%
  group_by(team_short_display_name, season) %>%
  mutate(
    team_score = 2 * (field_goals_made - three_point_field_goals_made) +
      3 * three_point_field_goals_made +
      free_throws_made,
    possessions = field_goals_attempted - offensive_rebounds + turnovers +
      0.475 * free_throws_attempted,
    points_per_possession = team_score / possessions,
    true_shooting_percentage =
      team_score / (2 * (field_goals_attempted + 0.44 * free_throws_attempted)) * 100,
    free_throw_rate = free_throws_attempted / field_goals_attempted,
    team_offensive_efficiency = team_score / possessions * 100,
    team_cm_points_per_possession = lag(cummean(points_per_possession), default = 0),
    team_cm_true_shooting_percentage = lag(cummean(true_shooting_percentage), default = 0),
    team_cm_free_throw_rate = lag(cummean(free_throw_rate), default = 0),
    team_cm_offensive_efficiency = lag(cummean(team_offensive_efficiency), default = 0)
  ) %>%
  ungroup() %>%
  rename(team_sos = OverallSOS, team_srs = OverallSRS)

# Relabel every team's per-game offensive efficiency as the *opponent's* and
# join it back onto the full table, then build the lagged season-to-date
# defensive efficiency from it.
# NOTE(review): left_join() has no `by`, so it matches on whatever column names
# the two tables share (presumably game_id and opponent_id) -- confirm.
teamstats <- teamstats %>%
  select(
    game_id,
    opponent_id = team_id,
    opponent_offensive_efficiency = team_offensive_efficiency
  ) %>%
  mutate(opponent_id = as.numeric(opponent_id)) %>%
  left_join(teamstats) %>%
  arrange(game_date) %>%
  group_by(team_short_display_name, season) %>%
  mutate(
    team_cm_defensive_efficiency =
      lag(cummean(opponent_offensive_efficiency), default = 0)
  ) %>%
  ungroup()

# Each team's rebounds per game, relabelled so they can be joined onto the
# other side of the same game as "opponent" rebounds.
opponent <- teamstats %>%
  select(
    game_id,
    opponent_id = team_id,
    opponent_offensive_rebounds = offensive_rebounds,
    opponent_defensive_rebounds = defensive_rebounds
  ) %>%
  mutate(opponent_id = as.numeric(opponent_id))

# Add rebounding and steal/turnover rates plus their lagged season-to-date
# averages, and the per-game net efficiency.
# The running averages are computed per team and season in game-date order,
# like the other team_cm_* columns. `teamstats` arrives ungrouped, so without
# the group_by() here cummean() would run across every team's games at once.
# NOTE(review): this pipeline appears twice in the notebook (once before the
# model is fit, once before run_model()); keep both copies identical so that
# training and prediction features are computed the same way.
newteamstats <- teamstats %>%
  inner_join(opponent) %>%
  group_by(team_short_display_name, season) %>%
  arrange(game_date) %>%
  mutate(
    orb_percentage = offensive_rebounds / (offensive_rebounds + opponent_defensive_rebounds),
    drb_percentage = defensive_rebounds / (opponent_offensive_rebounds + defensive_rebounds),
    team_cm_orb_percentage = lag(cummean(orb_percentage), n = 1, default = 0),
    team_cm_drb_percentage = lag(cummean(drb_percentage), n = 1, default = 0),
    steal_turnover_ratio = (steals / turnovers),
    team_cm_steal_turnover_ratio = lag(cummean(steal_turnover_ratio), n = 1, default = 0),
    net_efficiency = (team_offensive_efficiency - opponent_offensive_efficiency)
  ) %>%
  ungroup()

# The "team" half of each game: identifiers, final score, and the lagged
# cumulative features. Rows with any missing value are dropped.
team_side <- newteamstats %>%
  select(
    game_id,
    team_id,
    team_short_display_name,
    opponent_id,
    game_date,
    season,
    team_score,
    team_cm_points_per_possession,
    team_cm_true_shooting_percentage,
    team_cm_free_throw_rate,
    team_cm_orb_percentage,
    team_cm_drb_percentage,
    team_cm_steal_turnover_ratio,
    team_cm_offensive_efficiency,
    team_cm_defensive_efficiency,
    team_srs
  ) %>%
  na.omit()

# The "opponent" half of each game: the same columns as the team half (minus
# opponent_id), with every `team_` prefix swapped for `opponent_` so the two
# halves can sit side by side after the join.
opponent_side <- newteamstats %>%
  select(
    game_id,
    team_id,
    team_short_display_name,
    game_date,
    season,
    team_score,
    team_cm_points_per_possession,
    team_cm_true_shooting_percentage,
    team_cm_free_throw_rate,
    team_cm_orb_percentage,
    team_cm_drb_percentage,
    team_cm_steal_turnover_ratio,
    team_cm_offensive_efficiency,
    team_cm_defensive_efficiency,
    team_srs
  ) %>%
  na.omit() %>%
  # Every selected column that starts with "team_" is renamed, e.g.
  # team_id -> opponent_id and team_srs -> opponent_srs.
  rename_with(
    function(nm) sub("^team_", "opponent_", nm),
    .cols = starts_with("team_")
  ) %>%
  mutate(opponent_id = as.numeric(opponent_id))

# Pair the two halves of each game and label the outcome from the team's point
# of view. Rows where neither score is larger get NA and are removed by
# na.omit().
games <- team_side %>%
  inner_join(opponent_side) %>%
  mutate(
    TeamResult = as.factor(
      case_when(
        team_score > opponent_score ~ "W",
        team_score < opponent_score ~ "L"
      )
    )
  ) %>%
  na.omit()

# Make "W" the first (reference) level of the outcome factor.
games <- games %>%
  mutate(TeamResult = relevel(TeamResult, ref = "W"))

# Modelling table: identifier columns, both sides' lagged cumulative features
# and SRS, and the outcome. The drb_percentage columns are not carried over.
cumulativemodelgames <- games %>%
  select(
    game_id,
    game_date,
    team_short_display_name,
    opponent_short_display_name,
    season,
    team_cm_points_per_possession,
    team_cm_true_shooting_percentage,
    team_cm_free_throw_rate,
    team_cm_orb_percentage,
    team_cm_steal_turnover_ratio,
    team_cm_offensive_efficiency,
    team_cm_defensive_efficiency,
    team_srs,
    opponent_cm_points_per_possession,
    opponent_cm_true_shooting_percentage,
    opponent_cm_free_throw_rate,
    opponent_cm_orb_percentage,
    opponent_cm_steal_turnover_ratio,
    opponent_cm_offensive_efficiency,
    opponent_cm_defensive_efficiency,
    opponent_srs,
    TeamResult
  )

In [None]:
# Column-by-column summary of the modelling table.
cumulativemodelgames %>%
  summary()

In [None]:
# Random 80/20 split of the modelling table into training and test sets.
# NOTE(review): each game presumably appears once per team, so the two rows of
# one game can land on opposite sides of the split -- confirm whether a split
# grouped by game_id is wanted.
cumulative_split <- cumulativemodelgames %>%
  initial_split(prop = 0.8)
cumulative_train <- cumulative_split %>%
  training()
cumulative_test <- cumulative_split %>%
  testing()

In [None]:
# Preprocessing recipe: TeamResult is the outcome, the identifier columns are
# given the "ID" role so they are not used as predictors, and every remaining
# predictor is normalized.
cumulative_recipe <- recipe(TeamResult ~ ., data = cumulative_train) %>%
  update_role(
    game_id,
    game_date,
    team_short_display_name,
    opponent_short_display_name,
    season,
    new_role = "ID"
  ) %>%
  step_normalize(all_predictors())

# List each variable with its type and role.
cumulative_recipe %>%
  summary()

In [None]:
# Polynomial-kernel SVM classifier, fitted with the kernlab engine.
svm_mod <- svm_poly(mode = "classification") %>%
  set_engine("kernlab")

In [None]:
# Bundle the preprocessing recipe and the SVM specification into one workflow.
svm_cumulative_workflow <- workflow() %>%
  add_recipe(cumulative_recipe) %>%
  add_model(svm_mod)

In [None]:
# Fit the workflow on the training set.
svm_cumulative_fit <- fit(svm_cumulative_workflow, data = cumulative_train)

# Append the predicted class to the training rows and score the predictions
# against the true outcome.
svmcumulativetrainresults <- bind_cols(
  cumulative_train,
  predict(svm_cumulative_fit, cumulative_train)
)

svmcumulativetrainresults %>%
  metrics(truth = TeamResult, estimate = .pred_class)

In [None]:
# Append the predicted class to the held-out test rows and score them the same
# way as the training rows.
svmcumulativetestresults <- bind_cols(
  cumulative_test,
  predict(svm_cumulative_fit, cumulative_test)
)

svmcumulativetestresults %>%
  metrics(truth = TeamResult, estimate = .pred_class)

In [None]:
# Rebuild the feature table (`cumulativemodelgames`) that run_model() reads.
# NOTE(review): this cell repeats the feature pipeline that appears earlier in
# the notebook, before the model is fit. Keep the two copies identical so that
# training and prediction features are computed the same way.

# Per-game offensive metrics plus lagged season-to-date averages per team.
# lag() shifts each running mean down one game; the first game gets 0.
teamstats <- teamgames %>% 
  group_by(team_short_display_name, season) %>%
  arrange(game_date) %>%
  mutate(
    team_score = ((field_goals_made - three_point_field_goals_made) * 2) + (three_point_field_goals_made * 3) + free_throws_made,
    possessions = (field_goals_attempted - offensive_rebounds + turnovers + (.475 * free_throws_attempted)),
    points_per_possession = (team_score / possessions),
    true_shooting_percentage = (team_score / (2 * (field_goals_attempted + (.44 * free_throws_attempted)))) * 100,
    free_throw_rate = (free_throws_attempted / field_goals_attempted),
    team_offensive_efficiency = (team_score/possessions) * 100,
    team_cm_points_per_possession = lag(cummean(points_per_possession), n = 1, default = 0),
    team_cm_true_shooting_percentage = lag(cummean(true_shooting_percentage), n = 1, default = 0),
    team_cm_free_throw_rate = lag(cummean(free_throw_rate), n = 1, default = 0),
    team_cm_offensive_efficiency = lag(cummean(team_offensive_efficiency), n = 1, default = 0),
  ) %>% 
  ungroup() %>% 
  rename(
    team_sos = OverallSOS,
    team_srs = OverallSRS
    )

# Relabel each team's offensive efficiency as the opponent's, join it back, and
# build the lagged season-to-date defensive efficiency.
# NOTE(review): left_join() has no `by`, so it matches on the column names the
# two tables share (presumably game_id and opponent_id) -- confirm.
teamstats <- teamstats %>% 
  select(game_id, team_id, team_offensive_efficiency) %>%
  mutate(team_id = as.numeric(team_id)) %>% 
  rename(opponent_id = team_id, 
         opponent_offensive_efficiency = team_offensive_efficiency) %>% 
  left_join(teamstats) %>%
  group_by(team_short_display_name, season) %>%
  arrange(game_date) %>%
  mutate(
    team_cm_defensive_efficiency = lag(cummean(opponent_offensive_efficiency), n = 1, default = 0)
    ) %>% 
  ungroup()

# Each team's rebounds, relabelled as "opponent" rebounds for the other side.
opponent <- teamstats %>% 
  select(game_id, team_id, offensive_rebounds, defensive_rebounds) %>% 
  rename(opponent_id = team_id, 
         opponent_offensive_rebounds = offensive_rebounds, 
         opponent_defensive_rebounds = defensive_rebounds) %>% 
  mutate(opponent_id = as.numeric(opponent_id))

# Rebounding and steal/turnover rates with their lagged running averages.
# `teamstats` is ungrouped at this point, so the table is grouped by team and
# season (and ordered by date) again here; otherwise cummean() would average
# across every team's games at once instead of per team-season.
newteamstats <- teamstats %>% 
  inner_join(opponent) %>% 
  group_by(team_short_display_name, season) %>%
  arrange(game_date) %>%
  mutate(
    orb_percentage = offensive_rebounds / (offensive_rebounds + opponent_defensive_rebounds),
    drb_percentage = defensive_rebounds / (opponent_offensive_rebounds + defensive_rebounds),
    team_cm_orb_percentage = lag(cummean(orb_percentage), n = 1, default = 0),
    team_cm_drb_percentage = lag(cummean(drb_percentage), n = 1, default = 0),
    steal_turnover_ratio = (steals / turnovers),
    team_cm_steal_turnover_ratio = lag(cummean(steal_turnover_ratio), n = 1, default = 0),
    net_efficiency = (team_offensive_efficiency - opponent_offensive_efficiency)
    ) %>%
  ungroup()

# The "team" half of each game; rows with any missing value are dropped.
team_side <- newteamstats %>%
  select(game_id, team_id, team_short_display_name, opponent_id, game_date, season, team_score, team_cm_points_per_possession, team_cm_true_shooting_percentage, team_cm_free_throw_rate, team_cm_orb_percentage, team_cm_drb_percentage, team_cm_steal_turnover_ratio, team_cm_offensive_efficiency, team_cm_defensive_efficiency, team_srs) %>% 
  na.omit()

# The "opponent" half: the same columns with `team_` renamed to `opponent_`.
opponent_side <- newteamstats %>%
  select(game_id, team_id, team_short_display_name, game_date, season, team_score, team_cm_points_per_possession, team_cm_true_shooting_percentage, team_cm_free_throw_rate, team_cm_orb_percentage, team_cm_drb_percentage, team_cm_steal_turnover_ratio, team_cm_offensive_efficiency, team_cm_defensive_efficiency, team_srs) %>% 
  na.omit() %>%
  rename(
    opponent_id = team_id,
    opponent_short_display_name = team_short_display_name,
    opponent_score = team_score,
    opponent_cm_points_per_possession = team_cm_points_per_possession,
    opponent_cm_true_shooting_percentage = team_cm_true_shooting_percentage,
    opponent_cm_free_throw_rate = team_cm_free_throw_rate,
    opponent_cm_orb_percentage = team_cm_orb_percentage,
    opponent_cm_drb_percentage = team_cm_drb_percentage,
    opponent_cm_offensive_efficiency = team_cm_offensive_efficiency,
    opponent_cm_defensive_efficiency = team_cm_defensive_efficiency,
    opponent_cm_steal_turnover_ratio = team_cm_steal_turnover_ratio,
    opponent_srs = team_srs
  ) %>%
  mutate(
    opponent_id = as.numeric(opponent_id)
    )

# Pair the halves and label the result from the team's point of view; rows
# where neither score is larger get NA and are removed by na.omit().
games <- team_side %>% 
  inner_join(opponent_side) %>% 
  mutate(
  TeamResult = as.factor(case_when(
    team_score > opponent_score ~ "W",
    opponent_score > team_score ~ "L"
))) %>% 
  na.omit()

# Make "W" the first (reference) level of the outcome factor.
games$TeamResult <- relevel(games$TeamResult, ref = "W")

# Final modelling table: identifiers, both sides' features, and the outcome.
cumulativemodelgames <- games %>% 
  select(game_id, game_date, team_short_display_name, opponent_short_display_name, season, team_cm_points_per_possession, team_cm_true_shooting_percentage, team_cm_free_throw_rate, team_cm_orb_percentage, team_cm_steal_turnover_ratio, team_cm_offensive_efficiency, team_cm_defensive_efficiency, team_srs, opponent_cm_points_per_possession, opponent_cm_true_shooting_percentage, opponent_cm_free_throw_rate, opponent_cm_orb_percentage, opponent_cm_steal_turnover_ratio, opponent_cm_offensive_efficiency, opponent_cm_defensive_efficiency, opponent_srs, TeamResult)

In [None]:
# improvement below

#' Predict the result of one or more matchups with the fitted SVM workflow.
#'
#' Looks up each side's most recent row of cumulative features for
#' `target_season` in `model_games`, assembles one row per matchup, and asks
#' `model_fit` for the predicted class.
#'
#' @param team1 Character vector of `team_short_display_name` values.
#' @param team2 Character vector of `opponent_short_display_name` values,
#'   paired with `team1`.
#' @param target_season Season whose latest game supplies the features.
#'   Defaults to 2023, the value that used to be hard-coded.
#' @param model_games Feature table to draw from. Defaults to the notebook's
#'   `cumulativemodelgames`, looked up when the function is called.
#' @param model_fit Fitted workflow used for prediction. Defaults to the
#'   notebook's `svm_cumulative_fit`, looked up when the function is called.
#' @return A tibble with `.pred_class` (the predicted `TeamResult` for
#'   `team1`), the two team names, and the feature columns that were used.
run_model <- function(team1, team2,
                      target_season = 2023,
                      model_games = cumulativemodelgames,
                      model_fit = svm_cumulative_fit) {
  matchup <- tibble(
    team_short_display_name = team1,
    opponent_short_display_name = team2
  )

  season_games <- model_games %>%
    filter(season == target_season)

  # A name with no rows for the season would otherwise slip through the
  # right joins below as a row of NA features.
  unmatched <- c(
    setdiff(team1, season_games$team_short_display_name),
    setdiff(team2, season_games$opponent_short_display_name)
  )
  if (length(unmatched) > 0) {
    warning(
      "No ", target_season, " feature rows found for: ",
      paste(unique(unmatched), collapse = ", "),
      call. = FALSE
    )
  }

  # Latest game of the season for every team, keeping only team-side columns.
  team_features <- season_games %>%
    group_by(team_short_display_name) %>%
    filter(game_date == max(game_date)) %>%
    ungroup() %>%
    select(-TeamResult, -starts_with("opponent"))

  # Latest game of the season for every opponent, keeping only the
  # opponent-side columns.
  opponent_features <- season_games %>%
    group_by(opponent_short_display_name) %>%
    filter(game_date == max(game_date)) %>%
    ungroup() %>%
    select(-TeamResult, -starts_with("team"), -game_id, -game_date, -season)

  # right_join keeps one output row per requested matchup.
  matchup <- team_features %>%
    right_join(matchup, by = "team_short_display_name", multiple = "all")
  matchup <- opponent_features %>%
    right_join(matchup, by = "opponent_short_display_name", multiple = "all")

  model_fit %>%
    predict(new_data = matchup) %>%
    bind_cols(matchup) %>%
    select(.pred_class, team_short_display_name, opponent_short_display_name, everything())
}


# Predict a single matchup; the first name is looked up as the "team" and the
# second as the "opponent" inside run_model().
df1 <- run_model("San Diego State", "UConn")


In [None]:
# Display the prediction table.
df1

In [None]:
# Upload the single-matchup prediction to Azure Blob Storage.
# The storage account key is read from the AZURE_STORAGE_KEY environment
# variable instead of being written into the notebook. Any key that was
# previously committed here should be treated as leaked and rotated.
account_endpoint <- "https://blakestorageaccount581.blob.core.windows.net"
account_key <- Sys.getenv("AZURE_STORAGE_KEY")
if (!nzchar(account_key)) {
  stop(
    "Set the AZURE_STORAGE_KEY environment variable before uploading.",
    call. = FALSE
  )
}
container_name <- "model-csv-files"
bl_endp_key <- storage_endpoint(account_endpoint, key = account_key)
cont <- storage_container(bl_endp_key, container_name)
# The file is written locally as model_pred3.csv but stored in the container
# under the name model_pred4.csv.
write.csv(df1, "model_pred3.csv", row.names = FALSE)
upload_blob(cont, src = "model_pred3.csv", dest = "model_pred4.csv")

In [None]:

# Predict eight matchups, one run_model() call each.
df1 <- run_model("Kansas State", "Michigan State")
df2 <- run_model("FAU", "Tennessee")
df4 <- run_model("UConn", "Arkansas")
df3 <- run_model("UCLA", "Gonzaga")
df5 <- run_model("Alabama", "San Diego State")
df6 <- run_model("Creighton", "Princeton")
df7 <- run_model("Houston", "Miami")
df8 <- run_model("Texas", "Xavier")

# Stack the results in one call instead of chaining rbind() through
# throwaway intermediates. The row order (df4 before df3) is unchanged.
df15 <- bind_rows(df1, df2, df4, df3, df5, df6, df7, df8)
df15

# Row names are written too (write.csv default).
write.csv(df15, "model_pred.csv")

In [None]:
#' Write a data frame to the "model-csv-files" blob container as test.csv.
#'
#' A local copy is also written to File.csv in the working directory. The
#' function authenticates with a shared access signature (SAS) read from the
#' AZURE_STORAGE_SAS_TOKEN environment variable rather than one embedded in the
#' source. The token must allow writes to the container: the one previously
#' embedded here was marked `sp=r`, i.e. read-only.
#'
#' @param frame Data frame to upload.
#' @return The value returned by `storage_write_csv()`.
to_azure <- function(frame) {
  sas_token <- Sys.getenv("AZURE_STORAGE_SAS_TOKEN")
  if (!nzchar(sas_token)) {
    stop(
      "Set the AZURE_STORAGE_SAS_TOKEN environment variable before uploading.",
      call. = FALSE
    )
  }
  # The endpoint is the bare account URL. The container is opened separately
  # and the token is passed as `sas`, not as an account `key`.
  bl <- storage_endpoint(
    "https://blakestorageaccount581.blob.core.windows.net",
    sas = sas_token
  )
  container <- storage_container(bl, "model-csv-files")
  write.csv(frame, "File.csv")
  # Use the container opened above rather than a global `cont` object.
  storage_write_csv(frame, container, "test.csv")
}


# NOTE(review): `Date_Frame` is not defined anywhere in the visible notebook, so
# this call fails unless the object is created elsewhere -- confirm which data
# frame (e.g. one of the prediction tables) was meant to be uploaded.
to_azure(Date_Frame)