In [1]:
using Random
using DataFrames
using Distributions
using Plots

seed = 1234

1234

In [2]:
Random.seed!(seed)

gpd = 8
n_days = 400
n_teams = 32

true_intercept = zeros(n_days)
true_home_adv = zeros(n_days)
true_offence = zeros(n_days, n_teams)
true_defence = zeros(n_days, n_teams)

# Initialize the true values
true_intercept[1] = 1.12
true_home_adv[1] = 0.25
o_init = randn(n_teams)*0.15
o_init = o_init .- mean(o_init)
true_offence[1, :] = o_init
d_init = randn(n_teams)*0.10
d_init = d_init .- mean(d_init)
true_defence[1, :] = randn(n_teams)*0.10
Δ_σ_ih = 0.0005
Δ_σ_od = 0.004
;

In [3]:
Random.seed!(seed)

for t = 2:n_days
    true_intercept[t] = true_intercept[t-1] + (Δ_σ_ih * randn())
    true_home_adv[t] = true_home_adv[t-1] + (Δ_σ_ih * randn())
    new_true_offence = true_offence[t-1, :] .+ (Δ_σ_od * randn(n_teams))
    true_offence[t, :] = new_true_offence .- mean(new_true_offence)
    new_true_defence = true_defence[t-1, :] .+ (Δ_σ_od * randn(n_teams))
    true_defence[t, :] = new_true_defence .- mean(new_true_defence)
end

In [4]:
Random.seed!(seed)

game_permutations = [randperm(n_teams) for i = 1:n_days]
games_full = DataFrame(
    day = Int64[], 
    home_id = Int64[], 
    home_score = Int64[], 
    away_id = Int64[], 
    away_score = Int64[],
    home_win = Bool[]
)

for (t, g) in enumerate(game_permutations)
    for _ = 1:gpd
        for i = 1:2:n_teams
            home_id = g[i]
            away_id = g[i+1]
            home_λ = exp(true_intercept[t] + true_home_adv[t] + true_offence[t, home_id] - true_defence[t, away_id])
            home_score = rand(Poisson(home_λ))
            away_λ = exp(true_intercept[t] + true_offence[t, away_id] - true_defence[t, home_id])
            away_score = rand(Poisson(away_λ))
            home_p = home_λ/(home_λ + away_λ)
            if home_score > away_score games_full
                home_win = true
            elseif home_score == away_score
                home_win = rand(Bernoulli(home_p))
            else
                home_win = false
            end
            push!(games_full, [t, home_id, home_score, away_id, away_score, home_win])
        end
    end
end

In [5]:
latent_variables = DataFrame(
    day = 1:n_days,
    h = true_home_adv,
    i = true_intercept
)

for i = 1:n_teams
    latent_variables["o$i"] = true_offence[:,i]
    latent_variables["d$i"] = true_defence[:,i]
end

│     df[!, col_ind] = v
│     df
│ end` instead.
│   caller = top-level scope at In[5]:8
└ @ Core ./In[5]:8
│     df[!, col_ind] = v
│     df
│ end` instead.
│   caller = top-level scope at In[5]:9
└ @ Core ./In[5]:9


In [6]:
first(latent_variables, 5)

Unnamed: 0_level_0,day,h,i,o1,d1,o2,d2,o3
Unnamed: 0_level_1,Int64,Float64,Float64,Float64,Float64,Float64,Float64,Float64
1,1,0.25,1.12,0.0889899,0.0128064,-0.176374,0.185278,-0.115284
2,2,0.249549,1.12043,0.0857186,-0.00913713,-0.181279,0.159257,-0.11312
3,3,0.249604,1.12002,0.0854775,-0.00313757,-0.179036,0.156408,-0.112068
4,4,0.249303,1.11986,0.0799276,0.00251235,-0.175478,0.156417,-0.111289
5,5,0.249145,1.11957,0.0766632,-0.000407825,-0.173755,0.156237,-0.108445


In [7]:
using CSV
CSV.write("ideal_games.csv", games_full)

"ideal_games.csv"

In [8]:
CSV.write("latent_variables.csv", latent_variables)

"latent_variables.csv"

In [9]:
function latent_var_plot(var_name)
    ldays = latent_variables[:,:day]
    ly = latent_variables[:,var_name]
    plot(ldays, ly)
end

latent_var_plot (generic function with 1 method)

In [10]:
latent_var_plot(:h)

InterruptException: InterruptException: