In [2]:
########################################################################
# Linear-regression evaluation on 01_Belgium_1.xlsx
#   – Target : Y
#   – Regressors : T, X1, X2
#   – Metrics  : MAE (interpolation & extrapolation)
########################################################################

using XLSX, DataFrames, GLM, Random, Statistics

## 1. Load and tidy ----------------------------------------------------
xlsx = XLSX.readxlsx("01_Belgium_1.xlsx")   # adjust path if needed
df   = DataFrame(xlsx[1])                  # first sheet

# Keep only the required columns and give them simple names
rename!(df, Dict(
    :T                 => :T,
    Symbol("Y (GDP)")  => :Y,
    Symbol("X1 ( L )") => :X1,
    Symbol("X2 ( K )") => :X2
))
select!(df, [:T, :Y, :X1, :X2])
dropmissing!(df)                           # just in case

## 2. Interpolation error (5-fold CV) ---------------------------------
k     = 5
rng   = MersenneTwister(42)                # fixed seed = reproducibility
n     = nrow(df)
perm  = randperm(rng, n)                   # shuffled indices
folds = [perm[range] for range in Iterators.partition(1:n, ceil(Int, n/k))]

mae_interp = mean([
    begin
        test_idx  = folds[i]
        train_idx = vcat(folds[1:(i-1)]..., folds[(i+1):end]...)
        m   = lm(@formula(Y ~ T + X1 + X2), df[train_idx, :])
        ŷ   = predict(m, df[test_idx, :])
        mean(abs.(ŷ .- df.Y[test_idx]))
    end for i in 1:length(folds)
])

## 3. Extrapolation error (time-ordered split) -------------------------
cutoff = floor(Int, 0.80n)
train_idx, test_idx = 1:cutoff, cutoff+1:n

m_ex   = lm(@formula(Y ~ T + X1 + X2), df[train_idx, :])
ŷ_ex   = predict(m_ex, df[test_idx, :])
mae_ex = mean(abs.(ŷ_ex .- df.Y[test_idx]))

## 4. Show results -----------------------------------------------------
println("Interpolation MAE = ", round(mae_interp, digits = 2))
println("Extrapolation MAE  = ", round(mae_ex,    digits = 2))


LoadError: ArgumentError: no default `Tables.columns` implementation for type: XLSX.Worksheet

In [3]:
########################################################################
# Linear‐regression evaluation on 01_Belgium_1.xlsx
#   – Target     : Y
#   – Regressors : T, X1, X2
#   – Metrics    : MAE (interpolation & extrapolation)
########################################################################
using XLSX, DataFrames, GLM, Random, Statistics

############ 1. Load & tidy ###########################################
# read first sheet → DataFrame (robust to XLSX.jl versions)
xf    = XLSX.readxlsx("01_Belgium_1.xlsx")
sheet = xf[1]                                            # or xf["Sheet1"]
df    = DataFrame(XLSX.readtable(sheet; infer_eltypes=true)...)

# rename if your column headers contain spaces / parentheses
rename!(df, Dict(
    :T                 => :T,
    Symbol("Y (GDP)")  => :Y,    # adjust to your real header names
    Symbol("X1 ( L )") => :X1,
    Symbol("X2 ( K )") => :X2
)) do name            # fallback: keep original if rename key missing
    name
end
select!(df, [:T, :Y, :X1, :X2])
dropmissing!(df)

############ 2. Interpolation error (5-fold CV) #######################
k     = 5
rng   = MersenneTwister(42)
n     = nrow(df)
perm  = randperm(rng, n)
folds = [perm[range] for range in Iterators.partition(1:n, ceil(Int, n/k))]

mae_interp = mean([
    begin
        test_idx  = folds[i]
        train_idx = vcat(folds[1:(i-1)]..., folds[(i+1):end]...)
        m   = lm(@formula(Y ~ T + X1 + X2), df[train_idx, :])
        ŷ   = predict(m, df[test_idx, :])
        mean(abs.(ŷ .- df.Y[test_idx]))
    end for i in 1:length(folds)
])

############ 3. Extrapolation error (time split) ######################
cutoff = floor(Int, 0.80n)
train_idx, test_idx = 1:cutoff, cutoff+1:n

m_ex   = lm(@formula(Y ~ T + X1 + X2), df[train_idx, :])
ŷ_ex   = predict(m_ex, df[test_idx, :])
mae_ex = mean(abs.(ŷ_ex .- df.Y[test_idx]))

############ 4. Report ################################################
println("Interpolation MAE = ", round(mae_interp, digits = 2))
println("Extrapolation MAE  = ", round(mae_ex,    digits = 2))


LoadError: MethodError: no method matching readtable(::XLSX.Worksheet; infer_eltypes=true)
[0mClosest candidates are:
[0m  readtable([91m::Union{AbstractString, IO}[39m, [91m::Union{Int64, AbstractString}[39m; first_row, column_labels, header, infer_eltypes, stop_in_empty_row, stop_in_row_function, enable_cache, keep_empty_rows) at ~/.julia/packages/XLSX/U2Bcm/src/read.jl:603
[0m  readtable([91m::Union{AbstractString, IO}[39m, [91m::Union{Int64, AbstractString}[39m, [91m::Union{XLSX.ColumnRange, AbstractString}[39m; first_row, column_labels, header, infer_eltypes, stop_in_empty_row, stop_in_row_function, enable_cache, keep_empty_rows) at ~/.julia/packages/XLSX/U2Bcm/src/read.jl:610

In [4]:
########################################################################
# Linear regression on 01_Belgium_1.xlsx
#   • Target     : Y
#   • Regressors : T, X1, X2
#   • Metrics    : MAE (interpolation & extrapolation)
########################################################################
using XLSX, DataFrames, GLM, Random, Statistics

############ 1. Load & tidy ###########################################
tbl = XLSX.readtable("01_Belgium_1.xlsx", 1; infer_eltypes = true)
df  = DataFrame(tbl)

# If the real column headers differ, adjust here  ↓↓↓
rename!(df, Dict(
    :T                 => :T,        # leave as-is if already :T
    Symbol("Y")        => :Y,        # e.g. maybe just "Y"
    Symbol("X1")       => :X1,
    Symbol("X2")       => :X2
)) do n; n end                       # silent pass‐through for missing keys
select!(df, [:T, :Y, :X1, :X2])
dropmissing!(df)

############ 2. Interpolation MAE (5-fold CV) #########################
k, rng = 5, MersenneTwister(42)
n      = nrow(df)
perm   = randperm(rng, n)
folds  = [perm[range] for range in Iterators.partition(1:n, ceil(Int, n/k))]

mae_interp = mean([
    begin
        test, train = folds[i], vcat(folds[1:(i-1)]..., folds[(i+1):end]...)
        m   = lm(@formula(Y ~ T + X1 + X2), df[train, :])
        ŷ   = predict(m, df[test,  :])
        mean(abs.(ŷ .- df.Y[test]))
    end for i in 1:length(folds)
])

############ 3. Extrapolation MAE (time split) ########################
cut  = floor(Int, 0.80n)
m_ex = lm(@formula(Y ~ T + X1 + X2), df[1:cut, :])
ŷ_ex = predict(m_ex, df[cut+1:end, :])
mae_ex = mean(abs.(ŷ_ex .- df.Y[cut+1:end]))

############ 4. Report ###############################################
println("Interpolation MAE = ", round(mae_interp, digits = 2))
println("Extrapolation MAE  = ", round(mae_ex,    digits = 2))


LoadError: MethodError: no method matching rename!(::var"#1#2", ::DataFrame, ::Dict{Symbol, Symbol})
[0mClosest candidates are:
[0m  rename!(::Function, ::AbstractDataFrame) at ~/.julia/packages/DataFrames/58MUJ/src/abstractdataframe/abstractdataframe.jl:255
[0m  rename!(::Function, [91m::DataFrames.Index[39m) at ~/.julia/packages/DataFrames/58MUJ/src/other/index.jl:111
[0m  rename!(::Function, [91m::DataFrames.SubIndex[39m) at ~/.julia/packages/DataFrames/58MUJ/src/other/index.jl:589

In [5]:
using XLSX, DataFrames, GLM, StatsBase, Random, Statistics

# 1. Load the data
xlsx = XLSX.readxlsx("01_Belgium_1.xlsx")
sheet = xlsx[1]  # adjust if sheet index/name differs
df = DataFrame(sheet)

# 2. Select and rename columns
select!(df, [:T, Symbol("X1 ( L )")=>:X1, Symbol("X2 ( K )")=>:X2, Symbol("Y (GDP)")=>:Y])
dropmissing!(df)

# 3. Interpolation MAE: random 80/20 split
Random.seed!(0)  # for reproducibility
n = nrow(df)
idx = shuffle(1:n)
n_train = Int(floor(0.8 * n))
train_idx = idx[1:n_train]
test_idx  = idx[n_train+1:end]

train_int = df[train_idx, :]
test_int  = df[test_idx, :]

model_int = lm(@formula(Y ~ T + X1 + X2), train_int)
y_pred_int = predict(model_int, test_int)
mae_int = mean(abs.(test_int.Y .- y_pred_int))

# 4. Extrapolation MAE: split by T threshold at 80th percentile
th = quantile(df.T, 0.80)
train_ext = df[df.T .<= th, :]
test_ext  = df[df.T .>  th, :]

model_ext = lm(@formula(Y ~ T + X1 + X2), train_ext)
y_pred_ext = predict(model_ext, test_ext)
mae_ext = mean(abs.(test_ext.Y .- y_pred_ext))

# 5. Print results
println("Interpolation MAE: ", round(mae_int, digits=2))
println("Extrapolation MAE: ", round(mae_ext, digits=2))


LoadError: ArgumentError: Package StatsBase not found in current path.
- Run `import Pkg; Pkg.add("StatsBase")` to install the StatsBase package.

In [6]:
using Pkg
# If you haven’t already installed any of these:
# Pkg.add.(["XLSX", "DataFrames", "GLM"])

using XLSX, DataFrames, GLM, Random, Statistics

# 1. Load the data
xlsx = XLSX.readxlsx("01_Belgium_1.xlsx")
sheet = xlsx[1]                # adjust if sheet index/name differs
df = DataFrame(sheet)

# 2. Select and rename columns
select!(df, 
    :T,
    Symbol("X1 ( L )") => :X1,
    Symbol("X2 ( K )") => :X2,
    Symbol("Y (GDP)") => :Y
)
dropmissing!(df)

# 3. Interpolation MAE: random 80/20 split
Random.seed!(0)               # for reproducibility
n = nrow(df)
perm = randperm(n)
n_train = floor(Int, 0.8 * n)
train_int = df[perm[1:n_train], :]
test_int  = df[perm[n_train+1:end], :]

model_int = lm(@formula(Y ~ T + X1 + X2), train_int)
y_pred_int = predict(model_int, test_int)
mae_int = mean(abs.(test_int.Y .- y_pred_int))

# 4. Extrapolation MAE: split by T threshold at 80th percentile
th = quantile(df.T, 0.80)     # from Statistics
train_ext = df[df.T .<= th, :]
test_ext  = df[df.T .>  th, :]

model_ext = lm(@formula(Y ~ T + X1 + X2), train_ext)
y_pred_ext = predict(model_ext, test_ext)
mae_ext = mean(abs.(test_ext.Y .- y_pred_ext))

# 5. Print results
println("Interpolation MAE: ", round(mae_int, digits=2))
println("Extrapolation MAE: ", round(mae_ext, digits=2))


LoadError: ArgumentError: no default `Tables.columns` implementation for type: XLSX.Worksheet