# Ejercicio 2: Overfitting en Julia
Este notebook implementa el cálculo de R², R² ajustado y R² fuera de muestra usando modelos polinomiales en Julia.

In [None]:
using Random, LinearAlgebra, Statistics
using Plots
using StatsBase

# Simulate the sample. The global RNG is seeded first so reruns reproduce the same
# draws: n sorted uniform values on [0, 1) and a response equal to exp(4X) plus
# standard-normal noise.
Random.seed!(123)
n = 1000
X = sort!(rand(n))
ϵ = randn(n)
y = @. exp(4 * X) + ϵ

"""
    r2(y, ŷ)

Return the coefficient of determination of the predictions `ŷ` for the observations
`y`: one minus the residual sum of squares divided by the total sum of squares of `y`
about its own mean.
"""
function r2(y, ŷ)
    residuals = y .- ŷ
    deviations = y .- mean(y)
    return 1 - sum(residuals .^ 2) / sum(deviations .^ 2)
end

"""
    r2_adjusted(r2::Real, n::Integer, k::Integer)

Return the adjusted R², `1 - (1 - r2) * (n - 1) / (n - k - 1)`.

# Arguments
- `r2`: the unadjusted R²; any real number (it is converted with `float`).
- `n`: number of observations.
- `k`: number of regressors.

# Returns
A floating-point value of the same type as `float(r2)`. When `k >= n - 1` the
denominator `n - k - 1` is not positive and `NaN` is returned instead.
"""
function r2_adjusted(r2::Real, n::Integer, k::Integer)
    # Work in floating point so both branches return the same type.
    r = float(r2)
    k >= n - 1 && return oftype(r, NaN)
    return 1 - (1 - r) * (n - 1) / (n - k - 1)
end

# Numbers of polynomial features (highest power of X) to evaluate.
features_list = [1, 2, 5, 10, 20, 50, 100, 200, 500]

# Result accumulators: full-sample R², adjusted R² and test-set R².
r2_full, r2_adj, r2_out = Float64[], Float64[], Float64[]

# Random split of the row indices: 75% (rounded) drawn without replacement for
# training, the remaining indices for testing.
train_idx = sample(1:n, round(Int, 0.75 * n); replace=false)
test_idx = setdiff(1:n, train_idx)

# For each p in features_list, regress y on X, X², …, Xᵖ (no constant column) and
# record the full-sample R², the adjusted R² and the R² on the held-out test rows.
for p in features_list
    # Build the n×p design matrix with one broadcast: column j holds X.^j.
    # Broadcasting against the transposed range avoids splatting p separate
    # vectors into `hcat`.
    X_poly = X .^ (1:p)'

    # Fit on the whole sample; for a rectangular matrix `\` returns the
    # least-squares solution.
    β = X_poly \ y
    r2_val = r2(y, X_poly * β)
    push!(r2_full, r2_val)
    push!(r2_adj, r2_adjusted(r2_val, n, p))

    # Refit on the training rows only and score the predictions on the test rows.
    X_train, y_train = X_poly[train_idx, :], y[train_idx]
    X_test, y_test = X_poly[test_idx, :], y[test_idx]
    β_train = X_train \ y_train
    push!(r2_out, r2(y_test, X_test * β_train))
end

In [None]:
# Full-sample R² against the number of features, with a log-scaled x axis.
plot(
    features_list, r2_full;
    xscale=:log10,
    marker=:o,
    legend=false,
    title="R² en toda la muestra",
    xlabel="Número de features (escala log)",
    ylabel="R²",
)

In [None]:
# Adjusted R² against the number of features, with a log-scaled x axis.
plot(
    features_list, r2_adj;
    xscale=:log10,
    marker=:o,
    legend=false,
    title="R² ajustado",
    xlabel="Número de features (escala log)",
    ylabel="R² ajustado",
)

In [None]:
# Test-set R² against the number of features, with a log-scaled x axis.
plot(
    features_list, r2_out;
    xscale=:log10,
    marker=:o,
    legend=false,
    title="R² en test",
    xlabel="Número de features (escala log)",
    ylabel="R² fuera de muestra",
)