In [1]:
using Pkg
#Pkg.add("Flux")

using CSV
using DataFrames
using Flux
using Statistics
using Random

#Se utilizan algunas funciones definidas en el ejercicio 1
include("./../src/exercise1_code.jl")

displayCorrelation (generic function with 1 method)

In [2]:
function OneHotEncoding(column)
    variables = unique(column)
    n = 0
    for var in variables
        replace!(column, var => "$(n)")
        n+=1
    end
    column = parse.(Int, column)
    return column
end     

OneHotEncoding (generic function with 1 method)

In [23]:
CM_data = CSV.read("./../dat/Churn_Modelling.csv", DataFrame)
rows, cols = dataShape(CM_data)

(10000, 14)

In [24]:
# Variables categoricas string codificadas
CM_data.Gender = OneHotEncoding(CM_data.Gender)
CM_data.Geography = OneHotEncoding(CM_data.Geography)

# Variables cuantitativas normalizadas
CM_data.Balance = Flux.normalise(CM_data.Balance)
CM_data.EstimatedSalary = Flux.normalise(CM_data.EstimatedSalary)

#Variables predictoras y variable objetivo
X = select(CM_data, Not(["RowNumber", "CustomerId", "Surname", "Exited"]))
y = Float32.(CM_data.Exited);

In [39]:
Random.seed!(18)

# rows es el número total de registros
# Muestra aleatoria de indices para los datos de entrenamiento (70% de los datos)
idx_train = sample(1:rows, Int(round(0.7*rows)), replace = false)
# Indices restantes para los datos de prueba
idx_test = Not(idx_train)

# Datos entrenamiento
X_train = Float32.(Matrix(X[idx_train,:])')
y_train = y[idx_train]

class0_idx = findall(y_train .== 0.0)
class1_idx = findall(y_train .== 1.0)
println("Clase 0: ", length(class0_idx))
println("Clase 1: ", length(class1_idx))

oversampled_idx = vcat(class0_idx, sample(class1_idx, length(class0_idx), replace=true))
X_train =X_train[:,oversampled_idx]
y_train = y_train[oversampled_idx]

# Datos prueba
X_test = Float32.(Matrix(X[idx_test,:])')
y_test = y[idx_test];
# #y_test = Flux.onehotbatch(y[idx_test], [0,1]);

Clase 0: 5567
Clase 1: 1433


In [40]:
# Definir modelo: una sola salida con sigmoide
#model = Chain(Dense(10 => 5, relu), Dense(5 => 5, relu), Dense(5 => 1))  # σ es la función sigmoide
model = Chain(Dense(10 => 1, σ))
function loss(model, X, y)
    ŷ = model(X)[1,:]  # Aplanar salida para calcular la pérdida
    Flux.logitbinarycrossentropy(ŷ, y)
end

# Métrica de precisión
accuracy(model, X, y) = mean((model(X)[:, 1] .> 0.5) .== y)

# Optimizador
optimizer = Flux.setup(Adam(0.1), model)

# Entrenamiento
data = [(X_train, y_train)]
for epoch in 1:10
    Flux.train!(loss, model, data, optimizer)
    println("Epoch $epoch - Accuracy: ", accuracy(model, X_train, y_train))
end

Epoch 1 - Accuracy: 0.5
Epoch 2 - Accuracy: 0.5
Epoch 3 - Accuracy: 0.5
Epoch 4 - Accuracy: 0.5
Epoch 5 - Accuracy: 0.5
Epoch 6 - Accuracy: 0.5
Epoch 7 - Accuracy: 0.5
Epoch 8 - Accuracy: 0.5
Epoch 9 - Accuracy: 0.5
Epoch 10 - Accuracy: 0.5


In [41]:
model(X_train)

1×11134 Matrix{Float32}:
 3.71157f-35  7.0375f-29  1.13027f-32  …  3.27187f-32  5.22469f-34

In [6]:
# # Modelo: una red con una capa densa, 10 entradas y 1 salida, cuya función de activación es la sigmoide
# model = Chain(Dense(10 => 2), softmax)

# #Loss function 
# function loss(model, X, y)
#     ŷ = model(X)
#     Flux.crossentropy(ŷ,y)
# end

# # Accuracy function
# accuracy(model, X, y) = mean(Flux.onecold(model(X),[0,1]) .== Flux.onecold(y,[0,1]))

# #Optimizador
# optimizer = Flux.setup(Adam(0.001), model)

# # Entrenamiento
# data = [(X_train, y_train)]
# for epoch in 1:50
#     Flux.train!(loss, model, data, optimizer)
#     println("Epoch $epoch - Accuracy: ", accuracy(model, X_train, y_train))
# end

In [44]:
[(X_train, y_train)][1]

(Float32[786.0 636.0 … 676.0 611.0; 1.0 2.0 … 0.0 0.0; … ; 0.0 1.0 … 0.0 1.0; 0.01459546 -0.80994624 … -0.085365735 -0.5886526], Float32[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0  …  1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0])

In [47]:
zip(X_train, y_train)

zip(Float32[786.0 636.0 … 676.0 611.0; 1.0 2.0 … 0.0 0.0; … ; 0.0 1.0 … 0.0 1.0; 0.01459546 -0.80994624 … -0.085365735 -0.5886526], Float32[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0  …  1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0])

In [42]:
?Flux.train!

```
train!(loss, model, data, opt_state)
```

Uses a `loss` function and training `data` to improve the `model`'s parameters according to a particular optimisation rule encoded in `opt_state`. Iterates through `data` once, evaluating for each `d in data` either `loss(model, d...)` if `d isa Tuple`, or else `loss(model, d)` for other `d`.

If `model` is an Enzyme.Duplicated and `Enzyme.jl` is loaded, gradients will be computed with Enzyme, otherwise they will be computed with Zygote.

For example, with these definitions...

```
data = [(x1, y1), (x2, y2), (x3, y3)]

loss3(m, x, y) = norm(m(x) .- y)        # the model is the first argument

opt_state = Flux.setup(Adam(), model)   # explicit setup of optimiser momenta
```

...calling `Flux.train!(loss3, model, data, opt_state)` runs a loop much like this:

```
for d in data
    ∂L∂m = gradient(loss3, model, d...)[1]
    update!(opt_state, model, ∂L∂m)
end
```

You can also write this loop yourself, if you need more flexibility. For this reason `train!` is not highly extensible. It adds only a few features to the loop above:

  * Stop with a `DomainError` if the loss is infinite or `NaN` at any point.
  * Show a progress bar using [`@withprogress`](https://github.com/JuliaLogging/ProgressLogging.jl).

!!! compat "New"
    This method was added in Flux 0.13.9. It has significant changes from the one used by Flux ≤ 0.13:

      * It now takes the `model` itself, not the result of `Flux.params`. (This is to move away from Zygote's "implicit" parameter handling, with `Grads`.)
      * Instead of `loss` being a function which accepts only the data, now it must also accept the `model` itself, as the first argument.
      * `opt_state` should be the result of [`Flux.setup`](@ref). Using an optimiser such as `Adam()` without this step should give you a warning.
      * Callback functions are not supported. (But any code can be included in the above `for` loop.)


---

```
train!(loss, Duplicated(model), data, opt_state)
```

This method uses Enzyme.jl instead of Zygote.jl to compute the gradients, but is otherwise the same as `train!(loss, model, data, opt_state)`.

Only available when Enzyme is loaded.

!!! compat "New"
    This method was added in Flux 0.13.9.



In [None]:
Random.seed!(19)

# rows es el número total de registros
# Muestra aleatoria de indices para los datos de entrenamiento (70% de los datos)
idx_train = sample(1:rows, Int(round(0.7*rows)), replace = false)
# Indices restantes para los datos de prueba
idx_test = Not(idx_train)
# Datos entrenamiento
X_train = Float32.(Matrix(X[idx_train,:])')
y_train = Flux.onehotbatch(y[idx_train], [0,1])
# Datos prueba
X_test = Float32.(Matrix(X[idx_test,:])')
y_test = Flux.onehotbatch(y[idx_test], [0,1]);

# Nota: se usa la transpuesta de la matriz para X_train, X_test para obtener una matriz de 10 filas y n columnas. Este formato es necesario.
# también se convierten los valores Float64 a Float32, pues así lo sugiere la documentación para tener un mejor rendimiento

# Modelo: una red con una capa densa, 10 entradas y 1 salida, cuya función de activación es la sigmoide
model = Chain(Dense(10 => 2), sigmoid)

# Loss function 
function loss(model, X, y)
    ŷ = model(X)
    Flux.binarycrossentropy(ŷ,y)
end

# Accuracy function
accuracy(model, X, y) = mean(Flux.onecold(model(X),[0,1]) .== Flux.onecold(y,[0,1]))


In [None]:
function get_processed_data()
    
    CM_data = CSV.read("./../dat/Churn_Modelling.csv", DataFrame)
    CM_data.Gender = OneHotEncoding(CM_data.Gender)
    CM_data.Geography = OneHotEncoding(CM_data.Geography)
    CM_data.Balance = Flux.normalise(CM_data.Balance)
    CM_data.EstimatedSalary = Flux.normalise(CM_data.EstimatedSalary)
    data = select(CM_data, Not(["RowNumber", "CustomerId", "Surname"]))
    rows, cols = dataShape(CM_data)

    X = select(data, Not(:Exited))
    y = data.Exited

    idx_train = sample(1:rows, Int(round(0.7*rows)), replace = false)
    idx_test = Not(idx_train)
    # Datos entrenamiento
    X_train = Float32.(Matrix(X[idx_train,:])')
    y_train = Flux.onehotbatch(y[idx_train], [0,1])
    # Datos prueba
    X_test = Float32.(Matrix(X[idx_test,:])')
    y_test = Flux.onehotbatch(y[idx_test], [0,1])

    ## Repeat the data `args.repeat` times
    train_data = (X_train, y_train)
    test_data = (X_test,y_test)

    return train_data, test_data
end

accuracy(model, x, y) = mean(Flux.onecold(model(x),[0,1]) .== Flux.onecold(y,[0,1]))

function train()	

    ## Load processed data
    train_data, test_data = get_processed_data()

    ## #Declare model 
    model = Chain(Dense(10, 2))
	
    ## Define loss function to be used in training
    ## For numerical stability, we use here logitcrossentropy
    loss(m, x, y) = logitcrossentropy(m(x), y)
	
    ## Training
    ## Gradient descent optimiser
    optimiser = Descent()

    println("Starting training.")
    Flux.train!(loss, model, train_data, optimiser)
	
    return model, test_data
end