In [1]:
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from category_encoders import TargetEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import numpy as np



In [2]:
# Location of the processed dataset, relative to the notebook's working directory.
DATA_CSV = '../src/data/processed_files/df_datos_completos.csv'
# Read the whole CSV into memory as the base frame used by the cells below.
df = pd.read_csv(DATA_CSV)

In [3]:
# Categorical columns that must be encoded before modelling
categorical_columns = ['arbitro', 'estadio']

# One-hot encoder for 'arbitro'. Dense output is requested because the
# StandardScaler at the end of the pipeline centres the data, which it
# refuses to do on a sparse matrix.
# scikit-learn 1.2 renamed the `sparse` keyword to `sparse_output` and 1.4
# removed the old spelling, so try the new name first and fall back to the
# old one on releases that do not know it yet.
try:
    onehot_encoder = OneHotEncoder(sparse_output=False, handle_unknown='ignore')
except TypeError:
    onehot_encoder = OneHotEncoder(sparse=False, handle_unknown='ignore')

# Pipeline that one-hot encodes the 'arbitro' column
arbitro_pipeline = Pipeline([
    ('onehot', onehot_encoder)
])

# Pipeline that target-encodes the 'estadio' column
estadio_pipeline = Pipeline([
    ('target', TargetEncoder())
])

# ColumnTransformer routing each categorical column to its own encoder;
# every other column is passed through unchanged.
preprocessor = ColumnTransformer([
    ('arbitro', arbitro_pipeline, ['arbitro']),
    ('estadio', estadio_pipeline, ['estadio']),
    ], remainder = "passthrough")

# Final preprocessing pipeline: column encoding followed by standardisation.
# No estimator is attached here; the model is built separately.
pipeline = Pipeline([
    ('preprocessor', preprocessor),
    ('scaler', StandardScaler()),
])

In [4]:
# Columns left out of the feature matrix: identifiers, the target itself, the
# goal counts and the timestamp (presumably because the goal counts reveal the
# outcome — confirm).
excluded_columns = [
    'index', 'fixture_id', 'resultado',
    'goles_local', 'goles_visitante',
    'goles_descanso_local', 'goles_descanso_visitante',
    'fecha_timestamp',
]
X = df.drop(columns=excluded_columns)
# Target: the 'resultado' column.
y = df['resultado']

In [5]:
# Fit the preprocessing pipeline (y is supplied so the target encoder can use
# it) and then encode the full feature frame with the fitted pipeline.
# NOTE(review): the fit happens on all rows before the train/test split, so the
# target encoder and the scaler see the rows that later become the test set —
# consider splitting first and fitting on the training part only.
x_transform = pipeline.fit(X, y).transform(X)

In [6]:
# Hold out 20% of the rows for the final evaluation. A fixed random_state makes
# the partition identical on every run, so Restart & Run All evaluates on the
# same held-out rows instead of a different random subset each time.
X_train, X_test, y_train, y_test = train_test_split(
    x_transform, y, test_size=0.2, random_state=42
)

In [25]:
# Take the input width from the preprocessed training matrix rather than
# hard-coding it: the number of columns depends on how many categories the
# one-hot encoder found, so a literal silently goes stale when the data changes.
n_features = X_train.shape[1]

# Fully connected classifier: four ReLU hidden layers of decreasing width and a
# 3-way softmax output (one probability per class).
model = keras.models.Sequential()
model.add(keras.layers.Dense(3857, input_dim=n_features, activation='relu'))
model.add(keras.layers.Dense(1150, activation='relu'))
model.add(keras.layers.Dense(300, activation='relu'))
model.add(keras.layers.Dense(100, activation='relu'))
model.add(keras.layers.Dense(3, activation='softmax'))

In [26]:
# Training configuration: plain SGD, cross-entropy over integer class labels,
# and accuracy reported alongside the loss.
compile_kwargs = {
    "optimizer": "sgd",
    "loss": "sparse_categorical_crossentropy",
    "metrics": ["accuracy"],
}
model.compile(**compile_kwargs)

In [27]:
print("Fit model on training data")

# Persist the model to disk only when the monitored validation loss improves.
# Without save_best_only the file is overwritten after every epoch and ends up
# holding the last epoch's weights, which are not the ones early stopping
# restores into `model`.
checkpoint_cb = keras.callbacks.ModelCheckpoint("callback_model.h5",
                                                save_best_only=True)
# Stop once validation loss has not improved for 5 consecutive epochs and roll
# the in-memory model back to its best weights.
early_stopping_cb = keras.callbacks.EarlyStopping(patience=5,
                                                  restore_best_weights=True)
# 20% of the training rows are set aside as the validation set that both
# callbacks monitor.
history = model.fit(
    X_train,
    y_train,
    batch_size=32,
    epochs=50,
    validation_split=0.2,
    callbacks=[checkpoint_cb, early_stopping_cb],
)

Fit model on training data
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50


In [28]:
# Score the trained model on the held-out split; the returned list holds the
# loss followed by the accuracy metric configured at compile time.
results = model.evaluate(x=X_test, y=y_test)
print("test loss, test acc:", results)

test loss, test acc: [1.0820127725601196, 0.42525532841682434]
