In [32]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow import keras
from category_encoders import TargetEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from keras.callbacks import EarlyStopping


In [8]:
df = pd.read_csv('../data/processed_files/df_datos_completos.csv')
df.head()

Unnamed: 0,index,id_equipo_local,id_equipo_visitante,goles_local,goles_visitante,resultado,arbitro,fixture_id,fecha_timestamp,goles_descanso_local,...,titu-337523.0,titu-338295.0,titu-341700.0,titu-347886.0,titu-380261.0,odd_1,odd_x,odd_2,tiros_para_marcar_local,tiros_para_marcar_away
0,2481,530,539,3,1,1,"Alfonso Alvarez Izquierdo, Spain",203877,1420297200,1,...,0,0,0,0,0,1.2,6.5,17.0,9.25,11.333333
1,2482,536,538,1,0,1,"Carlos Velasco Carballo, Spain",203878,1420304400,1,...,0,0,0,0,0,1.67,3.8,5.5,9.0,7.5
2,2483,797,533,2,2,0,"Carlos Clos Gomez, Spain",203879,1420311600,2,...,0,0,0,0,0,5.0,3.6,1.75,25.0,12.0
3,2484,544,531,1,0,1,"Alberto Undiano, Spain",203880,1420318800,1,...,0,0,0,0,0,3.2,3.1,2.4,44.0,11.333333
4,2485,535,723,1,2,2,"Fernando Teixeira Vitienes, Spain",203881,1420318800,0,...,0,0,0,0,0,1.53,4.1,6.5,6.0,11.0


In [9]:
X = df.drop(['index', 'fixture_id','resultado', 'goles_local', 'goles_visitante','goles_descanso_local','goles_descanso_visitante','fecha_timestamp'], axis=1)
y = df['resultado']

In [66]:
# Pipeline para codificar la columna 'arbitro' con OneHotEncoder
arbitro_pipeline = Pipeline([
    ('onehot', OneHotEncoder(sparse=False, handle_unknown='ignore'))
])

# Pipeline para codificar la columna 'estadio' con TargetEncoder
estadio_pipeline = Pipeline([
    ('target', TargetEncoder())
])

# ColumnTransformer para aplicar los pipelines a las columnas correspondientes
preprocessor = ColumnTransformer([
    ('arbitro', arbitro_pipeline, ['arbitro']),
    ('estadio', estadio_pipeline, ['estadio']),
    ], remainder = "passthrough")

# Pipeline final con el preprocesamiento y el modelo RandomForestClassifier
pipeline = Pipeline([
    ('preprocessor', preprocessor),
    ('standard', StandardScaler())
])

In [67]:
pipeline.fit(X,y)

In [68]:
X_transformed = pipeline.transform(X)

In [69]:
X_train, X_test, y_train, y_test = train_test_split(X_transformed, y, test_size=0.2)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2)



In [201]:
model = keras.Sequential([
    keras.layers.Dense(5, activation='relu', input_shape=(X_transformed.shape[1],)),
    keras.layers.Dense(20, activation='relu'),
    keras.layers.Dense(30, activation='relu'),
    keras.layers.Dense(10, activation='relu'),
    keras.layers.Dense(3, activation='softmax')
])



In [207]:
model.compile(optimizer='sgd',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

In [208]:
model.summary()

Model: "sequential_25"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_93 (Dense)            (None, 5)                 19305     
                                                                 
 dense_94 (Dense)            (None, 20)                120       
                                                                 
 dense_95 (Dense)            (None, 30)                630       
                                                                 
 dense_96 (Dense)            (None, 10)                310       
                                                                 
 dense_97 (Dense)            (None, 3)                 33        
                                                                 
Total params: 20,398
Trainable params: 20,398
Non-trainable params: 0
_________________________________________________________________


In [209]:
earlystop = EarlyStopping(monitor='val_loss', patience=10, verbose=1, mode='auto')


In [210]:
history = model.fit(X_train, y_train, epochs=50, batch_size=32, validation_data=(X_val, y_val), callbacks=[earlystop])

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 11: early stopping


In [211]:
test_loss, test_acc = model.evaluate(X_test,  y_test, verbose=2)

print('\nTest accuracy:', test_acc)

34/34 - 0s - loss: 4.3107 - accuracy: 0.4757 - 174ms/epoch - 5ms/step

Test accuracy: 0.4756554365158081


In [None]:
0.48033708333969116

In [None]:
model = keras.Sequential([
    keras.layers.Dense(5, activation='relu', input_shape=(X_transformed.shape[1],)),
    keras.layers.Dense(20, activation='relu'),
    keras.layers.Dense(30, activation='relu'),
    
    keras.layers.Dense(3, activation='softmax')
])

In [241]:
df = pd.read_csv('../data/processed_files/df_datos_completos.csv')

In [242]:
X = df.drop(['index', 'fixture_id','resultado', 'goles_local', 'goles_visitante','goles_descanso_local','goles_descanso_visitante','fecha_timestamp'], axis=1)
y = df['resultado']

In [244]:
encoder = OneHotEncoder()
encoded_col = encoder.fit_transform(X['estadio'].values.reshape(-1, 1))
encoded_col_df = pd.DataFrame(encoded_col.toarray(), columns=encoder.get_feature_names([X['estadio'].name]))
data = pd.concat([X, encoded_col_df], axis=1)




In [245]:
encoded_col = encoder.fit_transform(X['arbitro'].values.reshape(-1, 1))
encoded_col_df = pd.DataFrame(encoded_col.toarray(), columns=encoder.get_feature_names([X['arbitro'].name]))
data = pd.concat([X, encoded_col_df], axis=1)



In [247]:
data.drop(['arbitro','estadio'],inplace=True, axis=1)

In [251]:
df_les = data.loc[:, data.columns.str.startswith('les-')]
df_titu = data.loc[:, data.columns.str.startswith('titu-')]
data.drop(df_les.columns, inplace=True, axis=1)
data.drop(df_titu.columns, inplace=True, axis=1)


In [252]:
data.head()

Unnamed: 0,id_equipo_local,id_equipo_visitante,season,shots_on_goal_local,shots_on_goal_away,shots_off_goal_local,shots_off_goal_away,total_shots_local,total_shots_away,blocked_shots_local,...,arbitro_Santiago Varón,"arbitro_Saul Ais Reig, Spain",arbitro_Saúl Ais,"arbitro_Valentin Pizarro Gomez, Spain",arbitro_Valentín Pizarro,"arbitro_Victor Areces Franco, Spain","arbitro_Victor Garcia Verdura, Spain",arbitro_Víctor Areces,arbitro_Víctor García,arbitro_Álvaro Moreno
0,530,539,2014,5.0,3.0,5.0,4.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,536,538,2014,6.0,1.0,3.0,7.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,797,533,2014,7.0,3.0,5.0,6.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,544,531,2014,7.0,5.0,4.0,8.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,535,723,2014,8.0,5.0,2.0,2.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
