# Neural network

Modelo de red neuronal que sirve para realizar la predicción y analizar los resultados.

- Lee los datos del df conjunto.
- Lee los datos que se usarán de test.
- Crea el modelo.
- Realiza la predicción.
- Analiza resultados.

In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import MeanSquaredError
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA

# Load the combined dataframe and the separate submission (test) set
# from their parquet files.
train_path, submission_path = 'data/data.parquet', "test/submission.parquet"
df = pd.read_parquet(train_path)
test_data = pd.read_parquet(submission_path)

### Variables globales

In [2]:
# Global configuration shared by the cells below.
seed = 42
batch_size = 32
epochs = 10

# Seed the NumPy and TensorFlow global random generators so that model
# training is repeatable between runs. train_test_split receives the same
# seed separately through its random_state argument.
# NOTE(review): some GPU kernels may remain non-deterministic even when
# seeded -- confirm if bit-exact reproducibility is required.
np.random.seed(seed)
tf.random.set_seed(seed)

# Input columns fed to the network and the column it learns to predict.
features = ['station_id','hour','dayofweek','festa','month','Rain','Wind','ctx-1','ctx-2','ctx-3','ctx-4']
target = ['porcio']

### Dividir en train y validación, normalizar datos y PCA

In [4]:
# Hold out 20% of the labelled rows as a validation split.
X, y = df[features], df[target]
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=seed,
)

# The submission set only contributes features here; its target is not read.
X_test = test_data[features]

# Standardise the features. The scaler is fitted on the training split only
# and then reused unchanged for the validation and submission data.
scaler = StandardScaler().fit(X_train)
X_train_scaled, X_val_scaled, X_test_scaled = (
    scaler.transform(split) for split in (X_train, X_val, X_test)
)

# Principal Component Analysis keeping enough components to explain 95% of
# the variance; fitted on the training split, applied to the other two.
pca = PCA(n_components=0.95)
X_train_pca = pca.fit_transform(X_train_scaled)
X_val_pca, X_test_pca = (
    pca.transform(split) for split in (X_val_scaled, X_test_scaled)
)

### Definición de la red neuronal, compilación y entrenamiento del modelo

In [5]:
# Fully connected regressor: four hidden stages of decreasing width, each one
# Dense(relu) -> BatchNormalization -> Dropout, followed by a single linear
# output unit.
hidden_units = (256, 128, 64, 32)
dropout_rate = 0.2

layer_stack = []
for units in hidden_units:
    # Only the very first layer declares the input width (number of PCA components).
    first_layer_kwargs = {} if layer_stack else {'input_shape': (X_train_pca.shape[1],)}
    layer_stack.append(tf.keras.layers.Dense(units, activation='relu', **first_layer_kwargs))
    layer_stack.append(tf.keras.layers.BatchNormalization())
    layer_stack.append(tf.keras.layers.Dropout(dropout_rate))
layer_stack.append(tf.keras.layers.Dense(1))

model = tf.keras.Sequential(layer_stack)

# Adam with a learning rate that starts at 1e-3 and is multiplied by 0.9
# every 1000 optimizer steps.
lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate=0.001,
    decay_steps=1000,
    decay_rate=0.9,
)
optimizer = tf.keras.optimizers.Adam(learning_rate=lr_schedule)

model.compile(optimizer=optimizer, loss='mean_squared_error')

# Train on the PCA-transformed training split and report the validation loss
# after every epoch.
model.fit(
    X_train_pca,
    y_train,
    batch_size=batch_size,
    epochs=epochs,
    verbose=1,
    validation_data=(X_val_pca, y_val),
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x23c5af64a00>

### Predicción

In [6]:
# Predict on the PCA-transformed submission features; the result is indexed
# as a 2-D array below, keeping its first (only used) column.
predictions = model.predict(X_test_pca)

# Row identifiers come from the index of the submission dataframe.
ids = test_data.index.values

# Name of the CSV file that receives the predictions.
csv_file = 'PrediccionesKAGGEL.csv'

# Pair every id with its prediction and write the table without the pandas index.
# NOTE(review): this rebinds `df`, which until now held the training data, so
# re-running the split cell after this one will fail on the missing feature
# columns. The later display/describe cells rely on this rebinding.
df = pd.DataFrame(
    {
        'index': ids,
        'percentage_docks_available': predictions[:, 0],
    }
)
df.to_csv(csv_file, index=False)



In [7]:
# Display the predictions dataframe; `df` was rebound to it in the previous cell.
df

Unnamed: 0,index,percentage_docks_available
0,0,0.820360
1,1,0.627730
2,2,0.563880
3,3,0.766312
4,4,0.793010
...,...,...
54994,54994,0.490554
54995,54995,0.156667
54996,54996,0.771802
54997,54997,0.603834


### Análisis descriptivo

In [8]:
# Summary statistics of the predictions dataframe. The bare name on the last
# line renders the resulting table as the cell output.
describe_results = df.describe()
describe_results

Unnamed: 0,index,percentage_docks_available
count,54999.0,54999.0
mean,27499.0,0.576865
std,15876.988064,0.230902
min,0.0,0.028993
25%,13749.5,0.391221
50%,27499.0,0.607387
75%,41248.5,0.780582
max,54998.0,0.960856
