# Neural network

Modelo de red neuronal que sirve para realizar la predicción y analizar los resultados.

- Lee los datos del df conjunto.
- Lee los datos que se usarán de test.
- Crea el modelo.
- Realiza la predicción.
- Analiza resultados.

In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import MeanSquaredError
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler,MinMaxScaler
from sklearn.decomposition import PCA
import os


# Input locations, relative to the notebook's working directory.
train_path = 'data/data.parquet'
submission_path = "test/submission.parquet"

# `df` is the frame the model is trained on; `test_data` holds the rows
# that predictions are generated for later in the notebook.
df = pd.read_parquet(train_path)
test_data = pd.read_parquet(submission_path)

### Selección de características

In [None]:
# Notebook-wide settings.
seed = 42        # random seed value
batch_size = 32  # samples per training batch passed to model.fit
epochs = 10      # number of training epochs passed to model.fit

# Input columns selected from the data frames.
features = [
    'station_id',
    'hour',
    'dayofweek',
    'festa',
    'month',
    'Rain',
    'Wind',
    'ctx-1',
    'ctx-2',
    'ctx-3',
    'ctx-4',
]

# Column to predict; kept as a one-element list so selecting it yields a
# single-column DataFrame.
target = ['porcio']

### Dividir en train y test, normalizar datos y PCA

In [None]:
# The whole labelled frame is used for training; no hold-out split is made
# in this cell.
X_train, y_train = df[features], df[target]

# Standardise every feature. The fitted scaler is kept so the same
# transformation can be applied to the prediction data.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)

# Principal Component Analysis: a float n_components keeps as many
# components as are needed to explain 95% of the variance.
pca = PCA(n_components=0.95)
X_train_pca = pca.fit_transform(X_train_scaled)

### Definición de la red neuronal, compilación y entrenamiento del modelo

In [None]:
# Seed TensorFlow's global random generator with the notebook-wide `seed`
# to make weight initialisation and dropout more repeatable between runs.
tf.random.set_seed(seed)

# The network consumes the PCA-projected features: the prediction cell
# projects the test data with `pca` before calling the model, so training
# must use the same representation (and the same input width).
n_inputs = X_train_pca.shape[1]

# Four hidden stages of decreasing width, each Dense(ReLU) -> BatchNorm ->
# Dropout(0.2), followed by a single linear output unit.
hidden_units = (256, 128, 64, 32)
stack = [tf.keras.Input(shape=(n_inputs,))]
for units in hidden_units:
    stack += [
        tf.keras.layers.Dense(units, activation='relu'),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.Dropout(0.2),
    ]
stack.append(tf.keras.layers.Dense(1))
model = tf.keras.Sequential(stack)

# Learning rate starts at 1e-3 and is multiplied by 0.9 every 1000
# optimizer steps (continuous exponential decay).
lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate=0.001,
    decay_steps=1000,
    decay_rate=0.9
)
optimizer = tf.keras.optimizers.Adam(learning_rate=lr_schedule)

model.compile(optimizer=optimizer, loss='mean_squared_error')

# `y_train` is the name defined in the preprocessing cell (the previous
# `Y_train` did not exist and raised NameError).
model.fit(X_train_pca, y_train, epochs=epochs, batch_size=batch_size)

### Predicción

In [None]:
# Run the prediction data through the already-fitted scaler and PCA
# projection (transform only, no re-fitting) before calling the model.
new_test_data = test_data[features]
new_data_scaled = scaler.transform(new_test_data)
new_data_scaled = pca.transform(new_data_scaled)
predictions = model.predict(new_data_scaled)

# Row identifiers come from the index of the prediction frame
# (previously read from the undefined name `new_data`).
ids = new_test_data.index.values

# Build a DataFrame with the predictions and their IDs; predictions has one
# column per output unit, so column 0 is the single predicted value.
# NOTE(review): this rebinds `df`, which earlier held the training data;
# re-running the preprocessing cell afterwards requires reloading it.
df = pd.DataFrame({'index': ids, 'percentage_docks_available': predictions[:, 0]})

# Path and name of the output CSV file
csv_file = 'PrediccionesKAGGEL.csv'

# Save the DataFrame to the CSV file, without writing the DataFrame's own index
df.to_csv(csv_file, index=False)