# Neural network

Modelo de red neuronal que sirve para realizar la predicción y analizar los resultados.

- Lee los datos del df conjunto.
- Lee los datos que se usarán de test.
- Crea el modelo.
- Realiza la predicción.
- Analiza resultados.

In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import MeanSquaredError
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA

# Read the combined (labelled) dataset used for training and validation.
df = pd.read_parquet(path='data/data.parquet')
# Read the submission set that the trained model will be applied to.
test_data = pd.read_parquet(path="test/submission.parquet")

In [2]:
# Preview the first rows of the training frame (head() defaults to n=5).
df.head()

Unnamed: 0,index,station_id,houryear,num_bikes_available,num_bikes_available_types.mechanical,num_bikes_available_types.ebike,num_docks_available,is_charging_station,is_installed,is_renting,...,Rain_Lectura,year,dayyear,Wind,Wind_Lectura,traffic,ctx-1,ctx-2,ctx-3,ctx-4
8160519,695,1,7998,26.333333,25.416667,0.916667,14.666667,1.0,1.0,1.0,...,0.0,2021,334,0,6.2,-4.8,0.197154,0.170732,0.170732,0.170732
8160520,696,1,7999,16.583333,16.25,0.333333,24.416667,1.0,1.0,1.0,...,0.0,2021,334,0,6.2,-5.5,0.357724,0.197154,0.170732,0.170732
8160521,697,1,8000,8.75,8.5,0.25,32.166667,1.0,1.0,1.0,...,0.0,2021,334,0,6.2,-4.808333,0.595528,0.357724,0.197154,0.170732
8160522,698,1,8001,2.25,2.25,0.0,38.5,1.0,1.0,1.0,...,0.0,2021,334,0,6.2,-4.808333,0.786151,0.595528,0.357724,0.197154
8160523,699,1,8002,1.5,1.416667,0.083333,39.5,1.0,1.0,1.0,...,0.0,2021,334,0,6.2,-3.666667,0.944785,0.786151,0.595528,0.357724


### Selección de características

In [3]:
# Global settings shared by the cells below.
seed = 42        # fixed seed for the steps that accept one (e.g. train_test_split)
batch_size = 32  # mini-batch size handed to model.fit
epochs = 10      # number of passes over the training data in model.fit

# Input columns fed to the model.
features = [
    'station_id', 'hour', 'dayofweek', 'festa', 'month',
    'Rain', 'Wind',
    'ctx-1', 'ctx-2', 'ctx-3', 'ctx-4',
]

# Column to predict; a one-element list, so df[target] yields a DataFrame.
target = ['porcio']

### Dividir en train y test, normalizar datos y PCA

In [4]:
# Hold out 20% of the labelled rows for validation.
X = df[features]
y = df[target]
X_train, X_val, y_train, y_val = train_test_split(
    X, y, random_state=seed, test_size=0.2
)

# Feature matrix for the submission set. No y_test is built here: the line that
# would extract it was left commented out (presumably the submission file has
# no target column -- TODO confirm).
X_test = test_data[features]

# Standardize the features. The scaler is fitted on the training split only and
# the same transform is then applied to the validation and test matrices.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_val_scaled, X_test_scaled = (
    scaler.transform(part) for part in (X_val, X_test)
)

# Principal Component Analysis keeping enough components to explain 95% of the
# variance; again fitted on the training split only.
pca = PCA(n_components=0.95)
X_train_pca = pca.fit_transform(X_train_scaled)
X_val_pca, X_test_pca = (
    pca.transform(part) for part in (X_val_scaled, X_test_scaled)
)

### Definición de la red neuronal, compilación y entrenamiento del modelo

In [6]:
# Seed NumPy and TensorFlow before any layer is created so that weight
# initialisation and the other random ops used during training start from a
# fixed state. Previously `seed` was only applied to train_test_split, so two
# runs of this cell produced different models.
np.random.seed(seed)
tf.random.set_seed(seed)

# Fully connected regressor: four Dense(relu) -> BatchNormalization ->
# Dropout(0.2) stages of width 256/128/64/32, followed by one linear output unit.
model = tf.keras.Sequential([
    tf.keras.layers.Dense(256, activation='relu', input_shape=(X_train_pca.shape[1],)),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(1)
])

# Learning rate starts at 1e-3 and decays by a factor of 0.9 per 1000 optimizer
# steps.
# NOTE(review): the decay is counted in batches, not epochs. With a large
# training set the rate may shrink substantially within the first epoch --
# confirm decay_steps against len(X_train_pca) / batch_size.
lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate=0.001,
    decay_steps=1000,
    decay_rate=0.9
)
optimizer = tf.keras.optimizers.Adam(learning_rate=lr_schedule)

model.compile(optimizer=optimizer, loss='mean_squared_error')

# Keep the History object so the per-epoch training and validation loss can be
# inspected afterwards (history.history) instead of only echoing its repr.
history = model.fit(
    X_train_pca,
    y_train,
    batch_size=batch_size,
    epochs=epochs,
    verbose=1,
    validation_data=(X_val_pca, y_val),
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x15f2c569540>

### Predicción

In [7]:
# Run the trained network on the PCA-projected submission features.
predictions = model.predict(x=X_test_pca)



### Tabla de predicciones

In [8]:
# One-column table holding the model outputs, flattened to 1-D.
df_predictions = pd.Series(predictions.flatten(), name='Predicciones').to_frame()
df_predictions

Unnamed: 0,Predicciones
0,0.909079
1,0.760426
2,0.657844
3,0.830717
4,0.822796
...,...
54994,0.688308
54995,0.361947
54996,0.849445
54997,0.703261


### Análisis descriptivo

In [9]:
# Summary statistics (count, mean, std, min, quartiles, max) of the predictions.
# NOTE(review): the recorded run shows a minimum below 0 and a maximum above 1.
# If `porcio` is a proportion bounded to [0, 1] (presumably -- confirm), the raw
# network output may need clipping before it is submitted.
describe_results = df_predictions.describe()
describe_results

Unnamed: 0,Predicciones
count,54999.0
mean,0.678437
std,0.227321
min,-0.176801
25%,0.527123
50%,0.727967
75%,0.858798
max,1.159923
