# TFM

## Importación de librerías

In [85]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

## Lectura de fichero

In [86]:
df = pd.read_csv('../../data/datos_simulacion.csv').drop(columns=[
    'Unnamed: 0',
    'Tiempo (dia)'
])

display(df)

Unnamed: 0,Volumen dep. almacenam. ini. (L),Venta (L),Llenado dep. almacenam. (L),Volumen dep. almacenam. fin. teor. (L),Volumen dep. almacenam. fin. (L),Variacion,Variacion Acum.,Fugando combustible
0,2030059,-5785,701789,2153348,2152272,-1076,2346,0
1,2152272,-6040,798327,2346599,2330224,-16375,3597,0
2,2330224,-6005,455882,2185606,218230,-3306,-3851,0
3,218230,-5820,528444,2128744,212570,-3044,-4557,0
4,212570,-5975,623865,2152065,2163932,11867,-462,0
5,2163932,-5895,757884,2332316,2342013,9697,10545,0
6,2342013,-5820,439104,2199117,2204816,5699,15848,0
7,2204816,-6135,77102,2362336,2356883,-5453,4856,0
8,2356883,-5700,370858,2157741,2167139,9398,14114,0
9,2167139,-6240,694583,2237722,2241702,398,11387,0


# Limpieza de datos

In [87]:
bad_formatted_fields = [
    'Volumen dep. almacenam. ini. (L)',
    'Llenado dep. almacenam. (L)',
    'Volumen dep. almacenam. fin. teor. (L)',
    'Volumen dep. almacenam. fin. (L)',
    'Variacion',
    'Variacion Acum.',
]

for column in bad_formatted_fields:
    df[column] = df[column].str.replace(',', '.')
    df[column] = pd.to_numeric(df[column])


display(df)

Unnamed: 0,Volumen dep. almacenam. ini. (L),Venta (L),Llenado dep. almacenam. (L),Volumen dep. almacenam. fin. teor. (L),Volumen dep. almacenam. fin. (L),Variacion,Variacion Acum.,Fugando combustible
0,20300.59,-5785,7017.89,21533.48,21522.72,-10.76,234.6,0
1,21522.72,-6040,7983.27,23465.99,23302.24,-163.75,35.97,0
2,23302.24,-6005,4558.82,21856.06,21823.0,-33.06,-38.51,0
3,21823.0,-5820,5284.44,21287.44,21257.0,-30.44,-45.57,0
4,21257.0,-5975,6238.65,21520.65,21639.32,118.67,-4.62,0
5,21639.32,-5895,7578.84,23323.16,23420.13,96.97,105.45,0
6,23420.13,-5820,4391.04,21991.17,22048.16,56.99,158.48,0
7,22048.16,-6135,7710.2,23623.36,23568.83,-54.53,48.56,0
8,23568.83,-5700,3708.58,21577.41,21671.39,93.98,141.14,0
9,21671.39,-6240,6945.83,22377.22,22417.02,39.8,113.87,0


## Descripción de campos

<!-- TODO -->

## Preprocesados de datos

In [88]:
from sklearn.preprocessing import MinMaxScaler

numeric_cols = [
    'Volumen dep. almacenam. ini. (L)',
    'Llenado dep. almacenam. (L)',
    'Volumen dep. almacenam. fin. teor. (L)',
    'Volumen dep. almacenam. fin. (L)',
    'Variacion',
    'Variacion Acum.',
]

for col in numeric_cols:
    df[col] = MinMaxScaler().fit_transform(df[[col]])

display(df)

Unnamed: 0,Volumen dep. almacenam. ini. (L),Venta (L),Llenado dep. almacenam. (L),Volumen dep. almacenam. fin. teor. (L),Volumen dep. almacenam. fin. (L),Variacion,Variacion Acum.,Fugando combustible
0,0.016528,-5785,0.666044,0.289043,0.291947,0.541711,0.746444,0
1,0.291947,-6040,0.835813,0.723083,0.69298,0.0,0.448751,0
2,0.69298,-6005,0.233598,0.361494,0.359619,0.462751,0.337125,0
3,0.359619,-5820,0.361204,0.233783,0.232065,0.472027,0.326544,0
4,0.232065,-5975,0.529009,0.286162,0.318224,1.0,0.387917,0
5,0.318224,-5895,0.764691,0.691003,0.719548,0.923164,0.552883,0
6,0.719548,-5820,0.204093,0.39184,0.410361,0.781602,0.632361,0
7,0.410361,-6135,0.787791,0.758428,0.753059,0.386729,0.46762,0
8,0.753059,-5700,0.084077,0.29891,0.325452,0.912577,0.606373,0
9,0.325452,-6240,0.653371,0.478546,0.493487,0.720735,0.565502,0


## Implementación de modelos

In [89]:
import tensorflow as tf
from sklearn.model_selection import train_test_split

OBJECTIVE_VARIABLE = 'Fugando combustible'

X_train, X_test, y_train, y_test = train_test_split(
    df.drop(columns=OBJECTIVE_VARIABLE),
    df[OBJECTIVE_VARIABLE],
    stratify=df[OBJECTIVE_VARIABLE]
)

model = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(X_train.shape[1], 1)),
    tf.keras.layers.LSTM(15),
    tf.keras.layers.Dense(1, activation='sigmoid')
])


model.summary()

In [94]:
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
    loss='binary_crossentropy',
    metrics=['accuracy']
)

In [95]:
model.fit(
    x = X_train,
    y = y_train,
    epochs = 250,
    batch_size = 32,
    validation_data = (X_test, y_test),
    verbose = 1
)

Epoch 1/250
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step - accuracy: 1.0000 - loss: 0.0041 - val_accuracy: 0.7143 - val_loss: 1.5681
Epoch 2/250
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step - accuracy: 1.0000 - loss: 0.0061 - val_accuracy: 0.7143 - val_loss: 1.5955
Epoch 3/250
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 46ms/step - accuracy: 1.0000 - loss: 0.0046 - val_accuracy: 0.7143 - val_loss: 1.5389
Epoch 4/250
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step - accuracy: 1.0000 - loss: 0.0067 - val_accuracy: 0.7143 - val_loss: 1.4060
Epoch 5/250
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step - accuracy: 1.0000 - loss: 0.0044 - val_accuracy: 0.7143 - val_loss: 1.3597
Epoch 6/250
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step - accuracy: 1.0000 - loss: 0.0043 - val_accuracy: 0.7143 - val_loss: 1.4009
Epoch 7/250
[1m1/1[0m [32m━━━━━━━━━━━━━

<keras.src.callbacks.history.History at 0x15a6b153450>

In [97]:
y_pred = (model.predict(X_test) > 0.5).astype(int)
y_pred

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step


array([[0],
       [1],
       [1],
       [1],
       [1],
       [0],
       [1]])