# Visualización de datos inicial

In [32]:
import pandas as pd

# Keep the frame exactly as read from disk; every later cell works on a
# separate deep copy so the raw data can always be compared against.
consumo_granada_original = pd.read_csv(filepath_or_buffer='../data/raw/consumo_granada.csv')
consumo_granada = consumo_granada_original.copy(deep=True)

## Visualización de la estructura de los datos

In [33]:
# Preview the first five rows to inspect the columns and some sample values.
consumo_granada.head(n=5)

Unnamed: 0,timestamp,zone_id,zone_name,consumption_kwh,temperature
0,2015-01-01 00:00:00,18001,Centro Catedral,359.454,-0.09
1,2015-01-01 00:00:00,18002,Albaicin_Bajo,439.901,-0.52
2,2015-01-01 00:00:00,18003,Albaicin_Alto,196.527,-0.13
3,2015-01-01 00:00:00,18004,Realejo,436.102,-0.33
4,2015-01-01 00:00:00,18005,Sacromonte,213.958,-0.39


## Espacios en blanco en zone_name

In [34]:
# Vectorized check for whitespace in 'zone_name'.
# r'\s' matches ANY whitespace character at ANY position, so leading/trailing
# blanks, tabs and newlines are flagged as well as spaces between words.
# na=False makes missing names count as "no whitespace".
# NOTE(review): the recorded output flags names with no visible space
# (e.g. 'Albaicin_Alto'), presumably because of leading/trailing whitespace —
# confirm by inspecting repr() of the flagged values.
s = consumo_granada['zone_name']
mask = s.str.contains(r'\s', na=False)
nombres_multi = consumo_granada.loc[mask]

# Count once and reuse the figure for both the report and the guard.
n_detectadas = int(mask.sum())
print('Filas detectadas:', n_detectadas)
if n_detectadas > 0:
    display(nombres_multi.head())

Filas detectadas: 1071008


Unnamed: 0,timestamp,zone_id,zone_name,consumption_kwh,temperature
0,2015-01-01 00:00:00,18001,Centro Catedral,359.454,-0.09
2,2015-01-01 00:00:00,18003,Albaicin_Alto,196.527,-0.13
4,2015-01-01 00:00:00,18005,Sacromonte,213.958,-0.39
8,2015-01-01 00:00:00,18009,Camino_Ronda,1071.154,-0.78
9,2015-01-01 00:00:00,18010,FUENTENUEVA,423.021,0.12


## Validación de timestamp: formato y fechas u horas imposibles

In [35]:
import re

# Flag every row whose 'timestamp' is not a real instant written as
# 'YYYY-MM-DD HH:MM:SS'. Two independent checks are combined:
#   1. textual layout - the regex pins digit counts, separators and per-field
#      ranges (month 01-12, day 01-31, hour 00-23, minute/second 00-59);
#   2. calendar validity - the regex alone accepts dates that do not exist
#      (e.g. '2015-02-31 10:00:00' or Feb 29 in a non-leap year), so each
#      value must also parse strictly against the expected format.
patron_fecha_valida = r'^\d{4}-(0[1-9]|1[0-2])-(0[1-9]|[12]\d|3[01])\s([01]\d|2[0-3]):([0-5]\d):([0-5]\d)$'
formato_timestamp = '%Y-%m-%d %H:%M:%S'

# na=False: a missing timestamp counts as "does not match", i.e. invalid.
formato_ok = consumo_granada['timestamp'].str.match(patron_fecha_valida, na=False)

# errors='coerce' turns anything that cannot be parsed into NaT instead of
# raising. The parsed values are only used for this check; the column itself
# is left untouched here.
calendario_ok = pd.to_datetime(
    consumo_granada['timestamp'], format=formato_timestamp, errors='coerce'
).notna()

# A row is invalid when it fails either check.
fechas_invalidas = consumo_granada[~(formato_ok & calendario_ok)]

fechas_invalidas

Unnamed: 0,timestamp,zone_id,zone_name,consumption_kwh,temperature


## Temperaturas máximas y mínimas

In [36]:
# Temperature extremes: overall, per zone, and the ten hottest / coldest readings.
# Types are coerced first; values that cannot be parsed become NaT / NaN
# (errors='coerce') instead of raising.
consumo_granada['timestamp'] = pd.to_datetime(consumo_granada['timestamp'], errors='coerce')
consumo_granada['temperature'] = pd.to_numeric(consumo_granada['temperature'], errors='coerce')
print('Filas:', len(consumo_granada))

# Overall extremes, plus a few of the rows where each one occurs.
temperaturas = consumo_granada['temperature']
t_max = temperaturas.max()
t_min = temperaturas.min()
print(f'Valor máximo: {t_max}')
print(f'Valor mínimo: {t_min}')
print('\nEjemplos (más calientes):')
display(consumo_granada.loc[temperaturas == t_max].head())
print('\nEjemplos (más fríos):')
display(consumo_granada.loc[temperaturas == t_min].head())

# Per-zone min / max / mean, zones with the highest maximum first.
# dropna=False keeps rows whose zone name is missing as their own group.
per_zone = (
    consumo_granada
    .groupby('zone_name', dropna=False)['temperature']
    .agg(['min', 'max', 'mean'])
    .reset_index()
    .sort_values('max', ascending=False)
)
print('\nTemperaturas máximas y mínimas por zona:')
display(per_zone.head(20))

# Ten hottest and ten coldest individual readings, with zone and timestamp.
columnas_top = ['timestamp', 'zone_name', 'temperature']
print('\nTop 10 temperaturas más altas:')
display(consumo_granada.nlargest(10, 'temperature')[columnas_top])
print('\nTop 10 temperaturas más bajas:')
display(consumo_granada.nsmallest(10, 'temperature')[columnas_top])

Filas: 1947461
Valor máximo: 45.13
Valor mínimo: -6.82

Ejemplos (más calientes):


Unnamed: 0,timestamp,zone_id,zone_name,consumption_kwh,temperature
1320306,2022-06-16 15:00:00,18009,Camino_Ronda,6249.461,45.13
1320307,2022-06-16 15:00:00,18009,Camino_Ronda,6249.461,45.13



Ejemplos (más fríos):


Unnamed: 0,timestamp,zone_id,zone_name,consumption_kwh,temperature
902682,2020-02-06 03:00:00,18009,Camino_Ronda,534.046,-6.82



Temperaturas máximas y mínimas por zona:


Unnamed: 0,zone_name,min,max,mean
1,Camino_Ronda,-6.82,45.13,17.977341
8,BOLA DE ORO,-6.22,45.05,17.987093
7,Albaicin_Bajo,-6.28,44.87,17.98504
0,Albaicin_Alto,-6.32,44.74,17.978985
11,Chana_Bobadilla,-6.2,44.7,17.988788
4,FUENTENUEVA,-6.45,44.69,17.985916
10,Cartuja,-6.42,44.66,17.979239
16,Plaza_Toros,-6.22,44.53,17.99286
3,Cervantes,-6.1,44.48,17.989372
15,Pedro_Antonio,-6.11,44.3,17.987654



Top 10 temperaturas más altas:


Unnamed: 0,timestamp,zone_name,temperature
1320306,2022-06-16 15:00:00,Camino_Ronda,45.13
1320307,2022-06-16 15:00:00,Camino_Ronda,45.13
1320311,2022-06-16 15:00:00,BOLA DE ORO,45.05
1320299,2022-06-16 15:00:00,Albaicin_Bajo,44.87
1162739,2021-07-26 14:00:00,Albaicin_Alto,44.74
1320316,2022-06-16 15:00:00,Chana_Bobadilla,44.7
1320308,2022-06-16 15:00:00,FUENTENUEVA,44.69
1320304,2022-06-16 15:00:00,Cartuja,44.66
1320303,2022-06-16 15:00:00,Plaza_Toros,44.53
985332,2020-07-25 16:00:00,Cervantes,44.48



Top 10 temperaturas más bajas:


Unnamed: 0,timestamp,zone_name,temperature
902682,2020-02-06 03:00:00,Camino_Ronda,-6.82
192023,2016-02-01 03:00:00,PTS_TECNOLOGICO,-6.68
1252129,2022-01-27 01:00:00,Realejo,-6.52
1404993,2022-12-08 04:00:00,FUENTENUEVA,-6.45
1415148,2022-12-29 02:00:00,Cartuja,-6.42
1603735,2024-01-22 00:00:00,PTS_TECNOLOGICO,-6.37
1068894,2021-01-14 01:00:00,Camino_Ronda,-6.33
1588311,2023-12-21 04:00:00,NORTE ALMANJAYAR,-6.33
1062151,2020-12-31 04:00:00,Albaicin_Alto,-6.32
362697,2017-01-18 02:00:00,Centro Catedral,-6.3


## Consumo máximo y mínimo

In [37]:
# Consumption extremes: overall, per zone, and the ten highest / lowest readings.
# Types are coerced first; values that cannot be parsed become NaT / NaN
# (errors='coerce') instead of raising.
consumo_granada['timestamp'] = pd.to_datetime(consumo_granada['timestamp'], errors='coerce')
consumo_granada['consumption_kwh'] = pd.to_numeric(consumo_granada['consumption_kwh'], errors='coerce')
print('Filas:', len(consumo_granada))

# Overall extremes, plus a few of the rows where each one occurs.
consumos = consumo_granada['consumption_kwh']
c_max = consumos.max()
c_min = consumos.min()
print(f'Valor máximo consumo_kwh: {c_max}')
print(f'Valor mínimo consumo_kwh: {c_min}')
print('\nEjemplos (más consumo):')
display(consumo_granada.loc[consumos == c_max].head())
print('\nEjemplos (menos consumo):')
display(consumo_granada.loc[consumos == c_min].head())

# Per-zone min / max / mean, zones with the highest maximum first.
# dropna=False keeps rows whose zone name is missing as their own group.
per_zone = (
    consumo_granada
    .groupby('zone_name', dropna=False)['consumption_kwh']
    .agg(['min', 'max', 'mean'])
    .reset_index()
    .sort_values('max', ascending=False)
)
print('\nConsumo máximos y mínimos por zona:')
display(per_zone.head(20))

# Ten highest and ten lowest individual readings, with zone and timestamp.
columnas_top = ['timestamp', 'zone_name', 'consumption_kwh']
print('\nTop 10 consumos más altos:')
display(consumo_granada.nlargest(10, 'consumption_kwh')[columnas_top])
print('\nTop 10 consumos más bajos:')
display(consumo_granada.nsmallest(10, 'consumption_kwh')[columnas_top])

Filas: 1947461
Valor máximo consumo_kwh: 13387.546
Valor mínimo consumo_kwh: 0.0

Ejemplos (más consumo):


Unnamed: 0,timestamp,zone_id,zone_name,consumption_kwh,temperature
261690,2016-06-23 19:00:00,18017,PTS_TECNOLOGICO,13387.546,39.6



Ejemplos (menos consumo):


Unnamed: 0,timestamp,zone_id,zone_name,consumption_kwh,temperature
992,2015-01-03 01:00:00,18007,Cartuja,0.0,-3.75
1874,2015-01-04 21:00:00,18005,Sacromonte,0.0,3.93
7806,2015-01-17 03:00:00,18008,Pedro_Antonio,0.0,-5.14
8584,2015-01-18 18:00:00,18003,Albaicin_Alto,0.0,12.98
10340,2015-01-22 09:00:00,18001,Centro Catedral,0.0,5.57



Consumo máximos y mínimos por zona:


Unnamed: 0,zone_name,min,max,mean
14,PTS_TECNOLOGICO,0.0,13387.546,3446.536248
2,Centro Catedral,0.0,13092.904,1948.987758
12,MERCAGRANADA,0.0,11238.935,3845.173087
11,Chana_Bobadilla,0.0,8775.7,2904.298575
1,Camino_Ronda,0.0,8763.258,2454.105613
19,Zaidin Vergeles,0.0,7976.5,2537.87922
18,ZAIDIN NUEVO,0.0,7584.304,2353.526759
15,Pedro_Antonio,0.0,7000.185,1811.230428
16,Plaza_Toros,0.0,6784.674,1912.33858
9,CHANA_BARRIO,0.0,5977.661,1859.966301



Top 10 consumos más altos:


Unnamed: 0,timestamp,zone_name,consumption_kwh
261690,2016-06-23 19:00:00,PTS_TECNOLOGICO,13387.546
1320869,2022-06-17 19:00:00,Centro Catedral,13092.904
611619,2018-06-15 14:00:00,PTS_TECNOLOGICO,12081.85
1320885,2022-06-17 19:00:00,PTS_TECNOLOGICO,11871.768
618506,2018-06-29 19:00:00,PTS_TECNOLOGICO,11814.197
1856695,2025-06-26 19:00:00,PTS_TECNOLOGICO,11760.265
440615,2017-06-27 19:00:00,PTS_TECNOLOGICO,11724.652
270437,2016-07-11 19:00:00,PTS_TECNOLOGICO,11716.48
800269,2019-07-09 19:00:00,PTS_TECNOLOGICO,11709.297
94466,2015-07-14 19:00:00,PTS_TECNOLOGICO,11688.826



Top 10 consumos más bajos:


Unnamed: 0,timestamp,zone_name,consumption_kwh
992,2015-01-03 01:00:00,Cartuja,0.0
1874,2015-01-04 21:00:00,Sacromonte,0.0
7806,2015-01-17 03:00:00,Pedro_Antonio,0.0
8584,2015-01-18 18:00:00,Albaicin_Alto,0.0
10340,2015-01-22 09:00:00,Centro Catedral,0.0
11122,2015-01-24 00:00:00,Centro Catedral,0.0
11666,2015-01-25 02:00:00,Chana_Bobadilla,0.0
12774,2015-01-27 09:00:00,Chana_Bobadilla,0.0
13063,2015-01-28 00:00:00,Sacromonte,0.0
15069,2015-02-01 03:00:00,FUENTENUEVA,0.0
