<a href="https://colab.research.google.com/github/chris051091/Tesis_Maestria/blob/main/preprocess/src/Humedad_Espec%C3%ADfica.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Humedad específica

Notebook para generar el dataset de humedad específica a partir de los datos de humedad relativa, temperatura y presión.

In [1]:
import pandas as pd
import numpy as np
import os
import re
from functools import reduce

from google.colab import drive
drive.mount('/content/drive')

%matplotlib inline

Mounted at /content/drive


In [12]:
# Project root inside the mounted Drive (relative to the working directory)
# and the data sub-folder below it.
PATH_DRIVE = 'drive/MyDrive/Seminario-3-master'
PATH_DADOS = '/preprocess/dados/'

# Station to process. Alternatives kept from the original cell:
# NOMBRE_ESTACION = 'chucuri'
# NOMBRE_ESTACION = 'palanquero'
NOMBRE_ESTACION = 'vizcaina'

# Station folder and its sub-folder with the already treated series.
PATH_FINAL_DADOS = f'{PATH_DRIVE}{PATH_DADOS}{NOMBRE_ESTACION}/'
PATH_FINAL_DADOS_TRATADOS = f'{PATH_DRIVE}{PATH_DADOS}{NOMBRE_ESTACION}/tratados/'

# File names: three inputs (temperature, relative humidity, pressure) and the
# output written at the end of the notebook.
FILE_DATOS_TRATADOS_TEMP = f'{NOMBRE_ESTACION.lower()}_tratados_temp.csv'
FILE_DATOS_TRATADOS_HUM = f'{NOMBRE_ESTACION.lower()}_tratados_hum.csv'
FILE_DATOS_TRATADOS_PRES = f'{NOMBRE_ESTACION.lower()}_tratados_pres.csv'
FILE_DATOS_TRATADOS_HUM_ESPEC = f'{NOMBRE_ESTACION.lower()}_tratados_hum_especifica.csv'


## Cargando datos de Temperatura

In [13]:
# Load the treated temperature series, keeping only the timestamp and the
# temperature column, ordered chronologically with a fresh 0..n-1 index.
#
# The timestamps are parsed with pd.to_datetime after reading instead of via
# read_csv's `date_parser` argument, which is deprecated since pandas 2.0.
# The original parser was just a wrapper around pd.to_datetime, so the
# resulting column is the same.
dfTemp = (
    pd.read_csv(PATH_FINAL_DADOS_TRATADOS + FILE_DATOS_TRATADOS_TEMP, sep=';')
    .loc[:, ['FECHA', 'TEMP']]
    .assign(FECHA=lambda frame: pd.to_datetime(frame['FECHA']))
    .sort_values(by=['FECHA'])
    .reset_index(drop=True)
)
dfTemp

Unnamed: 0,FECHA,TEMP
0,2011-01-01 01:00:00-05:00,23.7
1,2011-01-01 02:00:00-05:00,23.1
2,2011-01-01 03:00:00-05:00,22.8
3,2011-01-01 04:00:00-05:00,22.4
4,2011-01-01 05:00:00-05:00,22.1
...,...,...
49862,2017-12-31 19:00:00-05:00,25.9
49863,2017-12-31 20:00:00-05:00,25.1
49864,2017-12-31 21:00:00-05:00,24.7
49865,2017-12-31 22:00:00-05:00,24.7


## Cargando datos de presión

In [14]:
# Load the treated pressure series, keeping only the timestamp and the
# pressure column, ordered chronologically with a fresh 0..n-1 index.
#
# The timestamps are parsed with pd.to_datetime after reading instead of via
# read_csv's `date_parser` argument, which is deprecated since pandas 2.0.
# The original parser was just a wrapper around pd.to_datetime, so the
# resulting column is the same.
dfPres = (
    pd.read_csv(PATH_FINAL_DADOS_TRATADOS + FILE_DATOS_TRATADOS_PRES, sep=';')
    .loc[:, ['FECHA', 'PRESION']]
    .assign(FECHA=lambda frame: pd.to_datetime(frame['FECHA']))
    .sort_values(by=['FECHA'])
    .reset_index(drop=True)
)
dfPres

Unnamed: 0,FECHA,PRESION
0,2014-06-14 11:00:00-05:00,998.2
1,2014-06-14 12:00:00-05:00,997.2
2,2014-06-14 13:00:00-05:00,996.0
3,2014-06-14 14:00:00-05:00,995.0
4,2014-06-14 15:00:00-05:00,994.2
...,...,...
28942,2017-12-31 19:00:00-05:00,998.7
28943,2017-12-31 20:00:00-05:00,999.6
28944,2017-12-31 21:00:00-05:00,1000.3
28945,2017-12-31 22:00:00-05:00,1001.1


## Cargando datos de humedad relativa

In [15]:
# Load the treated relative-humidity series, keeping only the timestamp and
# the humidity column, ordered chronologically with a fresh 0..n-1 index.
#
# The timestamps are parsed with pd.to_datetime after reading instead of via
# read_csv's `date_parser` argument, which is deprecated since pandas 2.0.
# The original parser was just a wrapper around pd.to_datetime, so the
# resulting column is the same.
dfHum = (
    pd.read_csv(PATH_FINAL_DADOS_TRATADOS + FILE_DATOS_TRATADOS_HUM, sep=';')
    .loc[:, ['FECHA', 'HUM']]
    .assign(FECHA=lambda frame: pd.to_datetime(frame['FECHA']))
    .sort_values(by=['FECHA'])
    .reset_index(drop=True)
)
dfHum

Unnamed: 0,FECHA,HUM
0,2011-01-01 01:00:00-05:00,98.0
1,2011-01-01 02:00:00-05:00,99.0
2,2011-01-01 03:00:00-05:00,100.0
3,2011-01-01 04:00:00-05:00,100.0
4,2011-01-01 05:00:00-05:00,100.0
...,...,...
51268,2017-12-31 19:00:00-05:00,93.0
51269,2017-12-31 20:00:00-05:00,97.0
51270,2017-12-31 21:00:00-05:00,99.0
51271,2017-12-31 22:00:00-05:00,99.0


## Cruzando datasets

In [16]:
# Inner-join the three series on their timestamp: only instants present in
# the temperature, pressure AND humidity frames survive.
df_cross = pd.merge(
    pd.merge(dfTemp, dfPres, how="inner", on="FECHA"),
    dfHum,
    how="inner",
    on="FECHA",
)

In [17]:
# Summarise the joined frame: row count, column dtypes and non-null counts.
df_cross.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 28125 entries, 0 to 28124
Data columns (total 4 columns):
 #   Column   Non-Null Count  Dtype                                 
---  ------   --------------  -----                                 
 0   FECHA    28125 non-null  datetime64[ns, pytz.FixedOffset(-300)]
 1   TEMP     28125 non-null  float64                               
 2   PRESION  28125 non-null  float64                               
 3   HUM      28125 non-null  float64                               
dtypes: datetime64[ns, pytz.FixedOffset(-300)](1), float64(3)
memory usage: 1.1 MB


## Procesamiento de los datos

In [18]:
# Build a new frame (df_cross is left untouched) with the inputs rescaled for
# the formulas below: TEMP shifted by 273.15 (Celsius -> Kelvin) and HUM
# divided by 100 (percentage -> fraction).
df_proc = df_cross.assign(
    TEMP=df_cross["TEMP"] + 273.15,
    HUM=df_cross["HUM"] / 100,
)
df_proc

Unnamed: 0,FECHA,TEMP,PRESION,HUM
0,2014-06-14 11:00:00-05:00,308.45,998.2,0.61
1,2014-06-14 12:00:00-05:00,307.95,997.2,0.59
2,2014-06-14 13:00:00-05:00,308.45,996.0,0.55
3,2014-06-14 14:00:00-05:00,308.65,995.0,0.57
4,2014-06-14 15:00:00-05:00,307.95,994.2,0.59
...,...,...,...,...
28120,2017-12-31 19:00:00-05:00,299.05,998.7,0.93
28121,2017-12-31 20:00:00-05:00,298.25,999.6,0.97
28122,2017-12-31 21:00:00-05:00,297.85,1000.3,0.99
28123,2017-12-31 22:00:00-05:00,297.85,1001.1,0.99


## Cálculo de la humedad específica

In [19]:
# Physical constants for the humidity formulas. Units come from the original
# comments; the meanings noted as "presumably" are inferred from the
# conventional symbols and values -- confirm against the reference in use.
Rd = 287      # J/(kg K) -- presumably the gas constant of dry air
Rv = 461.5    # J/(kg K) -- presumably the gas constant of water vapour
cpd = 1005    # J/(kg K) -- not referenced by the visible cells
cw = 4218     # J/(kg K) -- not referenced by the visible cells
lv = 2.5e6    # J/kg     -- presumably the latent heat of vaporisation
g = 9.81      # m/s2     -- not referenced by the visible cells
epsilon = Rd / Rv  # dimensionless ratio of the two gas constants above


def es(T):
    """Saturation vapour pressure for a given temperature.

    Args:
        T: temperature in K (scalar or NumPy-compatible array).

    Returns:
        Saturation vapour pressure in hPa, same shape as ``T``.
    """
    exponent = 53.49 - 6808/T - 5.09*np.log(T)
    return 6.11*np.exp(exponent)

def e_v1(Td):
    """Vapour pressure (hPa) obtained from the dew-point temperature ``Td`` (K).

    This is the saturation vapour pressure ``es`` evaluated at ``Td``.
    """
    vapor_pressure = es(Td)
    return vapor_pressure

# Dew point from temperature and relative humidity.
def Tdew(T,rh):
    """Dew-point temperature from air temperature and relative humidity.

    Args:
        T: temperature in K.
        rh: relative humidity as a dimensionless fraction (not a percentage).

    Returns:
        ``T*lv / (lv - Rv*T*ln(rh))``, using the module-level constants
        ``lv`` and ``Rv``.
    """
    denominator = lv - Rv*T*np.log(rh)
    return T*lv/denominator

def w(Td,p):
    """Mixing ratio (kg/kg) from dew point and pressure.

    Args:
        Td: dew-point temperature in K.
        p: pressure in hPa. The original comment labelled it "vapour
            pressure", but the formula subtracts the vapour pressure from it
            and the notebook passes the PRESION column, so it acts as the
            total pressure.

    Returns:
        ``epsilon*e / (p - e)`` where ``e = e_v1(Td)``.
    """
    vapor_pressure = e_v1(Td)
    dry_air_pressure = p - vapor_pressure
    return epsilon*vapor_pressure/dry_air_pressure

def q(w):
    """Specific humidity from the mixing ratio ``w`` (both in kg/kg).

    Note: inside this function the parameter ``w`` shadows the module-level
    function of the same name.
    """
    total_mass = 1 + w
    return w/total_mass

In [20]:
# Derived columns, evaluated on whole columns at once. Tdew, w and q are built
# only from NumPy ufuncs and arithmetic, so they accept pandas Series directly;
# this replaces three row-wise DataFrame.apply(axis=1) passes, which ran a
# Python lambda once per row.
df_proc["TEMP_ROCIO"] = Tdew(df_proc["TEMP"], df_proc["HUM"])
df_proc["RAZON_MEZCLA"] = w(df_proc["TEMP_ROCIO"], df_proc["PRESION"])
# q returns kg/kg (per its own comment); the factor 1000 rescales it to g/kg.
df_proc["HUM_ESPECIFICA"] = q(df_proc["RAZON_MEZCLA"]) * 1000


df_proc

Unnamed: 0,FECHA,TEMP,PRESION,HUM,TEMP_ROCIO,RAZON_MEZCLA,HUM_ESPECIFICA
0,2014-06-14 11:00:00-05:00,308.45,998.2,0.61,300.006273,0.023056,22.536129
1,2014-06-14 12:00:00-05:00,307.95,997.2,0.59,298.982139,0.021681,21.221322
2,2014-06-14 13:00:00-05:00,308.45,996.0,0.55,298.295784,0.020814,20.389417
3,2014-06-14 14:00:00-05:00,308.65,995.0,0.57,299.071421,0.021850,21.382868
4,2014-06-14 15:00:00-05:00,307.95,994.2,0.59,298.982139,0.021749,21.286187
...,...,...,...,...,...,...,...
28120,2017-12-31 19:00:00-05:00,299.05,998.7,0.93,297.856715,0.020203,19.802549
28121,2017-12-31 20:00:00-05:00,298.25,999.6,0.97,297.750675,0.020052,19.658219
28122,2017-12-31 21:00:00-05:00,297.85,1000.3,0.99,297.685499,0.019958,19.567088
28123,2017-12-31 22:00:00-05:00,297.85,1001.1,0.99,297.685499,0.019941,19.551266


## Guardando DataFrame

In [21]:
# Persist only the timestamp and the three derived columns, as a
# ';'-separated CSV with a header row and without the index, next to the
# treated input files.
(
    df_proc
    .loc[:, ['FECHA', 'TEMP_ROCIO', 'RAZON_MEZCLA', 'HUM_ESPECIFICA']]
    .to_csv(
        PATH_FINAL_DADOS_TRATADOS + FILE_DATOS_TRATADOS_HUM_ESPEC,
        sep=';',
        header=True,
        index=False,
    )
)