<a href="https://colab.research.google.com/github/chris051091/Seminario-3/blob/master/preprocess/src/Humedad_Espec%C3%ADfica.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Humedad específica

Notebook para generar el dataset de humedad específica a partir de los datos de humedad relativa, temperatura y presión.

In [12]:
import pandas as pd
import numpy as np
import os
import re
from functools import reduce

# Mount Google Drive inside the Colab runtime so the station CSV files can be read.
# NOTE(review): the data paths defined in the configuration cell are relative
# ('drive/MyDrive/...'), so they presumably rely on the working directory being
# '/content' -- confirm before running outside a default Colab session.
from google.colab import drive
drive.mount('/content/drive')

# Render matplotlib figures inline in the notebook output.
%matplotlib inline

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [13]:
# Project root inside the mounted Drive and the data folder below it.
PATH_DRIVE = 'drive/MyDrive/Seminario-3-master'
PATH_DADOS = '/preprocess/dados/'

# Station to process: keep exactly one assignment uncommented.
# NOMBRE_ESTACION = 'chucuri'
NOMBRE_ESTACION = 'palanquero'
# NOMBRE_ESTACION = 'vizcaina'

# Folder of the selected station and its sub-folder with the treated files.
PATH_FINAL_DADOS = f'{PATH_DRIVE}{PATH_DADOS}{NOMBRE_ESTACION}/'
PATH_FINAL_DADOS_TRATADOS = f'{PATH_DRIVE}{PATH_DADOS}{NOMBRE_ESTACION}/tratados/'

# File names are prefixed with the lower-cased station name.
FILE_DATOS_TRATADOS_TEMP = f'{NOMBRE_ESTACION.lower()}_tratados_temp.csv'
FILE_DATOS_TRATADOS_HUM = f'{NOMBRE_ESTACION.lower()}_tratados_hum.csv'
FILE_DATOS_TRATADOS_PRES = f'{NOMBRE_ESTACION.lower()}_tratados_pres.csv'
FILE_DATOS_TRATADOS_HUM_ESPEC = f'{NOMBRE_ESTACION.lower()}_tratados_hum_especifica.csv'


## Cargando datos de Temperatura

In [14]:
# Load the treated temperature file of the selected station, keeping only the
# timestamp and the temperature column.
# The former `date_parser=lambda col: pd.to_datetime(col)` argument was dropped:
# the keyword is deprecated since pandas 2.0 and only forwarded to
# pd.to_datetime, so the conversion is now done explicitly after reading.
dfTemp = pd.read_csv(
    PATH_FINAL_DADOS_TRATADOS + FILE_DATOS_TRATADOS_TEMP,
    sep=';',
)[['FECHA', 'TEMP']].copy()

# Same conversion the old date_parser performed; it still raises on bad values.
dfTemp['FECHA'] = pd.to_datetime(dfTemp['FECHA'])

# Chronological order with a clean 0..n-1 index.
dfTemp = dfTemp.sort_values(by=['FECHA']).reset_index(drop=True)
dfTemp

Unnamed: 0,FECHA,TEMP
0,2011-01-01 01:00:00-05:00,22.7
1,2011-01-01 02:00:00-05:00,23.1
2,2011-01-01 03:00:00-05:00,23.3
3,2011-01-01 04:00:00-05:00,24.1
4,2011-01-01 05:00:00-05:00,22.9
...,...,...
45464,2017-12-31 19:00:00-05:00,25.2
45465,2017-12-31 20:00:00-05:00,24.4
45466,2017-12-31 21:00:00-05:00,23.9
45467,2017-12-31 22:00:00-05:00,24.0


## Cargando datos de presión

In [15]:
# Load the treated pressure file of the selected station, keeping only the
# timestamp and the pressure column.
# The former `date_parser=lambda col: pd.to_datetime(col)` argument was dropped:
# the keyword is deprecated since pandas 2.0 and only forwarded to
# pd.to_datetime, so the conversion is now done explicitly after reading.
dfPres = pd.read_csv(
    PATH_FINAL_DADOS_TRATADOS + FILE_DATOS_TRATADOS_PRES,
    sep=';',
)[['FECHA', 'PRESION']].copy()

# Same conversion the old date_parser performed; it still raises on bad values.
dfPres['FECHA'] = pd.to_datetime(dfPres['FECHA'])

# Chronological order with a clean 0..n-1 index.
dfPres = dfPres.sort_values(by=['FECHA']).reset_index(drop=True)
dfPres

Unnamed: 0,FECHA,PRESION
0,2011-01-01 01:00:00-05:00,988.8
1,2011-01-01 02:00:00-05:00,988.4
2,2011-01-01 03:00:00-05:00,988.2
3,2011-01-01 04:00:00-05:00,988.7
4,2011-01-01 05:00:00-05:00,989.4
...,...,...
46238,2017-12-31 19:00:00-05:00,989.1
46239,2017-12-31 20:00:00-05:00,990.1
46240,2017-12-31 21:00:00-05:00,991.4
46241,2017-12-31 22:00:00-05:00,991.9


## Cargando datos de humedad relativa

In [16]:
# Load the treated relative-humidity file of the selected station, keeping only
# the timestamp and the humidity column.
# The former `date_parser=lambda col: pd.to_datetime(col)` argument was dropped:
# the keyword is deprecated since pandas 2.0 and only forwarded to
# pd.to_datetime, so the conversion is now done explicitly after reading.
dfHum = pd.read_csv(
    PATH_FINAL_DADOS_TRATADOS + FILE_DATOS_TRATADOS_HUM,
    sep=';',
)[['FECHA', 'HUM']].copy()

# Same conversion the old date_parser performed; it still raises on bad values.
dfHum['FECHA'] = pd.to_datetime(dfHum['FECHA'])

# Chronological order with a clean 0..n-1 index.
dfHum = dfHum.sort_values(by=['FECHA']).reset_index(drop=True)
dfHum

Unnamed: 0,FECHA,HUM
0,2011-01-01 01:00:00-05:00,99.0
1,2011-01-01 02:00:00-05:00,99.0
2,2011-01-01 03:00:00-05:00,99.0
3,2011-01-01 04:00:00-05:00,95.0
4,2011-01-01 05:00:00-05:00,96.0
...,...,...
37997,2017-12-31 19:00:00-05:00,91.0
37998,2017-12-31 20:00:00-05:00,94.0
37999,2017-12-31 21:00:00-05:00,96.0
38000,2017-12-31 22:00:00-05:00,97.0


## Cruzando datasets

In [17]:
# Keep only the timestamps present in all three series: temperature is
# inner-joined with pressure first, and the result with relative humidity.
df_cross = pd.merge(
    pd.merge(dfTemp, dfPres, on="FECHA", how="inner"),
    dfHum,
    on="FECHA",
    how="inner",
)

In [18]:
# Summarise the merged frame: number of rows, non-null counts and dtype per column.
df_cross.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 37232 entries, 0 to 37231
Data columns (total 4 columns):
 #   Column   Non-Null Count  Dtype                                 
---  ------   --------------  -----                                 
 0   FECHA    37232 non-null  datetime64[ns, pytz.FixedOffset(-300)]
 1   TEMP     37232 non-null  float64                               
 2   PRESION  37232 non-null  float64                               
 3   HUM      37232 non-null  float64                               
dtypes: datetime64[ns, pytz.FixedOffset(-300)](1), float64(3)
memory usage: 1.4 MB


## Procesamiento de los datos

In [19]:
# Build the working frame from the merged data without touching df_cross:
# TEMP is shifted by 273.15 (conversion to Kelvin) and HUM is divided by 100.
df_proc = df_cross.assign(
    TEMP=df_cross["TEMP"] + 273.15,
    HUM=df_cross["HUM"] / 100,
)
df_proc

Unnamed: 0,FECHA,TEMP,PRESION,HUM
0,2011-01-01 01:00:00-05:00,295.85,988.8,0.99
1,2011-01-01 02:00:00-05:00,296.25,988.4,0.99
2,2011-01-01 03:00:00-05:00,296.45,988.2,0.99
3,2011-01-01 04:00:00-05:00,297.25,988.7,0.95
4,2011-01-01 05:00:00-05:00,296.05,989.4,0.96
...,...,...,...,...
37227,2017-12-31 19:00:00-05:00,298.35,989.1,0.91
37228,2017-12-31 20:00:00-05:00,297.55,990.1,0.94
37229,2017-12-31 21:00:00-05:00,297.05,991.4,0.96
37230,2017-12-31 22:00:00-05:00,297.15,991.9,0.97


## Cálculo de la humedad específica

In [20]:
# Physical constants used by the humidity formulas (units as given by the author).
# NOTE(review): the descriptions below are the usual meteorological meaning of
# these symbols; cpd, cw and g are not used in the cells visible here.
Rd = 287      # J/(kg K)  presumably the gas constant of dry air
Rv = 461.5    # J/(kg K)  presumably the gas constant of water vapour
cpd = 1005    # J/(kg K)  presumably the specific heat of dry air
cw = 4218     # J/(kg K)  presumably the specific heat of liquid water
lv = 2.5e6    # J/kg      presumably the latent heat of vaporisation
g = 9.81      # m/s2
# Ratio of the two gas constants, used in the mixing-ratio formula.
epsilon = Rd / Rv


def es(T):
    """Saturation vapour pressure (hPa) for a temperature T given in K.

    Works on scalars and on NumPy/pandas arrays alike, since it only uses
    arithmetic and NumPy ufuncs.
    """
    exponent = 53.49-6808/T-5.09*np.log(T)
    return 6.11*np.exp(exponent)

def e_v1(Td):
    """Vapour pressure (hPa) from the dew-point temperature Td (K).

    It is obtained by evaluating the saturation vapour pressure `es` at Td.
    """
    vapor_pressure = es(Td)
    return vapor_pressure

# Dew point from temperature and relative humidity.
def Tdew(T,rh):
    """Dew-point temperature (K) from temperature T (K) and relative humidity rh.

    rh is dimensionless (a fraction, not a percentage). Uses the module-level
    constants `lv` and `Rv`.
    """
    numerator = T*lv
    denominator = lv-Rv*T*np.log(rh)
    return numerator/denominator

def w(Td,p):
    """Mixing ratio (kg/kg) from the dew-point temperature Td (K) and pressure p (hPa).

    The vapour pressure is derived from Td through `e_v1`; p is the pressure it
    is subtracted from in the denominator (the notebook passes the PRESION
    column here).
    """
    vapor_pressure = e_v1(Td)
    remaining_pressure = p-vapor_pressure
    return epsilon*vapor_pressure/remaining_pressure

def q(w):
    """Specific humidity (kg/kg) from the mixing ratio w (kg/kg)."""
    total = 1+w
    return w/total

In [21]:
# The helper functions only use arithmetic and NumPy ufuncs, so they are applied
# to whole columns at once instead of row by row through DataFrame.apply(axis=1),
# which ran a Python lambda for every row of the frame, three times over.

# Dew-point temperature from air temperature and relative humidity (fraction).
df_proc["TEMP_ROCIO"] = Tdew(df_proc["TEMP"], df_proc["HUM"])
# Mixing ratio (kg/kg) from the dew point and the pressure column.
df_proc["RAZON_MEZCLA"] = w(df_proc["TEMP_ROCIO"], df_proc["PRESION"])
# Specific humidity; the factor 1000 rescales kg/kg to g/kg.
df_proc["HUM_ESPECIFICA"] = q(df_proc["RAZON_MEZCLA"]) * 1000


df_proc

Unnamed: 0,FECHA,TEMP,PRESION,HUM,TEMP_ROCIO,RAZON_MEZCLA,HUM_ESPECIFICA
0,2011-01-01 01:00:00-05:00,295.85,988.8,0.99,295.687701,0.017843,17.530529
1,2011-01-01 02:00:00-05:00,296.25,988.4,0.99,296.087261,0.018301,17.971931
2,2011-01-01 03:00:00-05:00,296.45,988.2,0.99,296.287042,0.018534,18.196315
3,2011-01-01 04:00:00-05:00,297.25,988.7,0.95,296.415713,0.018673,18.330352
4,2011-01-01 05:00:00-05:00,296.05,989.4,0.96,295.390996,0.017505,17.203692
...,...,...,...,...,...,...,...
37227,2017-12-31 19:00:00-05:00,298.35,989.1,0.91,296.808318,0.019125,18.766236
37228,2017-12-31 20:00:00-05:00,297.55,990.1,0.94,296.542149,0.018792,18.445763
37229,2017-12-31 21:00:00-05:00,297.05,991.4,0.96,296.386541,0.018587,18.247382
37230,2017-12-31 22:00:00-05:00,297.15,991.9,0.97,296.654348,0.018888,18.538163


## Guardando el DataFrame

In [22]:
# Persist the timestamp together with the three derived humidity variables as a
# ';'-separated CSV (with header, without the index) in the station's treated folder.
df_proc[['FECHA', 'TEMP_ROCIO', 'RAZON_MEZCLA', 'HUM_ESPECIFICA']].to_csv(
    PATH_FINAL_DADOS_TRATADOS + FILE_DATOS_TRATADOS_HUM_ESPEC,
    sep=';',
    index=False,
    header=True,
)