# Meteorological features

Create rolling-window features from the meteorological variables of each municipality.

## Imports

In [1]:
import datetime
import pandas as pd
import warnings

from src.data.utils import (
    get_general_path, join_paths
)

## Configuration

In [2]:
# Silence every warning raised for the rest of the session.
# NOTE(review): with no category or module given, this filter also hides
# deprecation and runtime warnings coming from libraries — consider narrowing it.
warnings.filterwarnings('ignore')

## Constants

In [3]:
# Path components; they are joined onto the path returned by get_general_path().
INTERIM_DATA = 'data/interim/'
# Input dataset, read one `mun_id=<id>` sub-directory at a time.
METEOROLOGICAL_INFORMATION = 'meteorological_information'
# Output dataset, written partitioned by `mun_id`.
METEOROLOGICAL_FEATURES = 'meteorological_features'
# Template for the per-municipality sub-directory of the input dataset.
MUN_PLACEHOLDER = 'mun_id={}'
# Pickled table whose index provides the municipality ids to process.
MUNICIPAL_DATA = 'municipal_data.pkl'

DAYS = 90 # Length of the rolling window in days (about 3 months)
LAG = 5 # Days added to `date` to build the `new_date` the window is anchored on

## Helper functions

In [4]:
def _add_derived_features(mun_data):
    """Add the derived meteorological columns and the lagged date to ``mun_data``.

    The frame is modified in place and also returned for convenience.
    It must contain the columns ``TS``, ``T2M``, ``T10M``, ``T10M_MAX``,
    ``T10M_MIN``, ``TROPT``, ``PS``, ``TROPPB`` and ``date``.
    """
    # Differences between temperature levels, scaled by a fixed divisor.
    # NOTE(review): 2 and 8 are presumably the height gaps between the levels
    # (surface -> 2 m, 2 m -> 10 m) — confirm.
    mun_data['TS_T2M_change'] = (mun_data['TS'] - mun_data['T2M']) / 2
    mun_data['T2M_T10M_change'] = (mun_data['T2M'] - mun_data['T10M']) / 8
    mun_data['change_difference'] = (
        mun_data['TS_T2M_change'] - mun_data['T2M_T10M_change']
    )
    mun_data['T10M_range'] = mun_data['T10M_MAX'] - mun_data['T10M_MIN']
    mun_data['TS_TROPT_range'] = mun_data['TS'] - mun_data['TROPT']
    mun_data['PS_TROPPB_range'] = mun_data['PS'] - mun_data['TROPPB']
    # The rolling window is anchored on this shifted date, not on `date`.
    mun_data['new_date'] = mun_data['date'] + datetime.timedelta(days=LAG)
    return mun_data


def _rolling_statistics(mun_data):
    """Return the rolling statistics of every column of ``mun_data`` except ``date``.

    The window spans ``DAYS`` days on ``new_date`` and is closed on the left,
    so the row's own ``new_date`` is excluded. Windows holding fewer than
    ``DAYS`` observations produce NaN (``min_periods=DAYS``).

    Each output column is named ``<column>__last<DAYS>_days_<operation>``;
    ``new_date`` itself is not part of the result.
    """
    rolling = mun_data.drop(columns=['date']).rolling(
        f'{DAYS}D', on='new_date', min_periods=DAYS, closed='left'
    )
    # The `on` column comes back untouched in every result; drop it right away
    # so it is neither subtracted below nor duplicated by the concatenation.
    statistics = {
        operation: getattr(rolling, operation)().drop(columns='new_date')
        for operation in ('mean', 'std', 'max', 'min', 'median', 'skew', 'kurt')
    }
    statistics['mean_vs_median'] = statistics['mean'] - statistics['median']
    statistics['range'] = statistics['max'] - statistics['min']

    return pd.concat(
        [
            frame.add_suffix(f'__last{DAYS}_days_{operation}')
            for operation, frame in statistics.items()
        ],
        axis=1,
    )


def get_rolling_data_partition(mun_id):
    """Build and store the rolling meteorological features of one municipality.

    Reads the ``mun_id=<mun_id>`` directory of the meteorological information
    dataset, adds the derived columns, computes the rolling statistics over
    the last ``DAYS`` days and writes the result to the meteorological
    features dataset, partitioned by ``mun_id``.

    Args:
        mun_id: Municipality identifier; used both to locate the input
            directory and as the value of the output partition column.

    Returns:
        None. The result is only written to disk.

    NOTE(review): the stored frame carries no date column (``date`` and
    ``new_date`` are both dropped); rows are identified only by the index
    inherited from the input — confirm this is what downstream steps expect.
    """
    general_path = get_general_path()
    source_path = join_paths(
        general_path,
        INTERIM_DATA,
        METEOROLOGICAL_INFORMATION,
        MUN_PLACEHOLDER.format(mun_id),
    )
    target_path = join_paths(general_path, INTERIM_DATA, METEOROLOGICAL_FEATURES)

    mun_data = pd.read_parquet(source_path)
    # A time-based rolling window requires a monotonic `on` column; row order
    # is not enforced by the read, so sort explicitly. The stable sort leaves
    # already-ordered input unchanged.
    mun_data = mun_data.sort_values('date', kind='mergesort')
    mun_data = _add_derived_features(mun_data)

    features = _rolling_statistics(mun_data)
    features['mun_id'] = mun_id
    features.to_parquet(target_path, partition_cols=['mun_id'])


## Read data


In [5]:
# Locate the pickled municipality table under the interim data directory.
general_path = get_general_path()
municipal_data_path = join_paths(general_path, INTERIM_DATA, MUNICIPAL_DATA)

# Its index supplies the municipality ids processed in the next step.
# NOTE(review): unpickling can execute arbitrary code; only load files
# produced by this project.
municipal_data = pd.read_pickle(municipal_data_path)

## Process data

In [6]:
# NOTE(review): this list is never filled or read in this cell.
municipal_data_list = []
# Build and store the features of every municipality, one partition at a time.
for i, mun_id in enumerate(municipal_data.index):
    get_rolling_data_partition(mun_id)
    if i % 100:
        continue
    # Progress report on every 100th municipality (including the first).
    print(mun_id, i)

01_008 0
07_080 100
07_114 200
10_004 300
12_038 400
13_024 500
14_113 600
15_021 700
16_106 800
16_029 900
19_031 1000
20_106 1100
20_092 1200
20_192 1300
20_436 1400
20_449 1500
21_176 1600
21_145 1700
22_014 1800
25_006 1900
28_003 2000
30_051 2100
30_030 2200
30_206 2300
31_080 2400
