# Merge initial and meteorological features

Join the features derived from the meteorological variables with the initial features into a single dataset.

## Imports

In [1]:
import datetime
import pandas as pd
import warnings

from src.data.utils import (
    get_general_path, join_paths, save_dataframe
)

## Configuration

In [2]:
warnings.filterwarnings('ignore')

## Constants

In [3]:
# Folder (joined onto the general path) that holds the intermediate datasets.
INTERIM_DATA = 'data/interim/'

# Inputs read from the interim folder.
INITIAL_FEATURES = 'initial_features.parquet'
MUNICIPAL_DATA = 'municipal_data.pkl'

# Meteorological features live under this name, with one sub-path per
# municipality whose name is built from MUN_PLACEHOLDER.
METEOROLOGICAL_FEATURES = 'meteorological_features'
MUN_PLACEHOLDER = 'mun_id={}'

# Output written by this notebook.
METEOROLOGICAL_AND_INITIAL_FEATURES = 'meteorological_and_initial_features.parquet'

## Helper functions

In [4]:
def get_meteorological_features_partition(mun_id, relevant_index=None):
    """Read the meteorological features stored for one municipality.

    The data is read with ``pd.read_parquet`` from the path built by joining
    the general path, ``INTERIM_DATA``, ``METEOROLOGICAL_FEATURES`` and
    ``MUN_PLACEHOLDER`` formatted with ``mun_id``.

    Parameters
    ----------
    mun_id
        Municipality identifier inserted into the ``mun_id={}`` path component.
    relevant_index : iterable of index labels, optional
        When given, only rows whose index label also appears in this iterable
        are kept, and they are selected in sorted label order.

    Returns
    -------
    pandas.DataFrame
        The full partition when ``relevant_index`` is ``None``; otherwise the
        partition restricted to the labels shared with ``relevant_index``.
    """
    partition_path = join_paths(
        get_general_path(),
        INTERIM_DATA,
        METEOROLOGICAL_FEATURES,
        MUN_PLACEHOLDER.format(mun_id),
    )
    partition = pd.read_parquet(partition_path)

    # No filter requested: hand back everything that was read.
    if relevant_index is None:
        return partition

    # Keep only labels present on both sides; sorting gives a deterministic
    # row order, since set iteration order is arbitrary.
    shared_labels = set(partition.index) & set(relevant_index)
    return partition.loc[sorted(shared_labels)]

## Read data


In [5]:
# Base path returned by the project helper; every data path in this notebook
# is built on top of it.
general_path = get_general_path()

# Build both input paths first, then load the files.
initial_features_path = join_paths(general_path, INTERIM_DATA, INITIAL_FEATURES)
municipal_data_path = join_paths(general_path, INTERIM_DATA, MUNICIPAL_DATA)

initial_features = pd.read_parquet(initial_features_path)
# NOTE(review): unpickling can execute arbitrary code; only load trusted files.
municipal_data = pd.read_pickle(municipal_data_path)

## Process data

In [6]:
# Load the partition of every municipality listed in `municipal_data`, keeping
# only the rows whose index label also exists in `initial_features`.
meteorological_features_list = []
for i, mun_id in enumerate(municipal_data.index):
    partition = get_meteorological_features_partition(
        mun_id=mun_id,
        relevant_index=initial_features.index,
    )
    meteorological_features_list.append(partition)
    # Progress trace: one line every 100 municipalities.
    if i % 100 == 0:
        print(mun_id, i)

01_008 0
07_080 100
07_114 200
10_004 300
12_038 400
13_024 500
14_113 600
15_021 700
16_106 800
16_029 900
19_031 1000
20_106 1100
20_092 1200
20_192 1300
20_436 1400
20_449 1500
21_176 1600
21_145 1700
22_014 1800
25_006 1900
28_003 2000
30_051 2100
30_030 2200
30_206 2300
31_080 2400


In [7]:
# Stack the per-municipality partitions row-wise into a single frame.
meteorological_features = pd.concat(objs=meteorological_features_list, axis='index')

In [8]:
# Column-wise concatenation: rows of both frames are aligned on their index labels.
meteorological_and_initial_features = pd.concat(
    objs=[initial_features, meteorological_features],
    axis='columns',
)

## Results

In [9]:
# Show the merged frame as the cell output for a quick visual check.
meteorological_and_initial_features

Unnamed: 0,mun_id,num_drought_index,standard_date,num_drought_index__clv_oc__mean,num_drought_index__clv_oc__std,num_drought_index__clv_oc__max,num_drought_index__clv_oc__min,num_drought_index__clv_oc__median,num_drought_index__cve_conc__mean,num_drought_index__cve_conc__std,...,T2M_T10M_change__last90_days_range,TO3__last90_days_range,TQV__last90_days_range,TROPPB__last90_days_range,TROPQ__last90_days_range,TROPT__last90_days_range,TS__last90_days_range,TS_T2M_change__last90_days_range,TS_TROPT_range__last90_days_range,change_difference__last90_days_range
01_001__20160115,01_001,0.0,2016-01-15,0.000000,0.000000,0.0,0.0,0.0,0.000000,0.000000,...,0.20375,51.47,27.69,5.80,0.02,16.12,12.09,1.625,21.24,1.43750
01_002__20160115,01_002,0.0,2016-01-15,0.000000,0.000000,0.0,0.0,0.0,0.000000,0.000000,...,0.22250,51.06,27.41,5.89,0.02,16.81,11.72,1.790,21.02,1.63500
01_003__20160115,01_003,0.0,2016-01-15,0.000000,0.000000,0.0,0.0,0.0,0.000000,0.000000,...,0.20375,51.47,27.69,5.80,0.02,16.12,12.09,1.625,21.24,1.43750
01_004__20160115,01_004,0.0,2016-01-15,0.000000,0.000000,0.0,0.0,0.0,0.000000,0.000000,...,0.23500,53.70,27.10,6.18,0.02,15.46,12.51,1.695,23.52,1.55500
01_005__20160115,01_005,0.0,2016-01-15,0.000000,0.000000,0.0,0.0,0.0,0.000000,0.000000,...,0.20375,51.47,27.69,5.80,0.02,16.12,12.09,1.625,21.24,1.43750
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
32_054__20240615,32_054,1.0,2024-06-15,2.380952,1.096597,4.0,0.0,2.0,2.191489,1.244786,...,0.14375,42.49,22.15,6.23,0.01,13.93,15.82,1.910,25.33,1.81000
32_055__20240615,32_055,3.0,2024-06-15,2.429448,0.859118,4.0,0.0,3.0,2.064516,0.749006,...,0.17875,45.25,22.58,7.10,0.02,13.91,17.46,2.255,27.74,2.08250
32_056__20240615,32_056,2.0,2024-06-15,2.429448,0.859118,4.0,0.0,3.0,2.064516,0.749006,...,0.20375,44.10,21.65,6.20,0.01,14.01,16.68,2.180,25.61,1.98625
32_057__20240615,32_057,1.0,2024-06-15,2.380952,1.096597,4.0,0.0,2.0,2.191489,1.244786,...,0.18375,44.07,21.54,6.68,0.02,14.64,18.17,2.100,28.66,1.95375


## Save results

In [10]:
# Write the merged features next to the other interim datasets, in parquet format.
meteorological_and_initial_features_path = join_paths(
    general_path,
    INTERIM_DATA,
    METEOROLOGICAL_AND_INITIAL_FEATURES,
)
save_dataframe(
    dataframe=meteorological_and_initial_features,
    filepath=meteorological_and_initial_features_path,
    file_format='parquet',
)

data was saved into `/mnt/c/Users/dhdzm/Documents/projects/seguia/seguia/src/data/../../data/interim/meteorological_and_initial_features.parquet`.
