# Getting NASA Meteorological information:

Este notebook explora la posibilidad de descargar la información de la NASA. Posteriormente se debe "productivizar", por lo que servirá como base para el futuro.

La descripción de la información meteorológica se obtuvo de este [link](https://gist.github.com/abelcallejo/d68e70f43ffa1c8c9f6b5e93010704b8).

# Imports

In [2]:
import datetime 
import json
import matplotlib.pyplot as plt
import os
import pandas as pd
import pyarrow as pa
import pyarrow.parquet as pq
import requests


from src.data.utils import (
    get_general_path, join_paths, check_if_filepath_exists, save_as_pickle
)

## Configs

## Constants

In [3]:
# Directory (relative to the general path) with one pickle per municipality,
# each named "<mun_id>.pkl".
METEO_RAW_DATA = 'data/raw/meteorological_info/'

# Directory (relative to the general path) for interim inputs and outputs.
INTERIM_DATA = 'data/interim/'

# Pickled municipal data read from INTERIM_DATA.
MUNICIPAL_DATA_FILE = 'municipal_data.pkl'

# Name of the parquet output written under INTERIM_DATA.
METEOROLOGICAL_FILE = 'meteorological_information'

## Helper Functions

In [4]:
def get_time_from_string(string_date):
    """Parse a ``YYYYMMDD`` string into a ``datetime.datetime``.

    Args:
        string_date: Date text in the ``%Y%m%d`` layout, e.g. ``'20200131'``.

    Returns:
        The corresponding naive ``datetime.datetime`` at midnight.

    Raises:
        ValueError: If ``string_date`` does not match ``%Y%m%d``.
    """
    return datetime.datetime.strptime(string_date, '%Y%m%d')

def obtain_df_from_meteorological_data(mun_id):
    """Load one municipality's pickled record and return it as a DataFrame.

    The pickle ``<mun_id>.pkl`` is read from ``METEO_RAW_DATA`` under the
    general path, and the mapping stored at ``['properties']['parameter']``
    is turned into a DataFrame.

    Args:
        mun_id: Municipality identifier as a string; it is both the file stem
            and the prefix of every row label.

    Returns:
        A DataFrame indexed by ``mun_id__time`` (``"<mun_id>__<key>"``) holding
        the parameter columns plus ``date`` and ``mun_id``.
    """
    pickle_path = join_paths(get_general_path(), METEO_RAW_DATA, f"{mun_id}.pkl")
    payload = pd.read_pickle(pickle_path)
    parameters = payload.get('properties').get('parameter')

    # reset_index() moves the row labels of the parameter mapping into an
    # 'index' column; they are parsed below as YYYYMMDD date strings.
    frame = pd.DataFrame(parameters).reset_index()
    day_keys = frame['index']

    frame["date"] = day_keys.apply(get_time_from_string)
    frame['mun_id'] = mun_id
    frame['mun_id__time'] = mun_id + '__' + day_keys.astype('string')

    # The composite key becomes the index; the raw 'index' column is dropped.
    return frame.set_index('mun_id__time').drop(columns=['index'])

## Read Data

In [5]:
# Base path; every location below is built from it.
general_path = get_general_path()

# Inputs: the folder of per-municipality pickles and the municipal data table.
meteorological_data_path = join_paths(general_path, METEO_RAW_DATA)
municipal_data_path = join_paths(general_path, INTERIM_DATA, MUNICIPAL_DATA_FILE)
municipal_data = pd.read_pickle(municipal_data_path)

# Output: destination of the processed meteorological data.
final_meteorological_data_path = join_paths(general_path, INTERIM_DATA, METEOROLOGICAL_FILE)

## Process Data

In [6]:
# File stems (text before the first '.') of everything in the raw folder.
meteorological_files = [
    entry.partition('.')[0] for entry in os.listdir(meteorological_data_path)
]

In [7]:
# Keep only ids that have a raw file AND appear in the municipal data index.
mun_ids = sorted(set(meteorological_files) & set(municipal_data.index))

In [8]:
# One DataFrame per municipality, in the sorted order of mun_ids.
meteo_dfs = list(map(obtain_df_from_meteorological_data, mun_ids))

In [9]:
# Stack the per-municipality frames row-wise into a single DataFrame.
joined_meteo_dfs = pd.concat(meteo_dfs, axis=0)

In [10]:
# Saving every municipality to parquet in a single call did not work, so the
# rows are separated into two parts by comparing mun_id (as a string) with '16_'.

pt1 = joined_meteo_dfs.loc[joined_meteo_dfs['mun_id'] <= '16_']
pt2 = joined_meteo_dfs.loc[joined_meteo_dfs['mun_id'] > '16_']

## Results: 

In [None]:
# Both parts are written to the same destination, partitioned by mun_id.
for part in (pt1, pt2):
    part.to_parquet(final_meteorological_data_path, partition_cols=['mun_id'])