<img src="https://industrial.uniandes.edu.co/sites/default/files/imagenes/uniandeslogo.png" alt="Universidad de los Andes" style="float: right; width: 300px; height: auto;">

# Cleaning Night-Lights

Author: Juan Diego Heredia Niño

Email: jd.heredian@uniandes.edu.co

Date: Jan 2025

In [1]:
# Import necessary libraries
import pandas as pd  # For data manipulation and analysis
import numpy as np  # For numerical operations
import yaml  # To read YAML configuration files
from pathlib import Path  # For cross-platform file path handling

In [2]:
# Read the project's directory layout from the YAML configuration file
with open('paths.yml', 'r') as config_file:
    paths = yaml.safe_load(config_file)  # Parsed configuration as plain Python objects

# Wrap each configured data directory in a Path object
data_dirs = paths['data']
raw = Path(data_dirs['raw'])  # Raw data directory
temp = Path(data_dirs['temp'])  # Temporary (intermediate) data directory
processed = Path(data_dirs['processed'])  # Final processed data directory

In [None]:
QUARTERS_PER_YEAR = 4


def _integer_labels(values):
    """Return *values* as strings, without the '.0' suffix of float-typed integers.

    Numeric Stata columns can be read back as floats (e.g. 5001.0), in which
    case a plain ``astype(str)`` would yield '5001.0'. Float columns are cast
    to the nullable integer dtype first; any other dtype is converted as-is.
    """
    if pd.api.types.is_float_dtype(values):
        values = values.astype('Int64')
    return values.astype(str)


# Read the harmonized night-lights Stata file from the raw data directory
df_lights = pd.read_stata(raw/'clavijo-night-lights'/'lights_harmonized.dta')

# Build mun_code: the municipality code as a zero-padded, 5-character string
df_lights['mun_code'] = _integer_labels(df_lights['MpCodigo']).str.zfill(5)

# Repeat every row once per quarter (rows of the same original record stay adjacent)
df_lights_quarterly = (
    df_lights.loc[df_lights.index.repeat(QUARTERS_PER_YEAR)].reset_index(drop=True)
)

# Number the copies of each original row 1..4 and build labels such as '2012Q3'
quarter_number = pd.Series(
    np.tile(np.arange(1, QUARTERS_PER_YEAR + 1), len(df_lights)),
    index=df_lights_quarterly.index,
)
df_lights_quarterly['quarter'] = (
    _integer_labels(df_lights_quarterly['YEAR']) + 'Q' + quarter_number.astype(str)
)

# Keep only the identifier columns and the light statistics, in this order
df_lights_quarterly = df_lights_quarterly[['mun_code', 'quarter', 'luces_mean', 'luces_median', 'luces_stdev']]

# Make sure the output folder exists before writing; to_parquet does not create it
output_dir = temp/'clavijo-night-lights'
output_dir.mkdir(parents=True, exist_ok=True)
df_lights_quarterly.to_parquet(output_dir/'lights_quarterly.parquet', index=False)