In [1]:
import os

import pandas as pd

In [2]:
# Values matched against the 'Departamento' column of the raw CSV files when
# this list is passed to filter_municipalities.
municipalities_of_interest = [
    'BOLÍVAR',
    'ANTIOQUIA',
    'CUNDINAMARCA',
    'AMAZONAS',
    'GUAINÍA',
    'NARIÑO',
    'TOLIMA',
    'CAUCA',
    'NORTE DE SANTANDER',
]

In [3]:
def filter_municipalities(csv_file, municipalities, output_file, chunksize=1000):
    """
    Stream a large CSV in chunks, keep the rows of interest, and write them to another CSV.

    Despite the parameter name, rows are selected by comparing the values of the
    'Departamento' column against ``municipalities``.

    Args:
        csv_file (str): Path to the CSV file to process. It must contain a
            'Departamento' column.
        municipalities (list): Values of the 'Departamento' column to keep.
        output_file (str): Path to the output CSV file. Any existing file is
            overwritten; missing parent directories are created.
        chunksize (int, optional): Number of rows to read per chunk. Default is 1000.

    Returns:
        int: Total number of rows written to ``output_file`` (header excluded).

    Raises:
        KeyError: If the input has no 'Departamento' column.
    """
    rows_written = 0

    # Using the reader as a context manager guarantees the input handle is
    # closed even if filtering or writing fails part-way through.
    with pd.read_csv(csv_file, chunksize=chunksize, dtype={'Departamento': 'category'}) as reader:
        # Only path-like targets have a directory to create; anything else
        # (e.g. an open buffer) is handed to to_csv untouched.
        if isinstance(output_file, (str, os.PathLike)):
            os.makedirs(os.path.dirname(os.path.abspath(output_file)), exist_ok=True)

        for chunk_number, chunk in enumerate(reader):
            chunk_filtered = chunk[chunk['Departamento'].isin(municipalities)]

            # The first chunk truncates the file and emits the header; every
            # later chunk is appended without repeating it.
            is_first = chunk_number == 0
            chunk_filtered.to_csv(
                output_file,
                index=False,
                mode='w' if is_first else 'a',
                header=is_first,
            )
            rows_written += len(chunk_filtered)

    return rows_written

In [4]:

# Filter the raw rainfall CSV; the filtered rows are written to the output file.
# NOTE(review): the filtered data lives in the output CSV, not in this variable —
# load it with pd.read_csv if a DataFrame is needed.
raw_rainfall_filtered = filter_municipalities(
    csv_file='../data/raw/Precipitaci_n_20250120.csv',
    municipalities=municipalities_of_interest,
    output_file='../data/filtered/rainfall_filtered.csv',
)

In [5]:
# Filter the raw air-humidity CSV; the filtered rows are written to the output file.
# NOTE(review): the filtered data lives in the output CSV, not in this variable —
# load it with pd.read_csv if a DataFrame is needed.
raw_air_humidity_filtered = filter_municipalities(
    csv_file='../data/raw/Humedad_del_Aire_2_metros_20250120.csv',
    municipalities=municipalities_of_interest,
    output_file='../data/filtered/air_humidity_filtered.csv',
)

In [6]:
# Filter the raw atmospheric-pressure CSV; the filtered rows are written to the output file.
# The 'presure' spelling in the variable and file name is kept as-is so that
# anything referring to them keeps working.
raw_presure_filtered = filter_municipalities(
    csv_file='../data/raw/Presi_n_Atmosf_rica_20250120.csv',
    municipalities=municipalities_of_interest,
    output_file='../data/filtered/presure_filtered.csv',
)