In [2]:
import pandas as pd

In [3]:
# Municipalities to keep when filtering the raw datasets. The spellings are
# used verbatim for exact matching against the 'Municipio' column, so they
# must not be normalised (accents, punctuation and case all matter).
municipalities_of_interest = [
    'CARTAGENA DE INDIAS',
    'BOGOTA, D.C',
    'MEDELLÍN',
    'CALI',
    'CÚCUTA',
]

In [4]:
def filter_municipalities(csv_file, municipalities, chunksize=1000, column='Municipio'):
    """
    Filters rows for specific municipalities out of a large CSV file.

    The file is streamed one chunk at a time, so the whole file is never
    loaded into memory at once; only chunks that contain matching rows are
    retained.

    Args:
        csv_file (str): Path to the CSV file to process.
        municipalities (list): Municipality names to keep. Names are matched
            exactly against the values of `column`.
        chunksize (int, optional): Number of rows to read per chunk. Default is 1000.
        column (str, optional): Name of the column that holds the municipality
            name. Default is 'Municipio'.

    Returns:
        pd.DataFrame: The matching rows in file order, re-indexed from 0.
            When no row matches, an empty DataFrame that still carries the
            columns of the file.

    Raises:
        KeyError: If `column` is not a column of the CSV file.
    """
    matching_chunks = []   # Only chunks that contain at least one match
    empty_template = None  # Zero-row frame carrying the file's columns

    # The context manager closes the underlying file handle even when an
    # exception interrupts the iteration.
    with pd.read_csv(csv_file, chunksize=chunksize) as reader:
        for chunk in reader:
            if empty_template is None:
                # All chunks share the same columns, so validate only once.
                if column not in chunk.columns:
                    raise KeyError(
                        f"Column {column!r} not found in {csv_file!r}; "
                        f"available columns: {list(chunk.columns)}"
                    )
                empty_template = chunk.iloc[0:0]

            chunk_filtered = chunk[chunk[column].isin(municipalities)]

            # Skip empty results: concatenating many empty frames is wasted
            # work and is deprecated by recent pandas versions.
            if not chunk_filtered.empty:
                matching_chunks.append(chunk_filtered)

    if not matching_chunks:
        # Nothing matched: return an empty frame that keeps the file's schema.
        return empty_template if empty_template is not None else pd.DataFrame()

    # Combine all matching chunks into a single DataFrame
    return pd.concat(matching_chunks, ignore_index=True)

In [None]:

# Keep only the rainfall rows for the municipalities of interest, then
# write the result to disk without the index column.
raw_rainfall_filtered = filter_municipalities(
    csv_file='../data/raw/Precipitaci_n_20250120.csv',
    municipalities=municipalities_of_interest,
)
raw_rainfall_filtered.to_csv(
    path_or_buf='../data/filtered/rainfall_filtered.csv',
    index=False,
)

In [5]:
# Keep only the air-humidity rows for the municipalities of interest, then
# write the result to disk without the index column.
raw_air_humidity_filtered = filter_municipalities(
    csv_file='../data/raw/Humedad_del_Aire_2_metros_20250120.csv',
    municipalities=municipalities_of_interest,
)
raw_air_humidity_filtered.to_csv(
    path_or_buf='../data/filtered/air_humidity_filtered.csv',
    index=False,
)

In [6]:
# Keep only the atmospheric-pressure rows for the municipalities of interest
# and save them alongside the other filtered datasets in ../data/filtered/
# (this cell previously wrote to ./filtered_data/, unlike its sibling cells).
# NOTE: the variable keeps its original spelling ("presure") in case other
# cells reference it.
raw_presure_filtered = filter_municipalities('../data/raw/Presi_n_Atmosf_rica_20250120.csv', municipalities_of_interest)
raw_presure_filtered.to_csv('../data/filtered/pressure_filtered.csv', index=False)