### Dependencias a instalar


```
!pip install polars
!pip install pyarrow
!pip3 install pyarrow

```

In [25]:
import polars as pl
import time
import os

### Project config constraints

In [26]:
file_csv_delimiter = ","
grouped_by = "Date"
unique_grouped_features = ["year", "month"]
output_prefix = "test"
fileName = './demo_records.csv'

### Main Project Functions

In [37]:
def load_csv(path):
    try:
        df = pl.scan_csv(path, separator=file_csv_delimiter)
        return df
    except FileNotFoundError:
        raise FileNotFoundError(f"File not found: {path}")
    except pl.exceptions.ComputeError as e:
        raise ValueError(f"Error processing file: {path}. Details: {str(e)}")
    except Exception as e:
        raise Exception(f"An unexpected error occurred while loading the file: {path}. Details: {str(e)}")

def get_file_info(path):
    file_size = os.path.getsize(path)
    
    size_in_kb = file_size / 1024
    size_in_mb = size_in_kb / 1024
    size_in_gb = size_in_mb / 1024
    print(f"File size: {size_in_gb:.2f} GB")


def add_constraints(df):
    df = df.with_columns([pl.col('Date').str.strptime(pl.Date, format="%Y-%m-%d")])

    df = df.with_columns([
        pl.col(grouped_by).dt.year().alias("year"),
        pl.col(grouped_by).dt.month().alias("month")
    ])
    return df

def get_unique_features_df(df):
    return df.select(unique_grouped_features).unique()

def save_file(df, name):
    df.collect().write_csv(name)
    print(f"Saved as: {name}")

# Función principal que realiza todo el procesamiento
def main(path):
    start_time = time.time()
    df = load_csv(path)
    get_file_info(path)
    
    df = add_constraints(df)
    unique_year_months = get_unique_features_df(df).collect()

    try:        
        for year, month in zip(unique_year_months["year"], unique_year_months["month"]):
            filtered_df = df.filter(
                (pl.col("year") == year) & 
                (pl.col("month") == month)
            )
            
            file_name = f"{output_prefix}_{year}_{month}.csv"
            save_file(filtered_df, file_name)
    except Exception as e:
        print(f"Something went wrong: {e}")

    end_time = time.time()
    execution_time = end_time - start_time
    print(f"Total execution time: {execution_time:.2f} seconds")

In [38]:
main(fileName)

Tamaño del archivo: 0.02 GB
Saved as: test_2024_2.csv
Saved as: test_2024_4.csv
Saved as: test_2024_1.csv
Saved as: test_2024_5.csv
Saved as: test_2024_3.csv
Total execution time: 0.27 seconds
