In [1]:
import pandas as pd

In [2]:
import numpy as np

In [3]:
import requests
import gzip
import os
import time
import dask.dataframe as dd
from dask.diagnostics import ProgressBar
import tempfile

# Solicitar el directorio de descarga al usuario
download_directory = input("Directorio donde descargar el archivo, terminado en /: ")

# Asegurarse de que el directorio ingresado termine con una barra diagonal
if not download_directory.endswith('/'):
    download_directory += '/'

# Definir la URL y el path local del archivo descargado
url = 'https://static.openfoodfacts.org/data/en.openfoodfacts.org.products.csv.gz'
local_gz_path = os.path.join(download_directory, 'TFM_OPF.csv.gz')

# Descargar el archivo .gz
print("Descargando el archivo .gz...")
response = requests.get(url, stream=True)
with open(local_gz_path, 'wb') as f:
    for chunk in response.iter_content(chunk_size=1024):
        f.write(chunk)

# Verificar si el archivo .gz se ha descargado correctamente
if not os.path.exists(local_gz_path):
    raise FileNotFoundError(f"El archivo {local_gz_path} no se descarg√≥ correctamente.")
else:
    print(f"El archivo {local_gz_path} se descarg√≥ correctamente.")

# Paso 2: Descomprimir el archivo .gz en fragmentos
def decompress_in_chunks(gz_path, output_path, chunk_size=1024*1024):
    with gzip.open(gz_path, 'rb') as f_in:
        with open(output_path, 'wb') as f_out:
            while True:
                chunk = f_in.read(chunk_size)
                if not chunk:
                    break
                f_out.write(chunk)
                print(f"Descomprimido {f_out.tell()} bytes")

# Usar un archivo temporal para la descompresi√≥n
with tempfile.NamedTemporaryFile(delete=False, suffix='.csv') as temp_csv_file:
    temp_csv_path = temp_csv_file.name

print(f"Descomprimiendo el archivo .gz a un archivo temporal: {temp_csv_path}")
decompress_in_chunks(local_gz_path, temp_csv_path)

# Verificar si el archivo .csv se ha descomprimido correctamente
if not os.path.exists(temp_csv_path):
    raise FileNotFoundError(f"El archivo {temp_csv_path} no se descomprimi√≥ correctamente.")
else:
    print(f"El archivo {temp_csv_path} se descomprimi√≥ correctamente.")

# Paso 3: Inspeccionar manualmente las primeras l√≠neas del archivo CSV
print("Inspeccionando las primeras l√≠neas del archivo CSV descomprimido:")
with open(temp_csv_path, 'r', encoding='utf-8') as f:
    for i in range(10):
        line = f.readline().strip()
        print(line)
        if not line:
            break

# Especifica el separador correcto
separator = '\t'  # Especifica el separador que utiliza tu archivo CSV
encoding = 'utf-8'

# Especificar los tipos de datos para las columnas problem√°ticas
dtype = {
    'additives_en': 'object',
    'additives_tags': 'object',
    'allergens': 'object',
    'brand_owner': 'object',
    'cities_tags': 'object',
    'emb_codes': 'object',
    'emb_codes_tags': 'object',
    'first_packaging_code_geo': 'object',
    'generic_name': 'object',
    'manufacturing_places': 'object',
    'manufacturing_places_tags': 'object',
    'no_nutrition_data': 'object',
    'packaging': 'object',
    'packaging_en': 'object',
    'packaging_tags': 'object',
    'packaging_text': 'object',
    'purchase_places': 'object',
    'serving_size': 'object',
    'stores': 'object',
    'traces': 'object',
    'traces_en': 'object',
    'traces_tags': 'object',
    'abbreviated_product_name': 'object',
    'owner': 'object',
    'additives_n': 'object',
    'cities': 'object',
    'nutriscore_score': 'object',
    'serving_quantity': 'object'
}

# Medir el tiempo de ejecuci√≥n y usar ProgressBar para monitorear el progreso
start_time = time.time()

# Leer una peque√±a muestra del archivo para determinar el delimitador y otros par√°metros
sample_size = 10000  # N√∫mero de filas de la muestra

try:
    print("Leyendo una muestra del archivo CSV...")
    sample_df = dd.read_csv(temp_csv_path, blocksize=25e6, sample=sample_size, assume_missing=True, sep=separator, on_bad_lines='skip', dtype=dtype)
    # Mostrar informaci√≥n preliminar sobre el dataframe
    print(f"Columnas: {sample_df.columns.tolist()}")
    print(f"N√∫mero de columnas: {sample_df.shape[1]}")
except Exception as e:
    print(f"Error al leer el archivo CSV: {e}")

try:
    print("Leyendo el archivo CSV completo...")
    with ProgressBar():
        df = dd.read_csv(temp_csv_path, blocksize=25e6, assume_missing=True, sep=separator, encoding=encoding, on_bad_lines='skip', dtype=dtype)
        # Paso 5: An√°lisis Preliminar del Dataset
        print(f"Columnas: {df.columns.tolist()}")
        print(f"N√∫mero de filas: {df.shape[0].compute()}")
        print(f"N√∫mero de columnas: {df.shape[1]}")

        # Describir el dataset para obtener un resumen estad√≠stico
        print(df.describe().compute())

        # Mostrar las primeras filas del dataset
        print(df.head())

        # Mostrar el tipo de datos de cada columna
        print(df.dtypes)
except Exception as e:
    print(f"Error al procesar el archivo CSV completo: {e}")

end_time = time.time()
print(f"Tiempo total de ejecuci√≥n: {end_time - start_time} segundos")


Directorio donde descargar el archivo, terminado en /:  C:/TFM_OPF/


Descargando el archivo .gz...
El archivo C:/TFM_OPF/TFM_OPF.csv.gz se descarg√≥ correctamente.
Descomprimiendo el archivo .gz a un archivo temporal: C:\Users\crist\AppData\Local\Temp\tmpgpovconm.csv
Descomprimido 1048576 bytes
Descomprimido 2097152 bytes
Descomprimido 3145728 bytes
Descomprimido 4194304 bytes
Descomprimido 5242880 bytes
Descomprimido 6291456 bytes
Descomprimido 7340032 bytes
Descomprimido 8388608 bytes
Descomprimido 9437184 bytes
Descomprimido 10485760 bytes
Descomprimido 11534336 bytes
Descomprimido 12582912 bytes
Descomprimido 13631488 bytes
Descomprimido 14680064 bytes
Descomprimido 15728640 bytes
Descomprimido 16777216 bytes
Descomprimido 17825792 bytes
Descomprimido 18874368 bytes
Descomprimido 19922944 bytes
Descomprimido 20971520 bytes
Descomprimido 22020096 bytes
Descomprimido 23068672 bytes
Descomprimido 24117248 bytes
Descomprimido 25165824 bytes
Descomprimido 26214400 bytes
Descomprimido 27262976 bytes
Descomprimido 28311552 bytes
Descomprimido 29360128 byte

  df = reader(bio, **kwargs)
  df = reader(bio, **kwargs)
  df = reader(bio, **kwargs)


[                                        ] | 0% Completed | 4.72 s

  df = reader(bio, **kwargs)
  df = reader(bio, **kwargs)
  df = reader(bio, **kwargs)


[                                        ] | 0% Completed | 5.13 s

  df = reader(bio, **kwargs)
  df = reader(bio, **kwargs)
  df = reader(bio, **kwargs)


[                                        ] | 1% Completed | 5.59 s

  df = reader(bio, **kwargs)
  df = reader(bio, **kwargs)


[#                                       ] | 4% Completed | 10.93 s

  df = reader(bio, **kwargs)
  df = reader(bio, **kwargs)
  df = reader(bio, **kwargs)
  df = reader(bio, **kwargs)
  df = reader(bio, **kwargs)


[#                                       ] | 4% Completed | 11.28 s

  df = reader(bio, **kwargs)
  df = reader(bio, **kwargs)


[#                                       ] | 4% Completed | 11.89 s

  df = reader(bio, **kwargs)


[###                                     ] | 8% Completed | 17.63 s

  df = reader(bio, **kwargs)


[###                                     ] | 8% Completed | 18.16 s

  df = reader(bio, **kwargs)


[#####                                   ] | 12% Completed | 24.40 s

  df = reader(bio, **kwargs)
  df = reader(bio, **kwargs)


[#####                                   ] | 12% Completed | 24.75 s

  df = reader(bio, **kwargs)
  df = reader(bio, **kwargs)
  df = reader(bio, **kwargs)
  df = reader(bio, **kwargs)


[#####                                   ] | 12% Completed | 24.99 s

  df = reader(bio, **kwargs)
  df = reader(bio, **kwargs)
  df = reader(bio, **kwargs)
  df = reader(bio, **kwargs)


[#####                                   ] | 13% Completed | 25.38 s

  df = reader(bio, **kwargs)


[#####                                   ] | 13% Completed | 25.82 s

  df = reader(bio, **kwargs)
  df = reader(bio, **kwargs)


[######                                  ] | 15% Completed | 27.72 s

  df = reader(bio, **kwargs)


[######                                  ] | 16% Completed | 31.77 s

  df = reader(bio, **kwargs)


[######                                  ] | 16% Completed | 32.03 s

  df = reader(bio, **kwargs)
  df = reader(bio, **kwargs)
  df = reader(bio, **kwargs)


[######                                  ] | 17% Completed | 32.41 s

  df = reader(bio, **kwargs)


[#######                                 ] | 19% Completed | 33.75 s

  df = reader(bio, **kwargs)


[########                                ] | 20% Completed | 34.76 s

  df = reader(bio, **kwargs)


[########                                ] | 21% Completed | 38.95 s

  df = reader(bio, **kwargs)


[#########                               ] | 23% Completed | 40.31 s

  df = reader(bio, **kwargs)


[#########                               ] | 24% Completed | 41.37 s

  df = reader(bio, **kwargs)


[##########                              ] | 25% Completed | 45.91 s

  df = reader(bio, **kwargs)
  df = reader(bio, **kwargs)
  df = reader(bio, **kwargs)


[##########                              ] | 25% Completed | 46.72 s

  df = reader(bio, **kwargs)


[##########                              ] | 27% Completed | 47.62 s

  df = reader(bio, **kwargs)


[###########                             ] | 28% Completed | 49.24 s

  df = reader(bio, **kwargs)


[###########                             ] | 28% Completed | 49.61 s

  df = reader(bio, **kwargs)


[###########                             ] | 28% Completed | 49.99 s

  df = reader(bio, **kwargs)


[###########                             ] | 28% Completed | 50.93 s

  df = reader(bio, **kwargs)


[###########                             ] | 29% Completed | 53.21 s

  df = reader(bio, **kwargs)
  df = reader(bio, **kwargs)


[###########                             ] | 29% Completed | 53.42 s

  df = reader(bio, **kwargs)


[###########                             ] | 29% Completed | 53.71 s

  df = reader(bio, **kwargs)


[############                            ] | 30% Completed | 54.31 s

  df = reader(bio, **kwargs)


[############                            ] | 30% Completed | 54.60 s

  df = reader(bio, **kwargs)


[#############                           ] | 33% Completed | 58.55 s

  df = reader(bio, **kwargs)


[#############                           ] | 33% Completed | 59.67 s

  df = reader(bio, **kwargs)
  df = reader(bio, **kwargs)


[#############                           ] | 34% Completed | 60.75 s

  df = reader(bio, **kwargs)


[##############                          ] | 36% Completed | 62.66 s

  df = reader(bio, **kwargs)
  df = reader(bio, **kwargs)


[###############                         ] | 38% Completed | 67.04 s

  df = reader(bio, **kwargs)


[################                        ] | 40% Completed | 69.26 s

  df = reader(bio, **kwargs)


[################                        ] | 41% Completed | 72.54 s

  df = reader(bio, **kwargs)


[################                        ] | 41% Completed | 72.90 s

  df = reader(bio, **kwargs)
  df = reader(bio, **kwargs)


[################                        ] | 42% Completed | 73.14 s

  df = reader(bio, **kwargs)


[#################                       ] | 43% Completed | 74.05 s

  df = reader(bio, **kwargs)
  df = reader(bio, **kwargs)


[#################                       ] | 44% Completed | 76.17 s

  df = reader(bio, **kwargs)


[##################                      ] | 45% Completed | 77.10 s

  df = reader(bio, **kwargs)
  df = reader(bio, **kwargs)


[##################                      ] | 46% Completed | 79.81 s

  df = reader(bio, **kwargs)


[###################                     ] | 48% Completed | 82.04 s

  df = reader(bio, **kwargs)


[###################                     ] | 49% Completed | 84.66 s

  df = reader(bio, **kwargs)


[####################                    ] | 52% Completed | 89.24 s

  df = reader(bio, **kwargs)


[#####################                   ] | 52% Completed | 89.93 s

  df = reader(bio, **kwargs)
  df = reader(bio, **kwargs)


[#####################                   ] | 53% Completed | 90.87 s

  df = reader(bio, **kwargs)


[######################                  ] | 55% Completed | 95.64 s

  df = reader(bio, **kwargs)


[#######################                 ] | 58% Completed | 99.59 s

  df = reader(bio, **kwargs)


[#######################                 ] | 59% Completed | 102.38 s

  df = reader(bio, **kwargs)


[#######################                 ] | 59% Completed | 103.16 s

  df = reader(bio, **kwargs)
  df = reader(bio, **kwargs)


[########################                ] | 61% Completed | 105.60 s

  df = reader(bio, **kwargs)


[########################                ] | 61% Completed | 105.98 s

  df = reader(bio, **kwargs)
  df = reader(bio, **kwargs)
  df = reader(bio, **kwargs)
  df = reader(bio, **kwargs)


[########################                ] | 62% Completed | 106.35 s

  df = reader(bio, **kwargs)


[#########################               ] | 63% Completed | 109.56 s

  df = reader(bio, **kwargs)


[#########################               ] | 64% Completed | 110.34 s

  df = reader(bio, **kwargs)


[#########################               ] | 64% Completed | 110.84 s

  df = reader(bio, **kwargs)


[##########################              ] | 66% Completed | 112.80 s

  df = reader(bio, **kwargs)


[##########################              ] | 66% Completed | 113.22 s

  df = reader(bio, **kwargs)
  df = reader(bio, **kwargs)
  df = reader(bio, **kwargs)


[###########################             ] | 68% Completed | 116.13 s

  df = reader(bio, **kwargs)


[###########################             ] | 68% Completed | 116.50 s

  df = reader(bio, **kwargs)


[###########################             ] | 68% Completed | 116.92 s

  df = reader(bio, **kwargs)


[###########################             ] | 68% Completed | 117.55 s

  df = reader(bio, **kwargs)


[###########################             ] | 68% Completed | 117.78 s

  df = reader(bio, **kwargs)


[###########################             ] | 69% Completed | 118.45 s

  df = reader(bio, **kwargs)


[############################            ] | 70% Completed | 119.56 s

  df = reader(bio, **kwargs)
  df = reader(bio, **kwargs)


[############################            ] | 71% Completed | 123.12 s

  df = reader(bio, **kwargs)


[############################            ] | 71% Completed | 123.45 s

  df = reader(bio, **kwargs)


[############################            ] | 72% Completed | 123.70 s

  df = reader(bio, **kwargs)


[#############################           ] | 72% Completed | 125.12 s

  df = reader(bio, **kwargs)
  df = reader(bio, **kwargs)


[#############################           ] | 73% Completed | 126.05 s

  df = reader(bio, **kwargs)


[##############################          ] | 75% Completed | 127.53 s

  df = reader(bio, **kwargs)


[##############################          ] | 76% Completed | 130.04 s

  df = reader(bio, **kwargs)


[##############################          ] | 76% Completed | 131.40 s

  df = reader(bio, **kwargs)


[##############################          ] | 77% Completed | 131.83 s

  df = reader(bio, **kwargs)


[###############################         ] | 77% Completed | 132.27 s

  df = reader(bio, **kwargs)


[###############################         ] | 77% Completed | 132.87 s

  df = reader(bio, **kwargs)


[###############################         ] | 78% Completed | 133.55 s

  df = reader(bio, **kwargs)


[###############################         ] | 78% Completed | 133.85 s

  df = reader(bio, **kwargs)
  df = reader(bio, **kwargs)


[################################        ] | 80% Completed | 137.69 s

  df = reader(bio, **kwargs)


[################################        ] | 82% Completed | 140.43 s

  df = reader(bio, **kwargs)


[#################################       ] | 83% Completed | 141.32 s

  df = reader(bio, **kwargs)


[#################################       ] | 83% Completed | 141.59 s

  df = reader(bio, **kwargs)
  df = reader(bio, **kwargs)
  df = reader(bio, **kwargs)


[#################################       ] | 84% Completed | 144.34 s

  df = reader(bio, **kwargs)


[#################################       ] | 84% Completed | 145.48 s

  df = reader(bio, **kwargs)


[#################################       ] | 84% Completed | 145.59 s

  df = reader(bio, **kwargs)
  df = reader(bio, **kwargs)
  df = reader(bio, **kwargs)


[##################################      ] | 86% Completed | 147.60 s

  df = reader(bio, **kwargs)


[##################################      ] | 87% Completed | 148.25 s

  df = reader(bio, **kwargs)


[###################################     ] | 87% Completed | 148.39 s

  df = reader(bio, **kwargs)
  df = reader(bio, **kwargs)


[###################################     ] | 88% Completed | 150.74 s

  df = reader(bio, **kwargs)


[###################################     ] | 89% Completed | 152.44 s

  df = reader(bio, **kwargs)
  df = reader(bio, **kwargs)
  df = reader(bio, **kwargs)


[###################################     ] | 89% Completed | 153.04 s

  df = reader(bio, **kwargs)


[####################################    ] | 90% Completed | 153.56 s

  df = reader(bio, **kwargs)


[####################################    ] | 90% Completed | 153.94 s

  df = reader(bio, **kwargs)


[####################################    ] | 92% Completed | 156.37 s

  df = reader(bio, **kwargs)


[#####################################   ] | 93% Completed | 157.52 s

  df = reader(bio, **kwargs)


[#####################################   ] | 93% Completed | 158.91 s

  df = reader(bio, **kwargs)


[#####################################   ] | 94% Completed | 160.02 s

  df = reader(bio, **kwargs)
  df = reader(bio, **kwargs)


[#####################################   ] | 94% Completed | 160.69 s

  df = reader(bio, **kwargs)


[######################################  ] | 95% Completed | 161.17 s

  df = reader(bio, **kwargs)
  df = reader(bio, **kwargs)


[######################################  ] | 96% Completed | 162.67 s

  df = reader(bio, **kwargs)


[######################################  ] | 96% Completed | 163.15 s

  df = reader(bio, **kwargs)


[####################################### ] | 97% Completed | 164.30 s

  df = reader(bio, **kwargs)
  df = reader(bio, **kwargs)


[####################################### ] | 98% Completed | 164.80 s

  df = reader(bio, **kwargs)
  df = reader(bio, **kwargs)


[####################################### ] | 99% Completed | 165.43 s

  df = reader(bio, **kwargs)


[########################################] | 100% Completed | 165.66 s
N√∫mero de filas: 3236626
N√∫mero de columnas: 206
[                                        ] | 0% Completed | 7.26 s ms

  df = reader(bio, **kwargs)
  df = reader(bio, **kwargs)


[                                        ] | 0% Completed | 7.52 s

  df = reader(bio, **kwargs)
  df = reader(bio, **kwargs)
  df = reader(bio, **kwargs)
  df = reader(bio, **kwargs)


[                                        ] | 0% Completed | 8.02 s

  df = reader(bio, **kwargs)


[                                        ] | 0% Completed | 8.32 s

  df = reader(bio, **kwargs)
  df = reader(bio, **kwargs)
  df = reader(bio, **kwargs)


[                                        ] | 0% Completed | 8.43 s

  df = reader(bio, **kwargs)


[#                                       ] | 4% Completed | 18.20 s

  df = reader(bio, **kwargs)
  df = reader(bio, **kwargs)


[#                                       ] | 4% Completed | 18.34 s

  df = reader(bio, **kwargs)
  df = reader(bio, **kwargs)
  df = reader(bio, **kwargs)
  df = reader(bio, **kwargs)
  df = reader(bio, **kwargs)


[#                                       ] | 4% Completed | 19.66 s

  df = reader(bio, **kwargs)


[###                                     ] | 8% Completed | 29.02 s

  df = reader(bio, **kwargs)


[###                                     ] | 8% Completed | 29.74 s

  df = reader(bio, **kwargs)


[#####                                   ] | 12% Completed | 39.91 s

  df = reader(bio, **kwargs)
  df = reader(bio, **kwargs)
  df = reader(bio, **kwargs)


[#####                                   ] | 12% Completed | 40.03 s

  df = reader(bio, **kwargs)
  df = reader(bio, **kwargs)
  df = reader(bio, **kwargs)
  df = reader(bio, **kwargs)
  df = reader(bio, **kwargs)
  df = reader(bio, **kwargs)


[#####                                   ] | 12% Completed | 40.43 s

  df = reader(bio, **kwargs)
  df = reader(bio, **kwargs)
  df = reader(bio, **kwargs)
  df = reader(bio, **kwargs)


[######                                  ] | 16% Completed | 50.77 s

  df = reader(bio, **kwargs)
  df = reader(bio, **kwargs)
  df = reader(bio, **kwargs)
  df = reader(bio, **kwargs)


[######                                  ] | 16% Completed | 51.01 s

  df = reader(bio, **kwargs)
  df = reader(bio, **kwargs)


[######                                  ] | 16% Completed | 52.38 s

  df = reader(bio, **kwargs)


[########                                ] | 21% Completed | 60.98 s

  df = reader(bio, **kwargs)


[########                                ] | 21% Completed | 61.59 s

  df = reader(bio, **kwargs)
  df = reader(bio, **kwargs)


[##########                              ] | 25% Completed | 71.56 s

  df = reader(bio, **kwargs)
  df = reader(bio, **kwargs)
  df = reader(bio, **kwargs)


[##########                              ] | 25% Completed | 72.93 s

  df = reader(bio, **kwargs)
  df = reader(bio, **kwargs)
  df = reader(bio, **kwargs)


[##########                              ] | 25% Completed | 73.25 s

  df = reader(bio, **kwargs)
  df = reader(bio, **kwargs)


[##########                              ] | 25% Completed | 74.20 s

  df = reader(bio, **kwargs)


[###########                             ] | 29% Completed | 82.48 s

  df = reader(bio, **kwargs)
  df = reader(bio, **kwargs)
  df = reader(bio, **kwargs)
  df = reader(bio, **kwargs)


[###########                             ] | 29% Completed | 82.85 s

  df = reader(bio, **kwargs)
  df = reader(bio, **kwargs)


[###########                             ] | 29% Completed | 83.80 s

  df = reader(bio, **kwargs)


[#############                           ] | 33% Completed | 93.33 s

  df = reader(bio, **kwargs)
  df = reader(bio, **kwargs)
  df = reader(bio, **kwargs)


[#############                           ] | 33% Completed | 93.60 s

  df = reader(bio, **kwargs)


[#############                           ] | 33% Completed | 95.78 s

  df = reader(bio, **kwargs)


[###############                         ] | 38% Completed | 103.65 s

  df = reader(bio, **kwargs)
  df = reader(bio, **kwargs)


[################                        ] | 42% Completed | 113.16 s

  df = reader(bio, **kwargs)
  df = reader(bio, **kwargs)
  df = reader(bio, **kwargs)
  df = reader(bio, **kwargs)
  df = reader(bio, **kwargs)
  df = reader(bio, **kwargs)
  df = reader(bio, **kwargs)
  df = reader(bio, **kwargs)


[################                        ] | 42% Completed | 114.36 s

  df = reader(bio, **kwargs)


[##################                      ] | 46% Completed | 123.30 s

  df = reader(bio, **kwargs)


[##################                      ] | 46% Completed | 124.09 s

  df = reader(bio, **kwargs)


[##################                      ] | 46% Completed | 125.01 s

  df = reader(bio, **kwargs)


[####################                    ] | 50% Completed | 132.89 s

  df = reader(bio, **kwargs)
  df = reader(bio, **kwargs)


[####################                    ] | 50% Completed | 134.02 s

  df = reader(bio, **kwargs)
  df = reader(bio, **kwargs)


[#####################                   ] | 54% Completed | 145.40 s

  df = reader(bio, **kwargs)


[#####################                   ] | 54% Completed | 146.93 s

  df = reader(bio, **kwargs)


[#######################                 ] | 59% Completed | 155.40 s

  df = reader(bio, **kwargs)


[#######################                 ] | 59% Completed | 155.82 s

  df = reader(bio, **kwargs)


[#######################                 ] | 59% Completed | 156.43 s

  df = reader(bio, **kwargs)


[#######################                 ] | 59% Completed | 156.70 s

  df = reader(bio, **kwargs)
  df = reader(bio, **kwargs)
  df = reader(bio, **kwargs)


[#######################                 ] | 59% Completed | 157.31 s

  df = reader(bio, **kwargs)


[#########################               ] | 63% Completed | 165.89 s

  df = reader(bio, **kwargs)
  df = reader(bio, **kwargs)
  df = reader(bio, **kwargs)


[#########################               ] | 63% Completed | 166.64 s

  df = reader(bio, **kwargs)
  df = reader(bio, **kwargs)


[#########################               ] | 63% Completed | 167.18 s

  df = reader(bio, **kwargs)


[#########################               ] | 63% Completed | 168.07 s

  df = reader(bio, **kwargs)


[#########################               ] | 63% Completed | 168.58 s

  df = reader(bio, **kwargs)
  df = reader(bio, **kwargs)


[###########################             ] | 67% Completed | 176.70 s

  df = reader(bio, **kwargs)
  df = reader(bio, **kwargs)
  df = reader(bio, **kwargs)


[###########################             ] | 67% Completed | 177.13 s

  df = reader(bio, **kwargs)


[###########################             ] | 67% Completed | 177.58 s

  df = reader(bio, **kwargs)


[###########################             ] | 67% Completed | 178.38 s

  df = reader(bio, **kwargs)
  df = reader(bio, **kwargs)


[###########################             ] | 67% Completed | 178.74 s

  df = reader(bio, **kwargs)


[############################            ] | 71% Completed | 188.45 s

  df = reader(bio, **kwargs)


[############################            ] | 71% Completed | 188.82 s

  df = reader(bio, **kwargs)
  df = reader(bio, **kwargs)
  df = reader(bio, **kwargs)
  df = reader(bio, **kwargs)
  df = reader(bio, **kwargs)


[############################            ] | 71% Completed | 189.69 s

  df = reader(bio, **kwargs)


[##############################          ] | 76% Completed | 199.13 s

  df = reader(bio, **kwargs)
  df = reader(bio, **kwargs)
  df = reader(bio, **kwargs)


[##############################          ] | 76% Completed | 200.08 s

  df = reader(bio, **kwargs)


[##############################          ] | 76% Completed | 200.59 s

  df = reader(bio, **kwargs)
  df = reader(bio, **kwargs)
  df = reader(bio, **kwargs)


[##############################          ] | 76% Completed | 200.88 s

  df = reader(bio, **kwargs)


[################################        ] | 80% Completed | 209.16 s

  df = reader(bio, **kwargs)
  df = reader(bio, **kwargs)
  df = reader(bio, **kwargs)
  df = reader(bio, **kwargs)


[################################        ] | 80% Completed | 211.54 s

  df = reader(bio, **kwargs)


[#################################       ] | 84% Completed | 218.82 s

  df = reader(bio, **kwargs)


[#################################       ] | 84% Completed | 219.06 s

  df = reader(bio, **kwargs)


[#################################       ] | 84% Completed | 219.68 s

  df = reader(bio, **kwargs)
  df = reader(bio, **kwargs)
  df = reader(bio, **kwargs)


[#################################       ] | 84% Completed | 219.99 s

  df = reader(bio, **kwargs)
  df = reader(bio, **kwargs)


[#################################       ] | 84% Completed | 221.25 s

  df = reader(bio, **kwargs)
  df = reader(bio, **kwargs)
  df = reader(bio, **kwargs)


[###################################     ] | 88% Completed | 229.07 s

  df = reader(bio, **kwargs)


[###################################     ] | 88% Completed | 229.72 s

  df = reader(bio, **kwargs)
  df = reader(bio, **kwargs)
  df = reader(bio, **kwargs)


[###################################     ] | 88% Completed | 230.02 s

  df = reader(bio, **kwargs)
  df = reader(bio, **kwargs)


[###################################     ] | 88% Completed | 230.34 s

  df = reader(bio, **kwargs)
  df = reader(bio, **kwargs)


[#####################################   ] | 92% Completed | 239.70 s

  df = reader(bio, **kwargs)
  df = reader(bio, **kwargs)


[#####################################   ] | 92% Completed | 239.84 s

  df = reader(bio, **kwargs)
  df = reader(bio, **kwargs)


[#####################################   ] | 93% Completed | 240.88 s

  df = reader(bio, **kwargs)
  df = reader(bio, **kwargs)


[#####################################   ] | 93% Completed | 241.37 s

  df = reader(bio, **kwargs)
  df = reader(bio, **kwargs)


[######################################  ] | 97% Completed | 249.67 s

  df = reader(bio, **kwargs)
  df = reader(bio, **kwargs)
  df = reader(bio, **kwargs)
  df = reader(bio, **kwargs)


[######################################  ] | 97% Completed | 249.93 s

  df = reader(bio, **kwargs)
  df = reader(bio, **kwargs)


[########################################] | 100% Completed | 255.37 s
               code     created_t  last_modified_t  last_updated_t  \
count  3.236626e+06  3.236626e+06     3.236626e+06    3.213304e+06   
mean   4.622355e+49  1.603466e+09     1.655118e+09    1.708340e+09   
std    5.929508e+52  7.025122e+07     4.790258e+07    2.024911e+06   
min    1.000000e+00  1.328021e+09     1.353582e+09    1.705496e+09   
25%    2.251663e+12  1.587637e+09     1.649756e+09    1.707734e+09   
50%    5.051008e+12  1.635203e+09     1.680199e+09    1.707843e+09   
75%    8.992950e+12  1.698830e+09     1.714124e+09    1.717560e+09   
max    8.450851e+55  1.718258e+09     1.718258e+09    1.718258e+09   

       allergens_en  additives     nova_group  ecoscore_score  \
count           0.0        0.0  882066.000000   816530.000000   
mean            NaN        NaN       3.329151       49.050266   
std             NaN        NaN       1.047489       25.108720   
min             NaN        NaN       1

  df = reader(bio, **kwargs)


[########################################] | 100% Completed | 636.33 ms
          code                                                url  \
0        225.0  http://world-en.openfoodfacts.org/product/0000...   
1  207025004.0  http://world-en.openfoodfacts.org/product/0000...   
2    3429145.0  http://world-en.openfoodfacts.org/product/0000...   
3   26772226.0  http://world-en.openfoodfacts.org/product/0000...   
4         17.0  http://world-en.openfoodfacts.org/product/0000...   

          creator     created_t      created_datetime  last_modified_t  \
0  nutrinet-sante  1.623855e+09  2021-06-16T14:53:28Z     1.692102e+09   
1         kiliweb  1.656949e+09  2022-07-04T15:30:10Z     1.656949e+09   
2         kiliweb  1.630484e+09  2021-09-01T08:11:51Z     1.682646e+09   
3         kiliweb  1.654250e+09  2022-06-03T09:58:31Z     1.654270e+09   
4         kiliweb  1.529059e+09  2018-06-15T10:38:00Z     1.561464e+09   

  last_modified_datetime last_modified_by  last_updated_t  \
0   202

In [4]:
# Columnas a eliminar
columns_to_drop = [
    'url', 'creator', 'created_t', 'last_modified_t', 'last_modified_datetime', 'last_modified_by', 
    'last_updated_t', 'last_updated_datetime', 'abbreviated_product_name', 'packaging', 'packaging_tags', 
    'packaging_text', 'brands', 'categories', 'categories_tags', 'origins', 'origins_tags', 
    'manufacturing_places', 'labels', 'labels_tags', 'emb_codes', 'cities', 'countries', 'countries_tags', 
    'ingredients_text', 'allergens', 'traces', 'traces_tags', 'additives_n', 'additives', 'additives_tags', 
    'food_groups', 'food_groups_tags', 'states', 'states_tags', 'last_image_t', 'last_image_datetime', 
    'main_category', 'image_url', 'image_small_url', 'image_ingredients_url', 'image_ingredients_small_url', 
    'image_nutrition_url', 'image_nutrition_small_url'
]

# Medir el tiempo de ejecuci√≥n y usar ProgressBar para monitorear el progreso
start_time = time.time()

try:
    print("Leyendo el archivo CSV completo...")
    with ProgressBar():
        df = dd.read_csv(temp_csv_path, blocksize=25e6, assume_missing=True, sep=separator, encoding=encoding, on_bad_lines='skip', dtype=dtype)
        
        # Eliminar las columnas especificadas
        df_cleaned = df.drop(columns=columns_to_drop, errors='ignore')
        
        # Verificar el n√∫mero de columnas resultantes
        print(f"N√∫mero de columnas antes de eliminar: {len(df.columns)}")
        print(f"N√∫mero de columnas despu√©s de eliminar: {len(df_cleaned.columns)}")

        # Mostrar las columnas resultantes
        print(f"Columnas resultantes: {df_cleaned.columns.tolist()}")

        # Almacenar el DataFrame limpio en una variable
        cleaned_data = df_cleaned

except Exception as e:
    print(f"Error al procesar el archivo CSV completo: {e}")

end_time = time.time()
print(f"Tiempo total de ejecuci√≥n: {end_time - start_time} segundos")

Leyendo el archivo CSV completo...
N√∫mero de columnas antes de eliminar: 206
N√∫mero de columnas despu√©s de eliminar: 162
Columnas resultantes: ['code', 'created_datetime', 'product_name', 'generic_name', 'quantity', 'packaging_en', 'brands_tags', 'categories_en', 'origins_en', 'manufacturing_places_tags', 'labels_en', 'emb_codes_tags', 'first_packaging_code_geo', 'cities_tags', 'purchase_places', 'stores', 'countries_en', 'ingredients_tags', 'ingredients_analysis_tags', 'allergens_en', 'traces_en', 'serving_size', 'serving_quantity', 'no_nutrition_data', 'additives_en', 'nutriscore_score', 'nutriscore_grade', 'nova_group', 'pnns_groups_1', 'pnns_groups_2', 'food_groups_en', 'states_en', 'brand_owner', 'ecoscore_score', 'ecoscore_grade', 'nutrient_levels_tags', 'product_quantity', 'owner', 'data_quality_errors_tags', 'unique_scans_n', 'popularity_tags', 'completeness', 'main_category_en', 'energy-kj_100g', 'energy-kcal_100g', 'energy_100g', 'energy-from-fat_100g', 'fat_100g', 'satura

In [5]:
# Especificar los tipos de datos manualmente
dtype = {
    'additives_en': 'object',
    'additives_tags': 'object',
    'allergens': 'object',
    'brand_owner': 'object',
    'cities_tags': 'object',
    'emb_codes': 'object',
    'emb_codes_tags': 'object',
    'first_packaging_code_geo': 'object',
    'generic_name': 'object',
    'manufacturing_places': 'object',
    'manufacturing_places_tags': 'object',
    'no_nutrition_data': 'object',
    'packaging': 'object',
    'packaging_en': 'object',
    'packaging_tags': 'object',
    'packaging_text': 'object',
    'purchase_places': 'object',
    'serving_size': 'object',
    'stores': 'object',
    'traces': 'object',
    'traces_en': 'object',
    'traces_tags': 'object',
    'abbreviated_product_name': 'object',
    'owner': 'object',
    'additives_n': 'object',
    'cities': 'object',
    'nutriscore_score': 'object',
    'serving_quantity': 'object'
}

# Iniciar el temporizador
start_time = time.time()

try:
    print("Leyendo el archivo CSV completo...")
    with ProgressBar():
        # Leer el archivo CSV usando Dask con tipos de datos especificados
        df = dd.read_csv(temp_csv_path, blocksize=25e6, assume_missing=True, sep=separator, encoding=encoding, on_bad_lines='skip', dtype=dtype)
        
        # Eliminar las columnas especificadas
        df_cleaned = df.drop(columns=columns_to_drop, errors='ignore')
        
        # Verificar el n√∫mero de columnas resultantes
        print(f"N√∫mero de columnas antes de eliminar: {len(df.columns)}")
        print(f"N√∫mero de columnas despu√©s de eliminar: {len(df_cleaned.columns)}")

        # Mostrar las columnas resultantes
        print(f"Columnas resultantes: {df_cleaned.columns.tolist()}")

        # Convertir el DataFrame de Dask a Pandas
        df_cleaned_pd = df_cleaned.compute()

        # Guardar el DataFrame resultante en un archivo CSV utilizando Pandas con codificaci√≥n UTF-8
        output_csv_path = os.path.join(os.path.dirname(local_gz_path), 'dataset_columnas_eliminadas_pandas.csv')
        df_cleaned_pd.to_csv(output_csv_path, sep='\t', index=False, encoding='utf-8')
        print(f"Archivo CSV con columnas eliminadas guardado en: {output_csv_path}")

except Exception as e:
    print(f"Error al procesar el archivo CSV completo: {e}")

# Calcular y mostrar el tiempo total de ejecuci√≥n
end_time = time.time()
print(f"Tiempo total de ejecuci√≥n: {end_time - start_time} segundos")

# Verificar el n√∫mero de filas y columnas del nuevo dataset
try:
    df_result = pd.read_csv(output_csv_path, sep='\t', low_memory=False, encoding='utf-8')
    num_filas = df_result.shape[0]
    num_columnas = df_result.shape[1]

    print(f"N√∫mero de filas del nuevo dataset: {num_filas}")
    print(f"N√∫mero de columnas del nuevo dataset: {num_columnas}")
except Exception as e:
    print(f"Error al leer el archivo CSV generado: {e}")

Leyendo el archivo CSV completo...
N√∫mero de columnas antes de eliminar: 206
N√∫mero de columnas despu√©s de eliminar: 162
Columnas resultantes: ['code', 'created_datetime', 'product_name', 'generic_name', 'quantity', 'packaging_en', 'brands_tags', 'categories_en', 'origins_en', 'manufacturing_places_tags', 'labels_en', 'emb_codes_tags', 'first_packaging_code_geo', 'cities_tags', 'purchase_places', 'stores', 'countries_en', 'ingredients_tags', 'ingredients_analysis_tags', 'allergens_en', 'traces_en', 'serving_size', 'serving_quantity', 'no_nutrition_data', 'additives_en', 'nutriscore_score', 'nutriscore_grade', 'nova_group', 'pnns_groups_1', 'pnns_groups_2', 'food_groups_en', 'states_en', 'brand_owner', 'ecoscore_score', 'ecoscore_grade', 'nutrient_levels_tags', 'product_quantity', 'owner', 'data_quality_errors_tags', 'unique_scans_n', 'popularity_tags', 'completeness', 'main_category_en', 'energy-kj_100g', 'energy-kcal_100g', 'energy_100g', 'energy-from-fat_100g', 'fat_100g', 'satura

  df = reader(bio, **kwargs)
  df = reader(bio, **kwargs)
  df = reader(bio, **kwargs)


[                                        ] | 0% Completed | 4.06 s

  df = reader(bio, **kwargs)


[                                        ] | 0% Completed | 4.55 s

  df = reader(bio, **kwargs)


[                                        ] | 1% Completed | 5.46 s

  df = reader(bio, **kwargs)
  df = reader(bio, **kwargs)
  df = reader(bio, **kwargs)


[                                        ] | 2% Completed | 6.32 s

  df = reader(bio, **kwargs)


[#                                       ] | 4% Completed | 10.40 s

  df = reader(bio, **kwargs)


[#                                       ] | 4% Completed | 10.89 s

  df = reader(bio, **kwargs)
  df = reader(bio, **kwargs)


[##                                      ] | 5% Completed | 12.28 s

  df = reader(bio, **kwargs)
  df = reader(bio, **kwargs)


[##                                      ] | 5% Completed | 13.10 s

  df = reader(bio, **kwargs)
  df = reader(bio, **kwargs)


[###                                     ] | 7% Completed | 15.18 s

  df = reader(bio, **kwargs)


[###                                     ] | 8% Completed | 18.72 s

  df = reader(bio, **kwargs)


[###                                     ] | 9% Completed | 19.84 s

  df = reader(bio, **kwargs)


[###                                     ] | 9% Completed | 20.32 s

  df = reader(bio, **kwargs)


[####                                    ] | 10% Completed | 20.89 s

  df = reader(bio, **kwargs)


[#####                                   ] | 13% Completed | 26.81 s

  df = reader(bio, **kwargs)
  df = reader(bio, **kwargs)


[#####                                   ] | 13% Completed | 27.29 s

  df = reader(bio, **kwargs)
  df = reader(bio, **kwargs)


[#####                                   ] | 13% Completed | 27.62 s

  df = reader(bio, **kwargs)


[#####                                   ] | 13% Completed | 28.17 s

  df = reader(bio, **kwargs)


[#####                                   ] | 14% Completed | 28.47 s

  df = reader(bio, **kwargs)


[######                                  ] | 17% Completed | 34.26 s

  df = reader(bio, **kwargs)


[#######                                 ] | 17% Completed | 35.16 s

  df = reader(bio, **kwargs)
  df = reader(bio, **kwargs)
  df = reader(bio, **kwargs)


[#######                                 ] | 17% Completed | 35.99 s

  df = reader(bio, **kwargs)


[########                                ] | 21% Completed | 41.04 s

  df = reader(bio, **kwargs)


[########                                ] | 21% Completed | 42.32 s

  df = reader(bio, **kwargs)


[########                                ] | 21% Completed | 42.69 s

  df = reader(bio, **kwargs)


[########                                ] | 21% Completed | 43.23 s

  df = reader(bio, **kwargs)


[########                                ] | 21% Completed | 44.14 s

  df = reader(bio, **kwargs)
  df = reader(bio, **kwargs)


[#########                               ] | 23% Completed | 45.62 s

  df = reader(bio, **kwargs)


[##########                              ] | 25% Completed | 49.38 s

  df = reader(bio, **kwargs)


[##########                              ] | 26% Completed | 51.69 s

  df = reader(bio, **kwargs)


[##########                              ] | 26% Completed | 51.81 s

  df = reader(bio, **kwargs)


[##########                              ] | 26% Completed | 52.24 s

  df = reader(bio, **kwargs)


[##########                              ] | 26% Completed | 53.71 s

  df = reader(bio, **kwargs)


[###########                             ] | 28% Completed | 55.46 s

  df = reader(bio, **kwargs)
  df = reader(bio, **kwargs)
  df = reader(bio, **kwargs)


[###########                             ] | 29% Completed | 59.47 s

  df = reader(bio, **kwargs)


[############                            ] | 30% Completed | 62.75 s

  df = reader(bio, **kwargs)


[############                            ] | 31% Completed | 63.28 s

  df = reader(bio, **kwargs)
  df = reader(bio, **kwargs)


[#############                           ] | 32% Completed | 65.40 s

  df = reader(bio, **kwargs)


[#############                           ] | 33% Completed | 65.95 s

  df = reader(bio, **kwargs)
  df = reader(bio, **kwargs)


[#############                           ] | 33% Completed | 66.73 s

  df = reader(bio, **kwargs)


[#############                           ] | 34% Completed | 69.08 s

  df = reader(bio, **kwargs)


[#############                           ] | 34% Completed | 70.45 s

  df = reader(bio, **kwargs)


[#############                           ] | 34% Completed | 70.80 s

  df = reader(bio, **kwargs)


[##############                          ] | 35% Completed | 71.18 s

  df = reader(bio, **kwargs)


[##############                          ] | 35% Completed | 72.12 s

  df = reader(bio, **kwargs)


[##############                          ] | 37% Completed | 76.08 s

  df = reader(bio, **kwargs)


[###############                         ] | 38% Completed | 78.71 s

  df = reader(bio, **kwargs)


[###############                         ] | 38% Completed | 78.98 s

  df = reader(bio, **kwargs)


[###############                         ] | 38% Completed | 80.13 s

  df = reader(bio, **kwargs)


[###############                         ] | 39% Completed | 81.46 s

  df = reader(bio, **kwargs)


[################                        ] | 41% Completed | 84.02 s

  df = reader(bio, **kwargs)


[################                        ] | 41% Completed | 85.36 s

  df = reader(bio, **kwargs)
  df = reader(bio, **kwargs)


[################                        ] | 41% Completed | 85.97 s

  df = reader(bio, **kwargs)


[################                        ] | 42% Completed | 86.71 s

  df = reader(bio, **kwargs)


[#################                       ] | 42% Completed | 88.51 s

  df = reader(bio, **kwargs)


[#################                       ] | 43% Completed | 89.69 s

  df = reader(bio, **kwargs)


[#################                       ] | 43% Completed | 91.73 s

  df = reader(bio, **kwargs)


[##################                      ] | 45% Completed | 93.56 s

  df = reader(bio, **kwargs)
  df = reader(bio, **kwargs)


[##################                      ] | 45% Completed | 94.97 s

  df = reader(bio, **kwargs)


[##################                      ] | 45% Completed | 95.90 s

  df = reader(bio, **kwargs)


[##################                      ] | 46% Completed | 98.02 s

  df = reader(bio, **kwargs)


[##################                      ] | 46% Completed | 99.09 s

  df = reader(bio, **kwargs)


[###################                     ] | 47% Completed | 100.29 s

  df = reader(bio, **kwargs)


[###################                     ] | 47% Completed | 100.75 s

  df = reader(bio, **kwargs)
  df = reader(bio, **kwargs)


[###################                     ] | 48% Completed | 102.38 s

  df = reader(bio, **kwargs)


[###################                     ] | 49% Completed | 105.01 s

  df = reader(bio, **kwargs)


[###################                     ] | 49% Completed | 105.97 s

  df = reader(bio, **kwargs)


[####################                    ] | 50% Completed | 107.63 s

  df = reader(bio, **kwargs)


[####################                    ] | 51% Completed | 109.06 s

  df = reader(bio, **kwargs)


[####################                    ] | 51% Completed | 109.67 s

  df = reader(bio, **kwargs)


[#####################                   ] | 53% Completed | 113.63 s

  df = reader(bio, **kwargs)


[#####################                   ] | 53% Completed | 113.90 s

  df = reader(bio, **kwargs)


[#####################                   ] | 54% Completed | 115.95 s

  df = reader(bio, **kwargs)
  df = reader(bio, **kwargs)


[#####################                   ] | 54% Completed | 116.92 s

  df = reader(bio, **kwargs)


[######################                  ] | 55% Completed | 117.98 s

  df = reader(bio, **kwargs)


[######################                  ] | 56% Completed | 119.92 s

  df = reader(bio, **kwargs)


[######################                  ] | 57% Completed | 123.21 s

  df = reader(bio, **kwargs)


[#######################                 ] | 58% Completed | 125.17 s

  df = reader(bio, **kwargs)


[#######################                 ] | 58% Completed | 125.34 s

  df = reader(bio, **kwargs)


[#######################                 ] | 58% Completed | 126.49 s

  df = reader(bio, **kwargs)
  df = reader(bio, **kwargs)


[########################                ] | 61% Completed | 130.67 s

  df = reader(bio, **kwargs)


[########################                ] | 61% Completed | 131.43 s

  df = reader(bio, **kwargs)


[########################                ] | 61% Completed | 133.27 s

  df = reader(bio, **kwargs)


[########################                ] | 61% Completed | 134.03 s

  df = reader(bio, **kwargs)


[#########################               ] | 62% Completed | 135.08 s

  df = reader(bio, **kwargs)


[#########################               ] | 63% Completed | 137.05 s

  df = reader(bio, **kwargs)


[#########################               ] | 64% Completed | 138.89 s

  df = reader(bio, **kwargs)


[##########################              ] | 65% Completed | 142.68 s

  df = reader(bio, **kwargs)


[##########################              ] | 65% Completed | 143.73 s

  df = reader(bio, **kwargs)


[##########################              ] | 65% Completed | 144.16 s

  df = reader(bio, **kwargs)
  df = reader(bio, **kwargs)


[##########################              ] | 66% Completed | 145.97 s

  df = reader(bio, **kwargs)


[###########################             ] | 68% Completed | 149.48 s

  df = reader(bio, **kwargs)


[###########################             ] | 68% Completed | 150.54 s

  df = reader(bio, **kwargs)


[###########################             ] | 69% Completed | 153.03 s

  df = reader(bio, **kwargs)


[############################            ] | 70% Completed | 154.50 s

  df = reader(bio, **kwargs)
  df = reader(bio, **kwargs)


[############################            ] | 70% Completed | 154.95 s

  df = reader(bio, **kwargs)


[############################            ] | 70% Completed | 155.86 s

  df = reader(bio, **kwargs)


[############################            ] | 70% Completed | 156.47 s

  df = reader(bio, **kwargs)


[############################            ] | 71% Completed | 158.38 s

  df = reader(bio, **kwargs)


[############################            ] | 72% Completed | 160.39 s

  df = reader(bio, **kwargs)


[#############################           ] | 73% Completed | 164.34 s

  df = reader(bio, **kwargs)


[#############################           ] | 74% Completed | 165.58 s

  df = reader(bio, **kwargs)


[#############################           ] | 74% Completed | 166.00 s

  df = reader(bio, **kwargs)
  df = reader(bio, **kwargs)


[#############################           ] | 74% Completed | 166.55 s

  df = reader(bio, **kwargs)


[#############################           ] | 74% Completed | 167.41 s

  df = reader(bio, **kwargs)


[###############################         ] | 78% Completed | 176.82 s

  df = reader(bio, **kwargs)


[###############################         ] | 78% Completed | 177.48 s

  df = reader(bio, **kwargs)
  df = reader(bio, **kwargs)


[###############################         ] | 78% Completed | 178.51 s

  df = reader(bio, **kwargs)


[###############################         ] | 79% Completed | 178.78 s

  df = reader(bio, **kwargs)
  df = reader(bio, **kwargs)


[################################        ] | 80% Completed | 181.92 s

  df = reader(bio, **kwargs)


[################################        ] | 80% Completed | 183.31 s

  df = reader(bio, **kwargs)


[################################        ] | 81% Completed | 184.95 s

  df = reader(bio, **kwargs)


[################################        ] | 82% Completed | 188.46 s

  df = reader(bio, **kwargs)


[################################        ] | 82% Completed | 188.95 s

  df = reader(bio, **kwargs)


[#################################       ] | 82% Completed | 189.88 s

  df = reader(bio, **kwargs)


[#################################       ] | 84% Completed | 193.68 s

  df = reader(bio, **kwargs)


[##################################      ] | 85% Completed | 196.09 s

  df = reader(bio, **kwargs)


[##################################      ] | 86% Completed | 200.63 s

  df = reader(bio, **kwargs)
  df = reader(bio, **kwargs)


[##################################      ] | 86% Completed | 201.10 s

  df = reader(bio, **kwargs)


[###################################     ] | 88% Completed | 206.97 s

  df = reader(bio, **kwargs)


[###################################     ] | 89% Completed | 209.21 s

  df = reader(bio, **kwargs)


[####################################    ] | 90% Completed | 213.98 s

  df = reader(bio, **kwargs)


[####################################    ] | 90% Completed | 214.61 s

  df = reader(bio, **kwargs)


[####################################    ] | 90% Completed | 215.08 s

  df = reader(bio, **kwargs)


[#####################################   ] | 92% Completed | 218.42 s

  df = reader(bio, **kwargs)


[#####################################   ] | 94% Completed | 227.01 s

  df = reader(bio, **kwargs)


[#####################################   ] | 94% Completed | 227.16 s

  df = reader(bio, **kwargs)


[#####################################   ] | 94% Completed | 227.59 s

  df = reader(bio, **kwargs)


[#####################################   ] | 94% Completed | 227.95 s

  df = reader(bio, **kwargs)


[######################################  ] | 95% Completed | 229.57 s

  df = reader(bio, **kwargs)


[######################################  ] | 97% Completed | 231.90 s

  df = reader(bio, **kwargs)
  df = reader(bio, **kwargs)


[######################################  ] | 97% Completed | 232.35 s

  df = reader(bio, **kwargs)


[######################################  ] | 97% Completed | 233.24 s

  df = reader(bio, **kwargs)


[####################################### ] | 98% Completed | 234.76 s

  df = reader(bio, **kwargs)


[########################################] | 100% Completed | 236.71 s
Archivo CSV con columnas eliminadas guardado en: C:/TFM_OPF\dataset_columnas_eliminadas_pandas.csv
Tiempo total de ejecuci√≥n: 712.0728938579559 segundos
N√∫mero de filas del nuevo dataset: 3236626
N√∫mero de columnas del nuevo dataset: 162
