# Universidad Internacional de La Rioja  

### Máster Universitario en Visual Analytics and Big Data  

---

### **Predicción y Análisis de la Demanda y Suministro de Productos entre la Comunidad Andina y España**  

### **Trabajo Fin de Estudio**  
- **Presentado por:** Danilo Andrés Beleño Villafañe  

---

## **Código 1: Paso de la Zona de Tránsito a la Zona de Datos Crudos**  


In [1]:
import time
import zipfile
import logging
from io import BytesIO
from google.cloud import storage
from concurrent.futures import ThreadPoolExecutor

In [2]:
# Configure the root logger so the INFO-level progress messages emitted while
# processing archives are shown, not only warnings and errors.
logging.basicConfig(level=logging.INFO)

In [3]:
def process_blob(blob, destination_bucket, target_zip_folder):
    """Unpack one zipped blob and upload each contained file to a bucket.

    The archive is downloaded fully into memory, opened as a zip file, and
    every file member is uploaded under ``target_zip_folder + member name``.
    Directory entries inside the archive are skipped: they carry no data and
    would otherwise be uploaded as empty objects whose names end in ``/``.

    Args:
        blob: Source object exposing ``name`` and ``download_as_bytes()``
            (presumably a ``google.cloud.storage.Blob`` -- confirm with caller).
        destination_bucket: Object exposing ``blob(name)`` whose result has
            ``upload_from_string(data)`` (presumably a storage ``Bucket``).
        target_zip_folder: Prefix concatenated directly in front of each member
            name, so it should normally end with ``/``.

    Returns:
        None. Any failure is logged with its traceback and swallowed, so one
        bad archive does not interrupt the processing of the others.
    """
    try:
        logging.info(f'Processing {blob.name}')
        zip_data = blob.download_as_bytes()

        # Context managers release the buffer and the archive even when an
        # upload fails midway through the member list.
        with BytesIO(zip_data) as zip_file:
            with zipfile.ZipFile(zip_file, 'r') as z:
                for member in z.infolist():
                    # Folder entries have no content to upload.
                    if member.is_dir():
                        continue

                    filename = member.filename
                    logging.info(f'Extracting {filename}')

                    file_data = z.read(member)
                    destination_blob_name = f'{target_zip_folder}{filename}'
                    destination_blob = destination_bucket.blob(destination_blob_name)

                    destination_blob.upload_from_string(file_data)
                    logging.info(f'{filename} extracted to {destination_blob_name}')
    except Exception as e:
        # logging.exception keeps the traceback, which a plain error message
        # would discard.
        logging.exception(f"Error processing {blob.name}: {e}")

In [4]:
def extract_zip_to_bucket(source_bucket_name, destination_bucket_name, zip_folder,
                          target_zip_folder, max_workers=3):
    """Extract every ``.zip`` object under a prefix into another bucket.

    Lists the objects in ``source_bucket_name`` whose names start with
    ``zip_folder``, and hands each one ending in ``.zip`` to ``process_blob``
    on a thread pool. The call returns only after all submitted archives have
    been processed, because leaving the executor context waits for its tasks.

    Args:
        source_bucket_name: Name of the bucket that holds the zip archives.
        destination_bucket_name: Name of the bucket receiving extracted files.
        zip_folder: Object-name prefix used to list candidate archives.
        target_zip_folder: Prefix passed through to ``process_blob`` for the
            extracted object names.
        max_workers: Number of archives processed concurrently. Defaults to 3,
            the value that was previously hard-coded.

    Returns:
        None.
    """
    client = storage.Client()
    source_bucket = client.bucket(source_bucket_name)
    destination_bucket = client.bucket(destination_bucket_name)

    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        # Keep each future together with its blob name so that a failure can
        # be attributed to the archive that caused it.
        futures = {
            executor.submit(process_blob, blob, destination_bucket, target_zip_folder): blob.name
            for blob in source_bucket.list_blobs(prefix=zip_folder)
            if blob.name.endswith('.zip')
        }

        # A discarded future hides whatever its task raised. Report such
        # failures instead of dropping them, without aborting the other tasks.
        for future, blob_name in futures.items():
            error = future.exception()
            if error is not None:
                logging.error(f"Unhandled error while processing {blob_name}: {error}")

In [5]:
# Bucket that is searched for .zip archives (used as the source bucket).
transient_zone = 'data-factory-0-transient-zone'
# Bucket that receives the extracted files (used as the destination bucket).
raw_data_zone = 'data-factory-1-raw-data-zone'
# Object-name prefix used to list the archives in the source bucket.
source_data_folder = 'data/datacomex/taric/'
# Prefix placed in front of every extracted file name. It is concatenated
# directly with the name, so the trailing '/' is required.
target_data_folder = 'data/datacomex/taric/'

In [None]:
# Time the whole extraction run. perf_counter() is a monotonic clock, so the
# measured duration cannot be skewed by system clock adjustments mid-run.
# The values are only meaningful as a difference, not as timestamps.
start_time = time.perf_counter()

extract_zip_to_bucket(transient_zone, raw_data_zone, source_data_folder, target_data_folder)

end_time = time.perf_counter()

In [7]:
# Break the measured duration (in seconds) into hours, minutes and seconds.
elapsed_time = end_time - start_time

# Whole hours first; what is left over is split into minutes and seconds.
hours = elapsed_time // 3600
remainder = elapsed_time % 3600
minutes = remainder // 60
seconds = remainder % 60

# Fractions are truncated for display only.
print(f"The execution time was: {int(hours)} hours, {int(minutes)} minutes, and {int(seconds)} seconds.")

The execution time was: 0 hours, 8 minutes, and 9 seconds.
