In [1]:
#sih
from tqdm import tqdm
import os
from pysus.online_data import SIH
from concurrent.futures import ThreadPoolExecutor, as_completed
import time

In [6]:

# Function to download SIH data for a specific year, month, and UF
def download_sih_data(year, month, uf):
    """Download the SIH files for a single UF / year / month combination.

    Calls ``SIH.download`` with one-element lists built from the arguments,
    using the module-level ``data_group`` and ``dbfs_raw_path`` settings; both
    must be defined before this function is called.

    Args:
        year: Year to download (e.g. ``2019``).
        month: Month to download (presumably 1-12 -- confirm against pysus).
        uf: UF code forwarded to pysus (e.g. ``'sp'``).

    Returns:
        ``True`` when the download call completed without raising, ``None``
        when it raised. Failures are reported on stdout instead of being
        propagated, so one bad file does not stop a batch of downloads.
    """
    print(f"Downloading data for UF: {uf}, Year: {year}, Month: {month}")
    try:
        SIH.download([uf], [year], [month], groups=data_group, data_dir=dbfs_raw_path)
    except Exception as e:
        print(f"Failed to download data for UF: {uf}, Year: {year}, Month: {month}: {e}")
        # None is the failure marker; success must return something else so
        # that a caller checking ``result is None`` can tell the two apart.
        return None
    return True
        
# Parallel download function with progress tracking
def download_data_parallel(ufs, years, months, max_workers=100):
    """Download every UF / year / month combination using a thread pool.

    One ``download_sih_data`` task is submitted per combination. A tqdm bar
    tracks completed tasks, and a summary with the error count and the total
    wall-clock time is printed at the end.

    A task is counted as failed when its result is ``None`` or when it raises.
    NOTE(review): the count is only meaningful if ``download_sih_data`` returns
    a non-None value on success -- verify against its implementation.

    Args:
        ufs: Sequence of UF codes.
        years: Sequence of years.
        months: Sequence of months.
        max_workers: Upper bound on concurrent download threads. Defaults to
            100, the value that was previously hard-coded.

    Returns:
        None. Progress and the summary are written to stdout.
    """
    # Record the start time of the job
    start_time = time.time()

    # Ensure the destination folder exists; exist_ok avoids the
    # check-then-create race of a separate os.path.exists() test.
    os.makedirs(dbfs_raw_path, exist_ok=True)

    # One (year, month, uf) tuple per download, in the worker's argument order
    tasks = [(year, month, uf) for uf in ufs for year in years for month in months]

    error_count = 0

    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        with tqdm(total=len(tasks)) as progress_bar:
            futures = [executor.submit(download_sih_data, *task) for task in tasks]

            # Process the tasks as they are completed
            for future in as_completed(futures):
                try:
                    result = future.result()
                except Exception as e:
                    # Keep going so a single crashing task cannot abort the
                    # loop and suppress the final summary.
                    print(f"Download task raised an unexpected error: {e}")
                    result = None

                if result is None:  # None represents failure
                    error_count += 1

                progress_bar.update(1)

    # Print summary of download errors
    print(f"All downloads completed, errors: {error_count}")

    total_time = time.time() - start_time
    print(f"Total execution time: {total_time:.2f} seconds")

# Run configuration: the 27 UF codes to fetch, for January 2019
ufs = [
    'ac', 'al', 'ap', 'am', 'ba', 'ce', 'df', 'es', 'go',
    'ma', 'mt', 'ms', 'mg', 'pa', 'pb', 'pr', 'pe', 'pi',
    'rj', 'rn', 'rs', 'ro', 'rr', 'sc', 'sp', 'se', 'to',
]
years = [2019]
months = [1]  # use list(range(1, 13)) to fetch the whole year
data_group = ['RD']
# Folder that receives the raw downloaded files
dbfs_raw_path = "./tmp4"

# Start the threaded download for the configuration above
download_data_parallel(ufs, years, months)

  0%|          | 0/27 [00:00<?, ?it/s]

Downloading data for UF: ac, Year: 2019, Month: 1
Downloading data for UF: al, Year: 2019, Month: 1
Downloading data for UF: ap, Year: 2019, Month: 1
Downloading data for UF: am, Year: 2019, Month: 1
Downloading data for UF: ba, Year: 2019, Month: 1
Downloading data for UF: ce, Year: 2019, Month: 1
Downloading data for UF: df, Year: 2019, Month: 1
Downloading data for UF: es, Year: 2019, Month: 1
Downloading data for UF: go, Year: 2019, Month: 1
Downloading data for UF: ma, Year: 2019, Month: 1
Downloading data for UF: mt, Year: 2019, Month: 1
Downloading data for UF: ms, Year: 2019, Month: 1
Downloading data for UF: mg, Year: 2019, Month: 1
Downloading data for UF: pa, Year: 2019, Month: 1





Downloading data for UF: pb, Year: 2019, Month: 1
Downloading data for UF: pr, Year: 2019, Month: 1
Downloading data for UF: pe, Year: 2019, Month: 1
Downloading data for UF: pi, Year: 2019, Month: 1
Downloading data for UF: rj, Year: 2019, Month: 1


  0%|          | 0/1 [00:00<?, ?it/s]

Downloading data for UF: rn, Year: 2019, Month: 1
Downloading data for UF: rs, Year: 2019, Month: 1
Downloading data for UF: ro, Year: 2019, Month: 1
Downloading data for UF: rr, Year: 2019, Month: 1


[A

  0%|          | 0/1 [00:00<?, ?it/s]

Downloading data for UF: sc, Year: 2019, Month: 1
Downloading data for UF: sp, Year: 2019, Month: 1
Downloading data for UF: se, Year: 2019, Month: 1
Downloading data for UF: to, Year: 2019, Month: 1


[A[A
[A


[A[A[A

[A[A





[A[A[A[A[A[A





[A[A[A[A[A[A





[A[A[A[A[A[A






[A[A[A[A[A[A[A




[A[A[A[A[A







[A[A[A[A[A[A[A[A








[A[A[A[A[A[A[A[A[A










[A[A[A[A[A[A[A[A[A[A[A










[A[A[A[A[A[A[A[A[A[A[A











[A[A[A[A[A[A[A[A[A[A[A[A











[A[A[A[A[A[A[A[A[A[A[A[A













[A[A[A[A[A[A[A[A[A[A[A[A[A[A












[A[A[A[A[A[A[A[A[A[A[A[A[A



[A[A[A[A
[A














[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A















[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A















[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A

[A[A

[A[A






[A[A[A[A[A[A[A




[A[A[A[A[A







[A[A[A[A[A[A[A[A
















[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A








[A[A[A[A[A[A[A[A[A









[A[A[A[A[A[A[A[A[A[A










[A[A[A[A[A[A[A[A[A[A



RDTO1901.parquet: 100%|██████████| 20.0k/20.0k [00:08<00:00, 2.46kB/s]





[A[A[A[A[A









[A[A[A[A[A[A[A[A[A[A














[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A














[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A














[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A
















[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A













[A[A[A[A[A[A[A[A[A[A[A[A[A[A













[A[A[A[A[A[A[A[A[A[A[A[A[A[A











[A[A[A[A[A[A[A[A[A[A[A[A


[A[A[A
[A
[A






[A[A[A[A[A[A[A






[A[A[A[A[A[A[A






 15%|█▍        | 4/27 [00:17<01:25,  3.70s/it]




[A[A[A[A[A









[A[A[A[A[A[A[A[A[A[A












[A[A[A[A[A[A[A[A[A[A[A[A[A















[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A














[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A
















[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A





[A[A[A[A[A[A







[A[A[A[A[A[A[A[A[A







[A[A[A[A[A[A[A[A







[A[A[A[A[A[A[A[A



[A[A[A[A









[A[A[A[A[A[A[A[A[A[A















[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A












[A[A[A[A[A[A[A[A[A[A[A[A[A
















[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A
















[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A














[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A














[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A













[A[A[A[A[A[A[A[A[A[A[A[A[A[A


[A[A[A











[A[A[A[A[A[A[A[A[A[A[A[A








[A[A[A[A[A[A[A[A[A








[A[A[A[A[A[A[A[A[A










[A[A[A[A[A[A[A[A[A[A[A










[A[A[A[A[A[A[A[A[A[A[A



[A[A[A[A









[A[A[A[A[A[A[A[A[A[A















[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A












[A[A[A[A[A[A[A[A[A[A[A[A[A












[A[A[A[A[A[A[



RDSP1901.dbc:  56%|█████▋    | 9.22M/16.3M [00:38<00:06, 1.07MB/s][A[A[A[A[A[A



[A[A[A[A














[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A















[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A












[A[A[A[A[A[A[A[A[A[A[A[A[A
















[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A


[A[A[A


[A[A[A







[A[A[A[A[A[A[A[A



[A[A[A[A



[A[A[A[A












[A[A[A[A[A[A[A[A[A[A[A[A[A




[A[A[A[A[A
















[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A










[A[A[A[A[A[A[A[A[A[A[A










[A[A[A[A[A[A[A[A[A[A[A


[A[A[A


[A[A[A















[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A



RDMT1901.parquet: 100%|██████████| 52.5k/52.5k [00:23<00:00, 2.26kB/s]

















[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A





[A[A[A[A[A[A




[A[A[A[A[A







[A[A[A[A[A[A[A[A


[A[A[A















[A[