In [None]:
# Record the interpreter version used for this benchmark run.
!python --version

Python 3.10.12


In [None]:
%%capture
!pip install pyspark raster2xyz

# ⬇️ Downloads
---

In [None]:
# WorldClim v2.1 minimum-temperature archives, one URL per spatial resolution.
tmin = [
    f'https://biogeo.ucdavis.edu/data/worldclim/v2.1/base/wc2.1_{resolução}_tmin.zip'
    for resolução in ('10m', '5m', '2.5m', '30s')
]

In [None]:
# WorldClim v2.1 maximum-temperature archives, one URL per spatial resolution.
tmax = [
    f'https://biogeo.ucdavis.edu/data/worldclim/v2.1/base/wc2.1_{resolução}_tmax.zip'
    for resolução in ('10m', '5m', '2.5m', '30s')
]

In [None]:
# WorldClim v2.1 average-temperature archives, one URL per spatial resolution.
tavg = [
    f'https://biogeo.ucdavis.edu/data/worldclim/v2.1/base/wc2.1_{resolução}_tavg.zip'
    for resolução in ('10m', '5m', '2.5m', '30s')
]

In [None]:
# WorldClim v2.1 precipitation archives, one URL per spatial resolution.
prec = [
    f'https://biogeo.ucdavis.edu/data/worldclim/v2.1/base/wc2.1_{resolução}_prec.zip'
    for resolução in ('10m', '5m', '2.5m', '30s')
]

In [None]:
# Flatten the four per-variable lists into one download queue (order preserved).
urls = [*tmin, *tmax, *tavg, *prec]
urls

['https://biogeo.ucdavis.edu/data/worldclim/v2.1/base/wc2.1_10m_tmin.zip',
 'https://biogeo.ucdavis.edu/data/worldclim/v2.1/base/wc2.1_5m_tmin.zip',
 'https://biogeo.ucdavis.edu/data/worldclim/v2.1/base/wc2.1_2.5m_tmin.zip',
 'https://biogeo.ucdavis.edu/data/worldclim/v2.1/base/wc2.1_30s_tmin.zip',
 'https://biogeo.ucdavis.edu/data/worldclim/v2.1/base/wc2.1_10m_tmax.zip',
 'https://biogeo.ucdavis.edu/data/worldclim/v2.1/base/wc2.1_5m_tmax.zip',
 'https://biogeo.ucdavis.edu/data/worldclim/v2.1/base/wc2.1_2.5m_tmax.zip',
 'https://biogeo.ucdavis.edu/data/worldclim/v2.1/base/wc2.1_30s_tmax.zip',
 'https://biogeo.ucdavis.edu/data/worldclim/v2.1/base/wc2.1_10m_tavg.zip',
 'https://biogeo.ucdavis.edu/data/worldclim/v2.1/base/wc2.1_5m_tavg.zip',
 'https://biogeo.ucdavis.edu/data/worldclim/v2.1/base/wc2.1_2.5m_tavg.zip',
 'https://biogeo.ucdavis.edu/data/worldclim/v2.1/base/wc2.1_30s_tavg.zip',
 'https://biogeo.ucdavis.edu/data/worldclim/v2.1/base/wc2.1_10m_prec.zip',
 'https://biogeo.ucdavis.

In [None]:
# Loop through each URL and download with wget
for url in urls:
  !wget -q --show-progress "$url"



# ✳️ Imports
---

In [None]:
from pyspark.sql import SparkSession

In [None]:
import pandas as pd

In [None]:
import numpy as np

# 🅰️ Variáveis
---

In [None]:
# Number of timing samples collected per measurement; used as the default
# `número_amostras` of calcular_tempo_execução.
NÚMERO_AMOSTRAS = 32

In [None]:
# Omitting the '.master' setting lets Spark use all available CPU cores by default
spark = SparkSession.builder.getOrCreate()

# 🧮 Funções
---

In [None]:
def carregar_csv_pandas_dataframe(caminho_arquivo:str) -> pd.DataFrame:
  """Load a CSV file into memory with pandas.

  Args:
    caminho_arquivo: Path of the CSV file to read.

  Returns:
    The DataFrame produced by ``pd.read_csv``. It is returned so callers can
    inspect or reuse the loaded data; timing callers may simply ignore it.
  """
  return pd.read_csv(caminho_arquivo)

In [None]:
def carregar_csv_spark_dataframe(caminho_arquivo:str):
  """Create a Spark DataFrame for a CSV file using the notebook's ``spark`` session.

  NOTE(review): ``spark.read.csv`` is called with default options and no action
  is triggered on the result, so this presumably does not parse the whole file
  (the recorded timings stay roughly constant as the files grow). If the
  benchmark is meant to compare full loads against ``pd.read_csv``, trigger an
  action (e.g. ``.count()``) on the returned DataFrame — confirm the intent.

  Args:
    caminho_arquivo: Path of the CSV file to read.

  Returns:
    The DataFrame returned by ``spark.read.csv``, so callers can inspect or
    reuse it; timing callers may simply ignore it.
  """
  return spark.read.csv(caminho_arquivo)

In [None]:
def calcular_tempo_execução(função, caminho_arquivo, número_amostras=NÚMERO_AMOSTRAS):
  """Time ``função(caminho_arquivo)`` repeatedly, returning one duration per call.

  The standard-library ``timeit`` module is used instead of the ``%timeit``
  magic, so this function is plain Python and also works outside IPython
  (for example after being moved to a ``.py`` module).

  Args:
    função: Callable taking the file path as its only argument.
    caminho_arquivo: Path passed to ``função`` on every call.
    número_amostras: How many separate measurements to take.

  Returns:
    A 1-D ``numpy`` array with ``número_amostras`` durations in seconds,
    in the order the calls were made.
  """
  import timeit  # local import keeps this cell self-contained

  cronômetro = timeit.Timer(lambda: função(caminho_arquivo))

  # number=1 runs the call exactly once per sample: separate measurements are
  # needed, not one aggregated timing result covering every call.
  tempos = [cronômetro.timeit(number=1) for _ in range(número_amostras)]

  return np.array(tempos)

# ↘️ minimum temperature (°C)
---

## minimum temperature (°C) - 10 minutes

In [None]:
# Extract the 10-minute minimum-temperature archive (-o overwrites existing files),
# then convert the first raster (_01) to an XYZ CSV with raster2xyz.
!unzip -o wc2.1_10m_tmin.zip
!raster2xyz wc2.1_10m_tmin_01.tif wc2.1_10m_tmin_01.csv

Archive:  wc2.1_10m_tmin.zip
  inflating: wc2.1_10m_tmin_01.tif   
  inflating: wc2.1_10m_tmin_02.tif   
  inflating: wc2.1_10m_tmin_03.tif   
  inflating: wc2.1_10m_tmin_04.tif   
  inflating: wc2.1_10m_tmin_05.tif   
  inflating: wc2.1_10m_tmin_06.tif   
  inflating: wc2.1_10m_tmin_07.tif   
  inflating: wc2.1_10m_tmin_08.tif   
  inflating: wc2.1_10m_tmin_09.tif   
  inflating: wc2.1_10m_tmin_10.tif   
  inflating: wc2.1_10m_tmin_11.tif   
  inflating: wc2.1_10m_tmin_12.tif   
[2024-05-05 03:02:34 - INFO] - Getting geotransform and data...
[2024-05-05 03:02:34 - INFO] - Getting XYZ data...
[2024-05-05 03:02:34 - INFO] - Getting geotransformed coordinates...
[2024-05-05 03:02:34 - INFO] - Building XYZ data...
[2024-05-05 03:02:42 - INFO] - New XYZ (csv file) created...


In [None]:
# CSV written by raster2xyz above; input for the timing cells that follow.
caminho = 'wc2.1_10m_tmin_01.csv'

In [None]:
# pandas: collect the load-time samples for the file named by `caminho`.
tempos_tmin_10m_pandas = calcular_tempo_execução(
    função=carregar_csv_pandas_dataframe,
    caminho_arquivo=caminho,
)
tempos_tmin_10m_pandas

array([1.09651472, 1.92865718, 1.73240856, 1.07692   , 1.07480664,
       1.09495064, 1.08160305, 1.08540921, 1.08082396, 1.10814988,
       1.07558786, 1.07337964, 1.86450389, 1.89914597, 1.0511635 ,
       1.05311617, 1.0571062 , 1.07804957, 1.06163485, 1.05201735,
       1.04771623, 1.27148751, 1.05669025, 2.0539455 , 1.56308199,
       1.05212413, 1.05443223, 1.1540952 , 1.04987888, 1.061428  ,
       1.26483365, 1.06355035])

In [None]:
# Spark: collect the load-time samples for the file named by `caminho`.
tempos_tmin_10m_spark = calcular_tempo_execução(
    função=carregar_csv_spark_dataframe,
    caminho_arquivo=caminho,
)
tempos_tmin_10m_spark

array([10.61609087,  0.48086456,  0.60021178,  0.37439656,  0.35239764,
        0.50137273,  0.41451636,  0.30278204,  0.3997988 ,  0.58516363,
        0.27120372,  0.32519276,  0.4863475 ,  0.41910192,  0.61690987,
        0.83484451,  0.29530197,  0.56488182,  0.49748738,  0.45656465,
        0.24190956,  0.34474178,  0.18119981,  0.25310707,  0.464886  ,
        0.20872152,  0.21848589,  0.24955264,  0.19495704,  0.25835682,
        0.28925133,  0.18551876])

## minimum temperature (°C) - 5 minutes

In [None]:
# Extract the 5-minute minimum-temperature archive (-o overwrites existing files),
# then convert the first raster (_01) to an XYZ CSV with raster2xyz.
!unzip -o wc2.1_5m_tmin.zip
!raster2xyz wc2.1_5m_tmin_01.tif wc2.1_5m_tmin_01.csv

Archive:  wc2.1_5m_tmin.zip
  inflating: wc2.1_5m_tmin_01.tif    
  inflating: wc2.1_5m_tmin_02.tif    
  inflating: wc2.1_5m_tmin_03.tif    
  inflating: wc2.1_5m_tmin_04.tif    
  inflating: wc2.1_5m_tmin_05.tif    
  inflating: wc2.1_5m_tmin_06.tif    
  inflating: wc2.1_5m_tmin_07.tif    
  inflating: wc2.1_5m_tmin_08.tif    
  inflating: wc2.1_5m_tmin_09.tif    
  inflating: wc2.1_5m_tmin_10.tif    
  inflating: wc2.1_5m_tmin_11.tif    
  inflating: wc2.1_5m_tmin_12.tif    
[2024-05-05 03:03:47 - INFO] - Getting geotransform and data...
[2024-05-05 03:03:48 - INFO] - Getting XYZ data...
[2024-05-05 03:03:48 - INFO] - Getting geotransformed coordinates...
[2024-05-05 03:03:48 - INFO] - Building XYZ data...
[2024-05-05 03:04:29 - INFO] - New XYZ (csv file) created...


In [None]:
# CSV written by raster2xyz above; input for the timing cells that follow.
caminho = 'wc2.1_5m_tmin_01.csv'

In [None]:
# pandas: collect the load-time samples for the file named by `caminho`.
tempos_tmin_5m_pandas = calcular_tempo_execução(
    função=carregar_csv_pandas_dataframe,
    caminho_arquivo=caminho,
)
tempos_tmin_5m_pandas

array([5.09985534, 6.73521953, 4.68469608, 4.56373373, 6.60820232,
       4.43435481, 4.31532468, 6.12600466, 7.10282545, 4.36091988,
       6.39178986, 4.56757425, 4.36835416, 6.78228962, 4.42824718,
       4.46978931, 6.63246734, 4.28738209, 4.40366311, 7.28363112,
       4.25900127, 4.83628286, 6.39460229, 4.21916754, 4.40398114,
       6.44045961, 4.48104072, 4.21410691, 6.13824167, 4.86001926,
       4.32020807, 5.97052166])

In [None]:
# Spark: collect the load-time samples for the file named by `caminho`.
tempos_tmin_5m_spark = calcular_tempo_execução(
    função=carregar_csv_spark_dataframe,
    caminho_arquivo=caminho,
)
tempos_tmin_5m_spark

array([0.39998119, 0.64494531, 0.40640171, 0.70102898, 0.39100834,
       0.16740431, 0.2404334 , 0.38286395, 0.15890038, 0.1878957 ,
       0.3034137 , 0.14929356, 0.23343168, 0.30481368, 0.15284096,
       0.2924216 , 0.40003815, 0.30208382, 0.22374634, 0.28968781,
       0.13374217, 0.19345593, 0.25109034, 0.14795041, 0.16528393,
       0.27441928, 0.17205191, 0.27428718, 0.15716856, 0.1934094 ,
       0.23536078, 0.1923723 ])

## minimum temperature (°C) - 2.5 minutes

In [None]:
# Extract the 2.5-minute minimum-temperature archive (-o overwrites existing files),
# then convert the first raster (_01) to an XYZ CSV with raster2xyz.
!unzip -o wc2.1_2.5m_tmin.zip
!raster2xyz wc2.1_2.5m_tmin_01.tif wc2.1_2.5m_tmin_01.csv

Archive:  wc2.1_2.5m_tmin.zip
  inflating: wc2.1_2.5m_tmin_01.tif  
  inflating: wc2.1_2.5m_tmin_02.tif  
  inflating: wc2.1_2.5m_tmin_03.tif  
  inflating: wc2.1_2.5m_tmin_04.tif  
  inflating: wc2.1_2.5m_tmin_05.tif  
  inflating: wc2.1_2.5m_tmin_06.tif  
  inflating: wc2.1_2.5m_tmin_07.tif  
  inflating: wc2.1_2.5m_tmin_08.tif  
  inflating: wc2.1_2.5m_tmin_09.tif  
  inflating: wc2.1_2.5m_tmin_10.tif  
  inflating: wc2.1_2.5m_tmin_11.tif  
  inflating: wc2.1_2.5m_tmin_12.tif  
[2024-05-05 03:07:36 - INFO] - Getting geotransform and data...
[2024-05-05 03:07:38 - INFO] - Getting XYZ data...
[2024-05-05 03:07:40 - INFO] - Getting geotransformed coordinates...
[2024-05-05 03:07:42 - INFO] - Building XYZ data...
[2024-05-05 03:10:48 - INFO] - New XYZ (csv file) created...


In [None]:
# CSV written by raster2xyz above; input for the timing cells that follow.
caminho = 'wc2.1_2.5m_tmin_01.csv'

In [None]:
# pandas: collect the load-time samples for the file named by `caminho`.
tempos_tmin_2_5m_pandas = calcular_tempo_execução(
    função=carregar_csv_pandas_dataframe,
    caminho_arquivo=caminho,
)
tempos_tmin_2_5m_pandas

array([21.91878809, 22.46864899, 23.03490272, 21.836362  , 29.90102256,
       24.95047976, 23.16257622, 22.6187023 , 22.72868633, 21.63556661,
       21.12074067, 21.14091241, 21.24494502, 24.80432814, 26.38433257,
       22.03750191, 21.16162602, 21.36923378, 21.03705011, 22.65572653,
       25.1609865 , 25.41051628, 22.59656895, 28.59358495, 31.35772773,
       21.40200054, 21.53118578, 22.64893798, 22.88124442, 22.99024501,
       21.95327131, 21.1883131 ])

In [None]:
# Spark: collect the load-time samples for the file named by `caminho`.
tempos_tmin_2_5m_spark = calcular_tempo_execução(
    função=carregar_csv_spark_dataframe,
    caminho_arquivo=caminho,
)
tempos_tmin_2_5m_spark

array([0.54865686, 0.68240785, 0.2967396 , 0.39597665, 0.59844292,
       0.34284685, 0.61710819, 0.31403838, 0.42243781, 0.54444601,
       0.28141423, 0.38684959, 0.57876986, 0.19700149, 0.23527247,
       0.29485367, 0.17021741, 0.30960696, 0.15253902, 0.2220213 ,
       0.34290053, 0.18126155, 0.20359312, 0.27919184, 0.17577979,
       0.41704932, 0.31162529, 0.27283949, 0.45496178, 0.18604649,
       0.32719642, 0.40014776])

## minimum temperature (°C) - 30 seconds

In [None]:
# Extract the 30-second minimum-temperature archive (-o overwrites existing files),
# then convert the first raster (_01) to an XYZ CSV with raster2xyz.
# NOTE: in the recorded run the conversion was interrupted (^C) before a CSV was written.
!unzip -o wc2.1_30s_tmin.zip
!raster2xyz wc2.1_30s_tmin_01.tif wc2.1_30s_tmin_01.csv

Archive:  wc2.1_30s_tmin.zip
  inflating: wc2.1_30s_tmin_01.tif   
  inflating: wc2.1_30s_tmin_02.tif   
  inflating: wc2.1_30s_tmin_03.tif   
  inflating: wc2.1_30s_tmin_04.tif   
  inflating: wc2.1_30s_tmin_05.tif   
  inflating: wc2.1_30s_tmin_06.tif   
  inflating: wc2.1_30s_tmin_07.tif   
  inflating: wc2.1_30s_tmin_08.tif   
  inflating: wc2.1_30s_tmin_09.tif   
  inflating: wc2.1_30s_tmin_10.tif   
  inflating: wc2.1_30s_tmin_11.tif   
  inflating: wc2.1_30s_tmin_12.tif   
[2024-05-05 03:24:44 - INFO] - Getting geotransform and data...
[2024-05-05 03:25:04 - INFO] - Getting XYZ data...
^C


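⚠️⚠️⚠️ NÃO RODOU — a conversão com `raster2xyz` foi interrompida (`^C`) na etapa "Getting XYZ data"; o CSV da resolução de 30 segundos não foi gerado e os tempos de leitura não foram medidos.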

# ↗️ maximum temperature (°C)
---

## maximum temperature (°C) - 10 minutes

In [None]:
# Extract the 10-minute maximum-temperature archive (-o overwrites existing files),
# then convert the first raster (_01) to an XYZ CSV with raster2xyz.
!unzip -o wc2.1_10m_tmax.zip
!raster2xyz wc2.1_10m_tmax_01.tif wc2.1_10m_tmax_01.csv

Archive:  wc2.1_10m_tmax.zip
  inflating: wc2.1_10m_tmax_01.tif   
  inflating: wc2.1_10m_tmax_02.tif   
  inflating: wc2.1_10m_tmax_03.tif   
  inflating: wc2.1_10m_tmax_04.tif   
  inflating: wc2.1_10m_tmax_05.tif   
  inflating: wc2.1_10m_tmax_06.tif   
  inflating: wc2.1_10m_tmax_07.tif   
  inflating: wc2.1_10m_tmax_08.tif   
  inflating: wc2.1_10m_tmax_09.tif   
  inflating: wc2.1_10m_tmax_10.tif   
  inflating: wc2.1_10m_tmax_11.tif   
  inflating: wc2.1_10m_tmax_12.tif   
[2024-05-05 03:26:00 - INFO] - Getting geotransform and data...
[2024-05-05 03:26:00 - INFO] - Getting XYZ data...
[2024-05-05 03:26:00 - INFO] - Getting geotransformed coordinates...
[2024-05-05 03:26:00 - INFO] - Building XYZ data...
[2024-05-05 03:26:11 - INFO] - New XYZ (csv file) created...


In [None]:
# CSV written by raster2xyz above; input for the timing cells that follow.
caminho = 'wc2.1_10m_tmax_01.csv'

In [None]:
# pandas: collect the load-time samples for the file named by `caminho`.
tempos_tmax_10m_pandas = calcular_tempo_execução(
    função=carregar_csv_pandas_dataframe,
    caminho_arquivo=caminho,
)
tempos_tmax_10m_pandas

array([1.14577042, 1.065425  , 1.06796233, 1.06060689, 1.06280462,
       1.68506023, 1.65656975, 1.64193231, 1.68576336, 1.54232639,
       1.54882975, 1.56067302, 1.54567521, 1.59078913, 1.53827337,
       1.04816972, 1.05080167, 1.07025979, 1.05222374, 1.04440354,
       1.05534045, 1.05006567, 1.05135041, 1.04812837, 1.65339027,
       1.69723092, 1.64060141, 1.68963323, 1.53648165, 1.55833837,
       1.54044601, 1.55066103])

In [None]:
# Spark: collect the load-time samples for the file named by `caminho`.
tempos_tmax_10m_spark = calcular_tempo_execução(
    função=carregar_csv_spark_dataframe,
    caminho_arquivo=caminho,
)
tempos_tmax_10m_spark

array([0.31741242, 0.34659916, 0.67164996, 0.2497124 , 0.42431173,
       0.52964002, 0.42156102, 0.34897502, 0.11568891, 0.15950086,
       0.21276041, 0.13845357, 0.21803963, 0.24602251, 0.17652461,
       0.32978719, 0.15396393, 0.26225915, 0.40170511, 0.28857844,
       0.31924561, 0.37512576, 0.23125244, 0.31920805, 0.22731866,
       0.26859756, 0.50621371, 0.20218027, 0.30880089, 0.38200384,
       0.30288018, 0.53120345])

## maximum temperature (°C) - 5 minutes

In [None]:
# Extract the 5-minute maximum-temperature archive (-o overwrites existing files),
# then convert the first raster (_01) to an XYZ CSV with raster2xyz.
!unzip -o wc2.1_5m_tmax.zip
!raster2xyz wc2.1_5m_tmax_01.tif wc2.1_5m_tmax_01.csv

Archive:  wc2.1_5m_tmax.zip
  inflating: wc2.1_5m_tmax_01.tif    
  inflating: wc2.1_5m_tmax_02.tif    
  inflating: wc2.1_5m_tmax_03.tif    
  inflating: wc2.1_5m_tmax_04.tif    
  inflating: wc2.1_5m_tmax_05.tif    
  inflating: wc2.1_5m_tmax_06.tif    
  inflating: wc2.1_5m_tmax_07.tif    
  inflating: wc2.1_5m_tmax_08.tif    
  inflating: wc2.1_5m_tmax_09.tif    
  inflating: wc2.1_5m_tmax_10.tif    
  inflating: wc2.1_5m_tmax_11.tif    
  inflating: wc2.1_5m_tmax_12.tif    
[2024-05-05 03:27:09 - INFO] - Getting geotransform and data...
[2024-05-05 03:27:09 - INFO] - Getting XYZ data...
[2024-05-05 03:27:09 - INFO] - Getting geotransformed coordinates...
[2024-05-05 03:27:10 - INFO] - Building XYZ data...
[2024-05-05 03:27:55 - INFO] - New XYZ (csv file) created...


In [None]:
# CSV written by raster2xyz above; input for the timing cells that follow.
caminho = 'wc2.1_5m_tmax_01.csv'

In [None]:
# pandas: collect the load-time samples for the file named by `caminho`.
tempos_tmax_5m_pandas = calcular_tempo_execução(
    função=carregar_csv_pandas_dataframe,
    caminho_arquivo=caminho,
)
tempos_tmax_5m_pandas

array([5.81406333, 4.25728858, 4.23228132, 6.83510865, 6.19864793,
       5.87499672, 4.22707689, 4.21086917, 6.63218258, 6.2306895 ,
       6.08276236, 4.21098008, 4.2042824 , 6.36024542, 6.46560765,
       6.23463887, 4.20677787, 4.19516268, 6.1266056 , 8.18477583,
       6.39089196, 4.35814194, 4.199565  , 5.71649772, 6.45629645,
       6.26901125, 4.87462694, 4.20460812, 5.48464883, 7.03956044,
       6.60218129, 6.49956204])

In [None]:
# Spark: collect the load-time samples for the file named by `caminho`.
tempos_tmax_5m_spark = calcular_tempo_execução(
    função=carregar_csv_spark_dataframe,
    caminho_arquivo=caminho,
)
tempos_tmax_5m_spark

array([0.33237996, 0.32783826, 0.70159399, 0.59774013, 0.39208637,
       0.48799606, 0.68402144, 0.45347246, 0.5307972 , 0.52782223,
       0.45562413, 0.50926794, 0.50089446, 0.60659993, 0.61680847,
       0.6205956 , 0.44254287, 0.35456353, 0.488107  , 0.26792379,
       0.51469691, 0.39672615, 0.45342397, 0.45273829, 0.29933982,
       0.28167073, 0.20492498, 0.20126426, 0.16777502, 0.30729408,
       0.1625291 , 0.29630257])

## maximum temperature (°C) - 2.5 minutes

In [None]:
# Extract the 2.5-minute maximum-temperature archive (-o overwrites existing files),
# then convert the first raster (_01) to an XYZ CSV with raster2xyz.
!unzip -o wc2.1_2.5m_tmax.zip
!raster2xyz wc2.1_2.5m_tmax_01.tif wc2.1_2.5m_tmax_01.csv

Archive:  wc2.1_2.5m_tmax.zip
  inflating: wc2.1_2.5m_tmax_01.tif  
  inflating: wc2.1_2.5m_tmax_02.tif  
  inflating: wc2.1_2.5m_tmax_03.tif  
  inflating: wc2.1_2.5m_tmax_04.tif  
  inflating: wc2.1_2.5m_tmax_05.tif  
  inflating: wc2.1_2.5m_tmax_06.tif  
  inflating: wc2.1_2.5m_tmax_07.tif  
  inflating: wc2.1_2.5m_tmax_08.tif  
  inflating: wc2.1_2.5m_tmax_09.tif  
  inflating: wc2.1_2.5m_tmax_10.tif  
  inflating: wc2.1_2.5m_tmax_11.tif  
  inflating: wc2.1_2.5m_tmax_12.tif  
[2024-05-05 03:31:17 - INFO] - Getting geotransform and data...
[2024-05-05 03:31:18 - INFO] - Getting XYZ data...
[2024-05-05 03:31:19 - INFO] - Getting geotransformed coordinates...
[2024-05-05 03:31:20 - INFO] - Building XYZ data...
[2024-05-05 03:34:18 - INFO] - New XYZ (csv file) created...


In [None]:
# CSV written by raster2xyz above; input for the timing cells that follow.
caminho = 'wc2.1_2.5m_tmax_01.csv'

In [None]:
# pandas: collect the load-time samples for the file named by `caminho`.
tempos_tmax_2_5m_pandas = calcular_tempo_execução(
    função=carregar_csv_pandas_dataframe,
    caminho_arquivo=caminho,
)
tempos_tmax_2_5m_pandas

array([26.73142437, 26.37797096, 29.5336333 , 23.23373661, 25.22137616,
       21.05992993, 24.11774114, 22.9715695 , 27.09830325, 26.95660172,
       20.65927704, 26.14766159, 21.18612355, 22.84234813, 23.98149512,
       20.92609901, 25.47939626, 22.79968255, 28.59868439, 23.65261095,
       24.8577057 , 22.31473772, 21.3911671 , 25.37439465, 20.99630117,
       23.69268335, 24.58591468, 38.79006027, 26.80051478, 21.22664294,
       25.66526452, 21.71297053])

In [None]:
# Spark: collect the load-time samples for the file named by `caminho`.
tempos_tmax_2_5m_spark = calcular_tempo_execução(
    função=carregar_csv_spark_dataframe,
    caminho_arquivo=caminho,
)
tempos_tmax_2_5m_spark

array([0.42524551, 0.33667031, 0.6691793 , 0.30961909, 0.54985269,
       0.29899613, 0.45069243, 0.18175942, 0.24736885, 0.34870092,
       0.46602408, 0.29165522, 0.20064963, 0.42882115, 0.15841262,
       0.34483292, 0.33915346, 0.2337079 , 0.4538029 , 0.35738783,
       0.37101775, 0.16634394, 0.25517216, 0.47535965, 0.35184963,
       0.33488681, 0.29764513, 0.39766627, 0.19070699, 0.26259059,
       0.47166282, 0.31954537])

## maximum temperature (°C) - 30 seconds

In [None]:
# Extract the 30-second maximum-temperature archive (-o overwrites existing files),
# then convert the first raster (_01) to an XYZ CSV with raster2xyz.
# NOTE: in the recorded run the conversion was interrupted (^C) before a CSV was written.
!unzip -o wc2.1_30s_tmax.zip
!raster2xyz wc2.1_30s_tmax_01.tif wc2.1_30s_tmax_01.csv

Archive:  wc2.1_30s_tmax.zip
  inflating: wc2.1_30s_tmax_01.tif   
  inflating: wc2.1_30s_tmax_02.tif   
  inflating: wc2.1_30s_tmax_03.tif   
  inflating: wc2.1_30s_tmax_04.tif   
  inflating: wc2.1_30s_tmax_05.tif   
  inflating: wc2.1_30s_tmax_06.tif   
  inflating: wc2.1_30s_tmax_07.tif   
  inflating: wc2.1_30s_tmax_08.tif   
  inflating: wc2.1_30s_tmax_09.tif   
  inflating: wc2.1_30s_tmax_10.tif   
  inflating: wc2.1_30s_tmax_11.tif   
  inflating: wc2.1_30s_tmax_12.tif   
[2024-05-05 03:48:54 - INFO] - Getting geotransform and data...
[2024-05-05 03:49:26 - INFO] - Getting XYZ data...
^C


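⚠️⚠️⚠️ NÃO RODOU — a conversão com `raster2xyz` foi interrompida (`^C`) na etapa "Getting XYZ data"; o CSV da resolução de 30 segundos não foi gerado e os tempos de leitura não foram medidos.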

# ➡️ average temperature (°C)
---

## average temperature (°C) - 10 minutes

In [None]:
# Extract the 10-minute average-temperature archive (-o overwrites existing files),
# then convert the first raster (_01) to an XYZ CSV with raster2xyz.
!unzip -o wc2.1_10m_tavg.zip
!raster2xyz wc2.1_10m_tavg_01.tif wc2.1_10m_tavg_01.csv

Archive:  wc2.1_10m_tavg.zip
  inflating: wc2.1_10m_tavg_01.tif   
  inflating: wc2.1_10m_tavg_02.tif   
  inflating: wc2.1_10m_tavg_03.tif   
  inflating: wc2.1_10m_tavg_04.tif   
  inflating: wc2.1_10m_tavg_05.tif   
  inflating: wc2.1_10m_tavg_06.tif   
  inflating: wc2.1_10m_tavg_07.tif   
  inflating: wc2.1_10m_tavg_08.tif   
  inflating: wc2.1_10m_tavg_09.tif   
  inflating: wc2.1_10m_tavg_10.tif   
  inflating: wc2.1_10m_tavg_11.tif   
  inflating: wc2.1_10m_tavg_12.tif   
[2024-05-05 03:50:03 - INFO] - Getting geotransform and data...
[2024-05-05 03:50:03 - INFO] - Getting XYZ data...
[2024-05-05 03:50:03 - INFO] - Getting geotransformed coordinates...
[2024-05-05 03:50:03 - INFO] - Building XYZ data...
[2024-05-05 03:50:15 - INFO] - New XYZ (csv file) created...


In [None]:
# CSV written by raster2xyz above; input for the timing cells that follow.
caminho = 'wc2.1_10m_tavg_01.csv'

In [None]:
# pandas: collect the load-time samples for the file named by `caminho`.
tempos_tavg_10m_pandas = calcular_tempo_execução(
    função=carregar_csv_pandas_dataframe,
    caminho_arquivo=caminho,
)
tempos_tavg_10m_pandas

array([1.67095224, 1.59415945, 1.54740913, 1.56684125, 1.54576367,
       1.55890231, 1.55046573, 1.55559488, 1.53406872, 1.61137117,
       1.65728699, 1.55598311, 1.05358486, 1.0486462 , 1.04736736,
       1.06346051, 1.0590445 , 1.05737687, 1.04978988, 1.05203672,
       1.04905119, 1.63300553, 1.70817386, 1.54133398, 1.66461122,
       1.72526807, 1.70723569, 1.57014195, 1.53789074, 1.54831019,
       1.5375979 , 1.55244788])

In [None]:
# Spark: collect the load-time samples for the file named by `caminho`.
tempos_tavg_10m_spark = calcular_tempo_execução(
    função=carregar_csv_spark_dataframe,
    caminho_arquivo=caminho,
)
tempos_tavg_10m_spark

array([0.42102882, 0.3772685 , 0.66608334, 0.28470964, 0.37043253,
       0.34310877, 0.22473347, 0.41048625, 0.30841426, 0.41305062,
       0.13252678, 0.2884215 , 0.38640405, 0.31257314, 0.34583945,
       0.23527507, 0.44793408, 0.22378339, 0.27146326, 0.39783318,
       0.30372355, 0.53384807, 0.29060354, 0.45317339, 0.1310093 ,
       0.30083523, 0.33361983, 0.31134281, 0.33947985, 0.20031046,
       0.37446124, 0.33696253])

## average temperature (°C) - 5 minutes

In [None]:
# Extract the 5-minute average-temperature archive (-o overwrites existing files),
# then convert the first raster (_01) to an XYZ CSV with raster2xyz.
!unzip -o wc2.1_5m_tavg.zip
!raster2xyz wc2.1_5m_tavg_01.tif wc2.1_5m_tavg_01.csv

Archive:  wc2.1_5m_tavg.zip
  inflating: wc2.1_5m_tavg_01.tif    
  inflating: wc2.1_5m_tavg_02.tif    
  inflating: wc2.1_5m_tavg_03.tif    
  inflating: wc2.1_5m_tavg_04.tif    
  inflating: wc2.1_5m_tavg_05.tif    
  inflating: wc2.1_5m_tavg_06.tif    
  inflating: wc2.1_5m_tavg_07.tif    
  inflating: wc2.1_5m_tavg_08.tif    
  inflating: wc2.1_5m_tavg_09.tif    
  inflating: wc2.1_5m_tavg_10.tif    
  inflating: wc2.1_5m_tavg_11.tif    
  inflating: wc2.1_5m_tavg_12.tif    
[2024-05-05 03:51:15 - INFO] - Getting geotransform and data...
[2024-05-05 03:51:16 - INFO] - Getting XYZ data...
[2024-05-05 03:51:16 - INFO] - Getting geotransformed coordinates...
[2024-05-05 03:51:16 - INFO] - Building XYZ data...
[2024-05-05 03:51:59 - INFO] - New XYZ (csv file) created...


In [None]:
# CSV written by raster2xyz above; input for the timing cells that follow.
caminho = 'wc2.1_5m_tavg_01.csv'

In [None]:
# pandas: collect the load-time samples for the file named by `caminho`.
tempos_tavg_5m_pandas = calcular_tempo_execução(
    função=carregar_csv_pandas_dataframe,
    caminho_arquivo=caminho,
)
tempos_tavg_5m_pandas

array([4.82019564, 7.20367274, 6.69889182, 6.75569286, 6.40686515,
       6.39544438, 6.45621162, 6.74151329, 6.65716585, 6.00474909,
       4.23271093, 6.22652038, 6.7432803 , 6.20075697, 6.24374174,
       6.29305074, 4.21396161, 4.21568794, 9.2360425 , 6.73970079,
       6.2178584 , 6.22223169, 6.38309267, 4.23248857, 4.22493928,
       6.07614687, 6.72506865, 6.20904935, 6.23014675, 6.45116059,
       4.23687062, 4.22860959])

In [None]:
# Spark: collect the load-time samples for the file named by `caminho`.
tempos_tavg_5m_spark = calcular_tempo_execução(
    função=carregar_csv_spark_dataframe,
    caminho_arquivo=caminho,
)
tempos_tavg_5m_spark

array([0.13754369, 0.24337861, 0.18697963, 0.25164387, 0.13774813,
       0.25777475, 0.1918316 , 0.26896528, 0.39378831, 0.26827014,
       0.31802015, 0.16084776, 0.35469106, 0.15057749, 0.32005777,
       0.14245894, 0.2197229 , 0.35420048, 0.27003468, 0.37406303,
       0.1906466 , 0.41515415, 0.20423219, 0.39962087, 0.14870495,
       0.48631136, 0.36248778, 0.27313976, 0.39607759, 0.22643192,
       0.44659319, 0.4038117 ])

## average temperature (°C) - 2.5 minutes

In [None]:
# Extract the 2.5-minute average-temperature archive (-o overwrites existing files),
# then convert the first raster (_01) to an XYZ CSV with raster2xyz.
!unzip -o wc2.1_2.5m_tavg.zip
!raster2xyz wc2.1_2.5m_tavg_01.tif wc2.1_2.5m_tavg_01.csv

Archive:  wc2.1_2.5m_tavg.zip
  inflating: wc2.1_2.5m_tavg_01.tif  
  inflating: wc2.1_2.5m_tavg_02.tif  
  inflating: wc2.1_2.5m_tavg_03.tif  
  inflating: wc2.1_2.5m_tavg_04.tif  
  inflating: wc2.1_2.5m_tavg_05.tif  
  inflating: wc2.1_2.5m_tavg_06.tif  
  inflating: wc2.1_2.5m_tavg_07.tif  
  inflating: wc2.1_2.5m_tavg_08.tif  
  inflating: wc2.1_2.5m_tavg_09.tif  
  inflating: wc2.1_2.5m_tavg_10.tif  
  inflating: wc2.1_2.5m_tavg_11.tif  
  inflating: wc2.1_2.5m_tavg_12.tif  
[2024-05-05 03:55:30 - INFO] - Getting geotransform and data...
[2024-05-05 03:55:32 - INFO] - Getting XYZ data...
[2024-05-05 03:55:34 - INFO] - Getting geotransformed coordinates...
[2024-05-05 03:55:36 - INFO] - Building XYZ data...
[2024-05-05 03:58:48 - INFO] - New XYZ (csv file) created...


In [None]:
# CSV written by raster2xyz above; input for the timing cells that follow.
caminho = 'wc2.1_2.5m_tavg_01.csv'

In [None]:
# pandas: collect the load-time samples for the file named by `caminho`.
tempos_tavg_2_5m_pandas = calcular_tempo_execução(
    função=carregar_csv_pandas_dataframe,
    caminho_arquivo=caminho,
)
tempos_tavg_2_5m_pandas

array([25.92601895, 26.74321404, 37.76884241, 26.62354717, 22.06762857,
       25.57530034, 21.0713868 , 25.58962923, 20.93037621, 27.3764099 ,
       26.77860046, 26.85714983, 25.67785468, 20.99784634, 25.98157905,
       21.12531006, 27.87736599, 21.00714787, 28.62419665, 25.87116102,
       25.973081  , 23.76427789, 24.38630349, 23.00748256, 25.30407063,
       22.25351067, 38.81897458, 27.74007003, 26.74062969, 25.7140308 ,
       21.11864207, 25.82659912])

In [None]:
# Spark: collect the load-time samples for the file named by `caminho`.
tempos_tavg_2_5m_spark = calcular_tempo_execução(
    função=carregar_csv_spark_dataframe,
    caminho_arquivo=caminho,
)
tempos_tavg_2_5m_spark

array([0.41964448, 0.37372126, 0.39187688, 0.39344966, 0.21751081,
       0.38105143, 0.21157695, 0.36150316, 0.26634291, 0.35143749,
       0.27863522, 0.3935065 , 0.43555683, 0.21598705, 0.41468139,
       0.31380944, 0.38622067, 0.28743104, 0.41143912, 0.34077392,
       0.3325367 , 0.27403363, 0.38838305, 0.48656531, 0.21560195,
       0.35215549, 0.30595384, 0.3825905 , 0.14454864, 0.17518703,
       0.17586039, 0.13406395])

## average temperature (°C) - 30 seconds

In [None]:
# Extract the 30-second average-temperature archive (-o overwrites existing files),
# then convert the first raster (_01) to an XYZ CSV with raster2xyz.
# NOTE: in the recorded run the conversion was interrupted (^C) before a CSV was written.
!unzip -o wc2.1_30s_tavg.zip
!raster2xyz wc2.1_30s_tavg_01.tif wc2.1_30s_tavg_01.csv

Archive:  wc2.1_30s_tavg.zip
  inflating: wc2.1_30s_tavg_01.tif   
  inflating: wc2.1_30s_tavg_02.tif   
  inflating: wc2.1_30s_tavg_03.tif   
  inflating: wc2.1_30s_tavg_04.tif   
  inflating: wc2.1_30s_tavg_05.tif   
  inflating: wc2.1_30s_tavg_06.tif   
  inflating: wc2.1_30s_tavg_07.tif   
  inflating: wc2.1_30s_tavg_08.tif   
  inflating: wc2.1_30s_tavg_09.tif   
  inflating: wc2.1_30s_tavg_10.tif   
  inflating: wc2.1_30s_tavg_11.tif   
  inflating: wc2.1_30s_tavg_12.tif   
[2024-05-05 04:14:05 - INFO] - Getting geotransform and data...
[2024-05-05 04:14:29 - INFO] - Getting XYZ data...
^C


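⚠️⚠️⚠️ NÃO RODOU — a conversão com `raster2xyz` foi interrompida (`^C`) na etapa "Getting XYZ data"; o CSV da resolução de 30 segundos não foi gerado e os tempos de leitura não foram medidos.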

# ☔ precipitation (mm)
---

## precipitation (mm) - 10 minutes

In [None]:
# Extract the 10-minute precipitation archive (-o overwrites existing files),
# then convert the first raster (_01) to an XYZ CSV with raster2xyz.
!unzip -o wc2.1_10m_prec.zip
!raster2xyz wc2.1_10m_prec_01.tif wc2.1_10m_prec_01.csv

Archive:  wc2.1_10m_prec.zip
  inflating: wc2.1_10m_prec_01.tif   
  inflating: wc2.1_10m_prec_02.tif   
  inflating: wc2.1_10m_prec_03.tif   
  inflating: wc2.1_10m_prec_04.tif   
  inflating: wc2.1_10m_prec_05.tif   
  inflating: wc2.1_10m_prec_06.tif   
  inflating: wc2.1_10m_prec_07.tif   
  inflating: wc2.1_10m_prec_08.tif   
  inflating: wc2.1_10m_prec_09.tif   
  inflating: wc2.1_10m_prec_10.tif   
  inflating: wc2.1_10m_prec_11.tif   
  inflating: wc2.1_10m_prec_12.tif   
  inflating: readme.txt              
[2024-05-05 04:15:50 - INFO] - Getting geotransform and data...
[2024-05-05 04:15:50 - INFO] - Getting XYZ data...
[2024-05-05 04:15:51 - INFO] - Getting geotransformed coordinates...
[2024-05-05 04:15:51 - INFO] - Building XYZ data...
[2024-05-05 04:16:01 - INFO] - New XYZ (csv file) created...


In [None]:
# CSV written by raster2xyz above; input for the timing cells that follow.
caminho = 'wc2.1_10m_prec_01.csv'

In [None]:
# pandas: collect the load-time samples for the file named by `caminho`.
tempos_prec_10m_pandas = calcular_tempo_execução(
    função=carregar_csv_pandas_dataframe,
    caminho_arquivo=caminho,
)
tempos_prec_10m_pandas

array([1.51927415, 1.43707332, 1.414928  , 1.54524253, 1.50835236,
       1.55149081, 1.51785936, 1.18579279, 0.95659283, 0.96206645,
       0.95524317, 0.95848028, 0.9545697 , 0.95419605, 0.9551691 ,
       0.94915459, 0.9490499 , 1.0540889 , 1.71353275, 1.46411795,
       1.41847393, 1.4103377 , 1.54985966, 1.54938592, 1.57557736,
       1.5551321 , 1.50846001, 1.42193133, 1.41690959, 1.41551479,
       1.4321957 , 1.41176329])

In [None]:
# Spark: collect the load-time samples for the file named by `caminho`.
tempos_prec_10m_spark = calcular_tempo_execução(
    função=carregar_csv_spark_dataframe,
    caminho_arquivo=caminho,
)
tempos_prec_10m_spark

array([0.30482619, 0.13653409, 0.2120786 , 0.36034999, 0.28157877,
       0.35912488, 0.1470022 , 0.21906631, 0.35057897, 0.23151211,
       0.65682785, 0.24400139, 0.34776824, 0.37608672, 0.20400647,
       0.37498888, 0.12676718, 0.22520513, 0.37019983, 0.26328045,
       0.38790511, 0.16944001, 0.27755706, 0.40283426, 0.21213784,
       0.3605708 , 0.14207367, 0.30304419, 0.49379443, 0.32167711,
       0.39654875, 0.24292329])

## precipitation (mm) - 5 minutes

In [None]:
# Extract the 5-minute precipitation archive (-o overwrites existing files),
# then convert the first raster (_01) to an XYZ CSV with raster2xyz.
!unzip -o wc2.1_5m_prec.zip
!raster2xyz wc2.1_5m_prec_01.tif wc2.1_5m_prec_01.csv

Archive:  wc2.1_5m_prec.zip
  inflating: wc2.1_5m_prec_01.tif    
  inflating: wc2.1_5m_prec_02.tif    
  inflating: wc2.1_5m_prec_03.tif    
  inflating: wc2.1_5m_prec_04.tif    
  inflating: wc2.1_5m_prec_05.tif    
  inflating: wc2.1_5m_prec_06.tif    
  inflating: wc2.1_5m_prec_07.tif    
  inflating: wc2.1_5m_prec_08.tif    
  inflating: wc2.1_5m_prec_09.tif    
  inflating: wc2.1_5m_prec_10.tif    
  inflating: wc2.1_5m_prec_11.tif    
  inflating: wc2.1_5m_prec_12.tif    
  inflating: readme.txt              
[2024-05-05 04:16:55 - INFO] - Getting geotransform and data...
[2024-05-05 04:16:55 - INFO] - Getting XYZ data...
[2024-05-05 04:16:56 - INFO] - Getting geotransformed coordinates...
[2024-05-05 04:16:56 - INFO] - Building XYZ data...
[2024-05-05 04:17:40 - INFO] - New XYZ (csv file) created...


In [None]:
# CSV written by raster2xyz above; input for the timing cells that follow.
caminho = 'wc2.1_5m_prec_01.csv'

In [None]:
# pandas: collect the load-time samples for the file named by `caminho`.
tempos_prec_5m_pandas = calcular_tempo_execução(
    função=carregar_csv_pandas_dataframe,
    caminho_arquivo=caminho,
)
tempos_prec_5m_pandas

array([6.39154439, 5.92342111, 8.19233981, 7.01097071, 6.01643596,
       6.10632513, 6.04347806, 5.95313455, 4.88316249, 3.85581206,
       3.82810739, 6.0193482 , 6.19276883, 5.86391657, 5.72614275,
       5.73212945, 5.73849574, 5.97827249, 4.58023509, 3.87089517,
       4.0410539 , 5.95699763, 6.25491037, 5.80784164, 5.68447552,
       5.73971775, 5.72248707, 5.99799691, 4.23508819, 3.84163777,
       4.47294031, 5.963031  ])

In [None]:
# Spark: collect the load-time samples for the file named by `caminho`.
tempos_prec_5m_spark = calcular_tempo_execução(
    função=carregar_csv_spark_dataframe,
    caminho_arquivo=caminho,
)
tempos_prec_5m_spark

array([0.47869786, 0.31133547, 0.22973939, 0.47184205, 0.19267242,
       0.27411474, 0.54871087, 0.27950166, 0.48008696, 0.26893477,
       0.35111891, 0.44612285, 0.23773102, 0.43516525, 0.17638305,
       0.45671845, 0.28400417, 0.24335205, 0.45140305, 0.22428442,
       0.29164521, 0.47148469, 0.32091162, 0.44929981, 0.15532159,
       0.27900936, 0.46287906, 0.33060799, 0.38303438, 0.15876685,
       0.19132499, 0.38986449])

## precipitation (mm) - 2.5 minutes

In [None]:
# Extract the 2.5-minute precipitation archive (-o overwrites existing files),
# then convert the first raster (_01) to an XYZ CSV with raster2xyz.
!unzip -o wc2.1_2.5m_prec.zip
!raster2xyz wc2.1_2.5m_prec_01.tif wc2.1_2.5m_prec_01.csv

Archive:  wc2.1_2.5m_prec.zip
  inflating: wc2.1_2.5m_prec_01.tif  
  inflating: wc2.1_2.5m_prec_02.tif  
  inflating: wc2.1_2.5m_prec_03.tif  
  inflating: wc2.1_2.5m_prec_04.tif  
  inflating: wc2.1_2.5m_prec_05.tif  
  inflating: wc2.1_2.5m_prec_06.tif  
  inflating: wc2.1_2.5m_prec_07.tif  
  inflating: wc2.1_2.5m_prec_08.tif  
  inflating: wc2.1_2.5m_prec_09.tif  
  inflating: wc2.1_2.5m_prec_10.tif  
  inflating: wc2.1_2.5m_prec_11.tif  
  inflating: wc2.1_2.5m_prec_12.tif  
  inflating: readme.txt              
[2024-05-05 04:20:52 - INFO] - Getting geotransform and data...
[2024-05-05 04:20:53 - INFO] - Getting XYZ data...
[2024-05-05 04:20:55 - INFO] - Getting geotransformed coordinates...
[2024-05-05 04:20:57 - INFO] - Building XYZ data...
[2024-05-05 04:23:52 - INFO] - New XYZ (csv file) created...


In [None]:
# CSV written by raster2xyz above; input for the timing cells that follow.
caminho = 'wc2.1_2.5m_prec_01.csv'

In [None]:
# pandas: collect the load-time samples for the file named by `caminho`.
tempos_prec_2_5m_pandas = calcular_tempo_execução(
    função=carregar_csv_pandas_dataframe,
    caminho_arquivo=caminho,
)
tempos_prec_2_5m_pandas

array([19.45368084, 25.38011016, 34.14655192, 24.95996389, 24.46642099,
       19.87932867, 23.35498997, 21.34926486, 21.86489129, 24.05995282,
       24.75479788, 24.36242265, 24.72724992, 23.25351488, 20.70214057,
       25.35938833, 23.47048783, 19.72788667, 25.08780571, 24.0962693 ,
       23.8388793 , 23.90300585, 23.98627174, 23.06813673, 19.45825527,
       23.5368384 , 22.99657036, 22.47849592, 36.81212408, 25.65002584,
       24.19158434, 24.73529134])

In [None]:
# Spark: collect the load-time samples for the file named by `caminho`.
tempos_prec_2_5m_spark = calcular_tempo_execução(
    função=carregar_csv_spark_dataframe,
    caminho_arquivo=caminho,
)
tempos_prec_2_5m_spark

array([0.32496469, 0.29079905, 0.2562235 , 0.19999514, 0.21633655,
       0.12899225, 0.22311023, 0.20631373, 0.21392008, 0.22193626,
       0.21044452, 0.23971421, 0.12183562, 0.22069726, 0.27088385,
       0.23300601, 0.25916312, 0.21282692, 0.22095109, 0.22071532,
       0.11314382, 0.16685637, 0.13469988, 0.23159874, 0.13413818,
       0.23526262, 0.1914287 , 0.0971901 , 0.14301413, 0.18201442,
       0.15205962, 0.24408136])

## precipitation (mm) - 30 seconds

In [None]:
# Extract the 30-second precipitation archive (-o overwrites existing files),
# then convert the first raster (_01) to an XYZ CSV with raster2xyz.
# NOTE: in the recorded run the conversion was interrupted (^C) before a CSV was written.
!unzip -o wc2.1_30s_prec.zip
!raster2xyz wc2.1_30s_prec_01.tif wc2.1_30s_prec_01.csv

Archive:  wc2.1_30s_prec.zip
  inflating: wc2.1_30s_prec_01.tif   
  inflating: wc2.1_30s_prec_02.tif   
  inflating: wc2.1_30s_prec_03.tif   
  inflating: wc2.1_30s_prec_04.tif   
  inflating: wc2.1_30s_prec_05.tif   
  inflating: wc2.1_30s_prec_06.tif   
  inflating: wc2.1_30s_prec_07.tif   
  inflating: wc2.1_30s_prec_08.tif   
  inflating: wc2.1_30s_prec_09.tif   
  inflating: wc2.1_30s_prec_10.tif   
  inflating: wc2.1_30s_prec_11.tif   
  inflating: wc2.1_30s_prec_12.tif   
  inflating: readme.txt              
[2024-05-05 04:37:11 - INFO] - Getting geotransform and data...
[2024-05-05 04:37:18 - INFO] - Getting XYZ data...
^C


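⚠️⚠️⚠️ NÃO RODOU — a conversão com `raster2xyz` foi interrompida (`^C`) na etapa "Getting XYZ data"; o CSV da resolução de 30 segundos não foi gerado e os tempos de leitura não foram medidos.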

# 🕵️‍♂️ Observações
* Iterar o processo de descompactação usando o módulo 'os'.
* Organizar melhor os diretórios de extração e criar variáveis globais para controlá-los.
* Mencionar no artigo que as declarações de função retardam a execução do código, pois precisam ser carregadas na memória antes da execução, mas como os testes são todos realizados dentro da mesma estrutura, esse fator se torna equivalente entre as amostras.
* Estruturar os resultados de tempo em um dicionário.
* Referenciar o Kaggle no artigo, mas mencionar que não o utilizamos devido a problemas de estabilidade no ambiente.
* Fornecer uma explicação melhor de como o timeit funciona e os parâmetros escolhidos.
* Com o timeit, a latência não é uma preocupação, ao contrário do %%time; explicar isso mais detalhadamente.
* Uso de arquivos colunares? Não neste estudo, mas um tópico potencial para um artigo futuro.
* O erro ao extrair a resolução de 30 segundos é causado pela biblioteca ou pelo ambiente?
* Databricks mais lento em resoluções menores, devido ao hardware ou características de processamento distribuído?
* Fechar a sessão do Spark após o uso.
* Código processando apenas o arquivo 01 de cada arquivo zipado.