<a href="https://colab.research.google.com/github/armandossrecife/teste/blob/main/download_sync_async.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Utilidades

In [80]:
from urllib.parse import urlparse
import os

class Util:
  """Small helpers for inspecting download URLs."""

  def extrair_nome_extensao_url(self, url):
    """Split the last path segment of a URL into file name and extension.

    Args:
      url: Absolute URL whose scheme is http, https or ftp.

    Returns:
      A tuple (nome_arquivo, extensao). The extension is the text after the
      last '.' of the final path segment, without the dot itself.

    Raises:
      ValueError: if the scheme is unsupported, the URL has no path, the path
        does not end in a file name, or the file name has no extension.
        Any other error raised while parsing is re-raised as ValueError.
    """
    try:
      parsed_url = urlparse(url)
      if parsed_url.scheme not in ('http', 'https', 'ftp'):
        raise ValueError(f"Unsupported protocol: {parsed_url.scheme}")

      caminho_arquivo = parsed_url.path
      if not caminho_arquivo:
        raise ValueError("Missing file path in URL")

      # A path ending in '/' has an empty basename, i.e. no file at all.
      base = os.path.basename(caminho_arquivo)
      if not base:
        raise ValueError("Missing file name")

      # rpartition never fails to unpack; an empty separator means the
      # segment contains no '.' and therefore has no extension.
      nome_arquivo, separador, extensao = base.rpartition('.')
      if not separador:
        raise ValueError("Missing file extension")

      # A segment such as '.bashrc' has an extension-like tail but no name.
      if not nome_arquivo:
        raise ValueError("Missing file name")

      return nome_arquivo, extensao

    except ValueError:
      # Already the documented exception type; keep the original message.
      raise
    except Exception as ex:
      # Normalise unexpected failures (e.g. a non-string url) to ValueError.
      raise ValueError(f"{str(ex)}") from ex

In [86]:
# Start from empty output folders: 'sincrono' receives the synchronous
# downloads and 'assincrono' the asynchronous ones.
!rm -rf sincrono && mkdir sincrono
!rm -rf assincrono && mkdir assincrono

In [87]:
# List the working directory (long format, hidden entries, inode numbers).
!ls -lia

total 24
3932183 drwxr-xr-x 1 root root 4096 Jul 15 14:41 .
5242973 drwxr-xr-x 1 root root 4096 Jul 15 13:03 ..
5243036 drwxr-xr-x 2 root root 4096 Jul 15 14:41 assincrono
1310737 drwxr-xr-x 4 root root 4096 Jul 11 13:21 .config
3932184 drwxr-xr-x 1 root root 4096 Jul 11 13:22 sample_data
5243034 drwxr-xr-x 2 root root 4096 Jul 15 14:41 sincrono


In [88]:
# Base address of the raw files in the GitHub repository.
my_raw_data_site = "https://raw.githubusercontent.com/armandossrecife/teste/main"

# Remote file names, in download order.
my_remote_files = [
  "Adrienne.mp4",
  "Pizigani_1367_Chart_10MB.jpg",
  "Kalimba.mp3",
  "screen_matrix.jpeg",
  "demo.zip",
]

my_urls = [my_raw_data_site + "/" + remote_file for remote_file in my_remote_files]
# Individual names kept so later cells that refer to them still work.
my_url1, my_url2, my_url3, my_url4, my_url5 = my_urls

# Derive the local file name of every URL, echoing each URL and name.
my_util = Util()
my_filenames = []
for url in my_urls:
  print(url)
  nome_arquivo, extensao = my_util.extrair_nome_extensao_url(url)
  filename = f"{nome_arquivo}.{extensao}"
  print(filename)
  my_filenames.append(filename)

https://raw.githubusercontent.com/armandossrecife/teste/main/Adrienne.mp4
Adrienne.mp4
https://raw.githubusercontent.com/armandossrecife/teste/main/Pizigani_1367_Chart_10MB.jpg
Pizigani_1367_Chart_10MB.jpg
https://raw.githubusercontent.com/armandossrecife/teste/main/Kalimba.mp3
Kalimba.mp3
https://raw.githubusercontent.com/armandossrecife/teste/main/screen_matrix.jpeg
screen_matrix.jpeg
https://raw.githubusercontent.com/armandossrecife/teste/main/demo.zip
demo.zip


# Download síncrono

In [89]:
import requests

def download_one_file(url, filename, path, timeout=30, chunk_size=1024):
  """Download one URL with requests and save it as ``filename`` under ``path``.

  Args:
    url: Address of the file to fetch.
    filename: Name of the file to create.
    path: Existing directory that receives the file.
    timeout: Seconds passed to requests as the request timeout.
    chunk_size: Number of bytes requested per streamed chunk.

  Returns:
    None. Progress is reported with print(); a non-200 status prints a
    failure message and writes no file.

  Raises:
    requests.exceptions.RequestException: on connection errors or timeouts.
    OSError: if the destination file cannot be written.
  """
  # The context manager releases the connection even if writing fails midway.
  with requests.get(url, stream=True, timeout=timeout) as response:
    if response.status_code != 200:
      print(f"Failed to download {filename}")
      return

    # Content-Length is optional; do not crash when the server omits it.
    content_length = response.headers.get('content-length')
    if content_length is not None:
      total_size = int(content_length)
      print(f"Total file size: {total_size} bytes")
    else:
      print("Total file size: unknown")

    destination = os.path.join(path, filename)
    with open(destination, 'wb') as f:
      for chunk in response.iter_content(chunk_size):
        f.write(chunk)
    print(f"Downloaded {filename}")

def download_files_synchronous(my_urls, my_filenames, path):
  """Download every (url, filename) pair into ``path``, one after another.

  URLs and file names are paired positionally; pairing stops at the end of
  the shorter sequence.
  """
  pairs = zip(my_urls, my_filenames)
  for pair in pairs:
    download_one_file(*pair, path)

In [90]:
import datetime

# Timestamp taken immediately before the synchronous downloads start.
now1 = datetime.datetime.now()
print(now1)

# Files are fetched one after another into the 'sincrono' directory.
download_files_synchronous(my_urls, my_filenames, 'sincrono')
print("All files downloaded (synchronously)")

# Timestamp taken after the last download has returned.
now2 = datetime.datetime.now()
print(now2)

# Elapsed time for the whole batch, as a datetime.timedelta.
time_diff = now2 - now1
print(time_diff)

2024-07-15 14:42:23.040774
Total file size: 14944332 bytes
Downloaded Adrienne.mp4
Total file size: 10174706 bytes
Downloaded Pizigani_1367_Chart_10MB.jpg
Total file size: 8414449 bytes
Downloaded Kalimba.mp3
Total file size: 265136 bytes
Downloaded screen_matrix.jpeg
Total file size: 69856 bytes
Downloaded demo.zip
All files downloaded (synchronously)
2024-07-15 14:42:25.274679
0:00:02.233905


# Download assíncrono

https://docs.aiohttp.org

Asynchronous HTTP Client/Server for asyncio and Python.

In [3]:
!pip install aiohttp



In [4]:
!pip install aiodns



In [91]:
import asyncio
import aiohttp
import datetime

In [92]:
async def teste_async():
  """Smoke-test aiohttp: GET http://python.org and print status and content type.

  The response body is read as text but is neither used nor returned.
  """
  async with aiohttp.ClientSession() as client, client.get('http://python.org') as reply:
    print("Status:", reply.status)
    print("Content-type:", reply.headers['content-type'])
    # Reads the whole body; the text itself is discarded.
    page_text = await reply.text()

In [93]:
async def call_teste_async():
  """Await teste_async(); a thin wrapper that adds no logic of its own."""
  await teste_async()

# Top-level await: relies on the notebook executing cells inside an event loop.
await call_teste_async()

Status: 200
Content-type: text/html; charset=utf-8


In [96]:
async def download_async(url, filename, path, chunk_size=1024):
  """Download one URL with aiohttp and save it as ``filename`` under ``path``.

  Args:
    url: Address of the file to fetch.
    filename: Name of the file to create.
    path: Existing directory that receives the file.
    chunk_size: Maximum number of bytes read from the response per iteration.

  Returns:
    None. Progress is reported with print(); a non-200 status prints a
    failure message and writes no file.
  """
  async with aiohttp.ClientSession() as session:
    async with session.get(url) as response:
      # Check the status first so a failed request is reported as a failure
      # instead of crashing on a missing Content-Length header.
      if response.status != 200:
        print(f"Failed to download {filename}")
        return

      # Content-Length is optional; do not crash when the server omits it.
      content_length = response.headers.get('content-length')
      if content_length is not None:
        total_size = int(content_length)
        print(f"Total file size: {total_size} bytes")
      else:
        print("Total file size: unknown")

      destination = os.path.join(path, filename)
      # NOTE: the file writes are ordinary blocking calls; only the network
      # reads yield control to other tasks.
      with open(destination, 'wb') as f:
        async for chunk in response.content.iter_chunked(chunk_size):
          f.write(chunk)
      print(f"Downloaded {filename}")

async def download_files_asynchronous(my_urls, my_filenames, path):
  """Start one download task per (url, filename) pair and wait for all of them.

  URLs and file names are paired positionally; pairing stops at the end of
  the shorter sequence.
  """
  tasks = [
    asyncio.create_task(download_async(url, filename, path))
    for url, filename in zip(my_urls, my_filenames)
  ]

  # gather() waits until every scheduled task has finished.
  await asyncio.gather(*tasks)

In [97]:
# Timestamp taken immediately before the asynchronous downloads start.
now1 = datetime.datetime.now()
print(now1)

async def main():
  """Download all files into the 'assincrono' directory."""
  await download_files_asynchronous(my_urls, my_filenames, 'assincrono')

# Top-level await: relies on the notebook executing cells inside an event loop.
await main()

# Timestamp taken after every download task has finished.
now2 = datetime.datetime.now()
print(now2)

# Elapsed time for the whole batch, as a datetime.timedelta.
time_diff = now2 - now1
print(time_diff)

2024-07-15 14:44:22.728910
Total file size: 265136 bytes
Total file size: 14944332 bytes
Total file size: 8414449 bytes
Total file size: 10174706 bytes
Downloaded screen_matrix.jpeg
Total file size: 69856 bytes
Downloaded demo.zip
Downloaded Kalimba.mp3
Downloaded Pizigani_1367_Chart_10MB.jpg
Downloaded Adrienne.mp4
2024-07-15 14:44:23.134266
0:00:00.405356
