<a href="https://colab.research.google.com/github/armandossrecife/teste/blob/main/download_sync_async.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Utilidades

In [134]:
from urllib.parse import urlparse
import os

class Util:
  def extrair_nome_extensao_url(self, url):
    try:
      parsed_url = urlparse(url)
      if parsed_url.scheme not in ('http', 'https', 'ftp'):
        raise ValueError(f"Unsupported protocol: {parsed_url.scheme}")

      caminho_arquivo = parsed_url.path
      if not caminho_arquivo:
        raise ValueError("Missing file path in URL")

      #nome_arquivo, extensao = os.path.splitext(os.path.basename(caminho_arquivo))
      nome_arquivo, extensao = os.path.basename(caminho_arquivo).rsplit('.', 1)

      if not nome_arquivo:
        raise ValueError("Missing file name")

      return nome_arquivo, extensao

    except Exception as ex:
      raise ValueError(f"{str(ex)}") from ex

In [135]:
!rm -rf sincrono && mkdir sincrono
!rm -rf assincrono && mkdir assincrono
!rm -rf threads && mkdir threads

In [136]:
!ls -lia

total 28
3932183 drwxr-xr-x 1 root root 4096 Jul 15 15:25 .
5242973 drwxr-xr-x 1 root root 4096 Jul 15 13:03 ..
5243036 drwxr-xr-x 2 root root 4096 Jul 15 15:25 assincrono
1310737 drwxr-xr-x 4 root root 4096 Jul 11 13:21 .config
3932184 drwxr-xr-x 1 root root 4096 Jul 11 13:22 sample_data
5243034 drwxr-xr-x 2 root root 4096 Jul 15 15:25 sincrono
5243038 drwxr-xr-x 2 root root 4096 Jul 15 15:25 threads


In [137]:
my_raw_data_site = "https://raw.githubusercontent.com/armandossrecife/teste/main"
my_url1 = my_raw_data_site + "/" + "Adrienne.mp4"
my_url2 = my_raw_data_site + "/" + "Pizigani_1367_Chart_10MB.jpg"
my_url3 = my_raw_data_site + "/" + "Kalimba.mp3"
my_url4 = my_raw_data_site + "/" + "screen_matrix.jpeg"
my_url5 = my_raw_data_site + "/" + "demo.zip"

my_urls = [my_url1, my_url2, my_url3, my_url4, my_url5]

my_util = Util()
my_filenames = []
for url in my_urls:
  print(url)
  nome_arquivo, extensao = my_util.extrair_nome_extensao_url(url)
  filename = f"{nome_arquivo}.{extensao}"
  print(f"{nome_arquivo}.{extensao}")
  my_filenames.append(filename)

https://raw.githubusercontent.com/armandossrecife/teste/main/Adrienne.mp4
Adrienne.mp4
https://raw.githubusercontent.com/armandossrecife/teste/main/Pizigani_1367_Chart_10MB.jpg
Pizigani_1367_Chart_10MB.jpg
https://raw.githubusercontent.com/armandossrecife/teste/main/Kalimba.mp3
Kalimba.mp3
https://raw.githubusercontent.com/armandossrecife/teste/main/screen_matrix.jpeg
screen_matrix.jpeg
https://raw.githubusercontent.com/armandossrecife/teste/main/demo.zip
demo.zip


# Download síncrono

https://requests.readthedocs.io

Requests is an elegant and simple HTTP library for Python, built for human beings.

In [138]:
import requests

def download_one_file(url, filename, path):
  response = requests.get(url, stream=True)
  if response.status_code == 200:
    total_size = int(response.headers['content-length'])  # Get total file size
    print(f"Total file size: {total_size} bytes")
    path = os.path.join(path, filename)
    with open(path, 'wb') as f:
      for chunk in response.iter_content(1024):
        f.write(chunk)
    print(f"Downloaded {filename}")
  else:
    print(f"Failed to download {filename}")

def download_files_synchronous(my_urls, my_filenames, path):
  # Download each file synchronously
  for url, filename in zip(my_urls, my_filenames):
    download_one_file(url, filename, path)

In [139]:
import datetime

now1 = datetime.datetime.now()
print(now1)

download_files_synchronous(my_urls, my_filenames, 'sincrono')
print("All files downloaded (synchronously)")

now2 = datetime.datetime.now()
print(now2)

time_diff = now2 - now1
print(time_diff)

2024-07-15 15:25:27.374346
Total file size: 14944332 bytes
Downloaded Adrienne.mp4
Total file size: 10174706 bytes
Downloaded Pizigani_1367_Chart_10MB.jpg
Total file size: 8414449 bytes
Downloaded Kalimba.mp3
Total file size: 265136 bytes
Downloaded screen_matrix.jpeg
Total file size: 69856 bytes
Downloaded demo.zip
All files downloaded (synchronously)
2024-07-15 15:25:29.520397
0:00:02.146051


# Download assíncrono

https://docs.aiohttp.org

Asynchronous HTTP Client/Server for asyncio and Python.

https://en.wikipedia.org/wiki/Asynchrony_(computer_programming)

https://en.wikipedia.org/wiki/Async/await

In [3]:
!pip install aiohttp



In [4]:
!pip install aiodns



In [140]:
import asyncio
import aiohttp
import datetime

In [141]:
async def teste_async():
  async with aiohttp.ClientSession() as session:
    async with session.get('http://python.org') as response:
      print("Status:", response.status)
      print("Content-type:", response.headers['content-type'])
      html = await response.text()

In [142]:
async def call_teste_async():
  await teste_async()

await call_teste_async()

Status: 200
Content-type: text/html; charset=utf-8


In [143]:
async def download_async(url, filename, path):
  """Downloads a file from the given URL and saves it with the specified filename."""
  async with aiohttp.ClientSession() as session:
    async with session.get(url) as response:
      total_size = int(response.headers['content-length'])  # Get total file size
      print(f"Total file size: {total_size} bytes")
      if response.status == 200:
        path = os.path.join(path, filename)
        with open(path, 'wb') as f:
          # Read content in chunks using aiohttp.read()
          while True:
            chunk = await response.content.read(1024)  # Read in chunks of 1024 bytes
            if not chunk:
              break
            f.write(chunk)
        print(f"Downloaded {filename}")
      else:
        print(f"Failed to download {filename}")

async def download_files_asynchronous(my_urls, my_filenames, path):
  """Downloads all files asynchronously."""
  tasks = []
  for url, filename in zip(my_urls, my_filenames):
    tasks.append(asyncio.create_task(download_async(url, filename, path)))

  # Run all tasks concurrently using asyncio.run
  await asyncio.gather(*tasks)

In [144]:
now1 = datetime.datetime.now()
print(now1)

async def main():
  await download_files_asynchronous(my_urls, my_filenames, 'assincrono')

# Call main as an async function and await it
await main()

now2 = datetime.datetime.now()
print(now2)

time_diff = now2 - now1
print(time_diff)

2024-07-15 15:25:47.661875
Total file size: 69856 bytes
Total file size: 265136 bytes
Total file size: 14944332 bytes
Total file size: 10174706 bytes
Total file size: 8414449 bytes
Downloaded demo.zip
Downloaded screen_matrix.jpeg
Downloaded Kalimba.mp3
Downloaded Pizigani_1367_Chart_10MB.jpg
Downloaded Adrienne.mp4
2024-07-15 15:25:47.990049
0:00:00.328174


# Downloads usando Threads

https://docs.python.org/3/library/threading.html

https://en.wikipedia.org/wiki/Thread_(computing)

In [145]:
import threading
import requests

def download_files_via_threads(urls, filenames, path):
  """Downloads multiple files asynchronously using threads."""
  threads = []
  for url, filename in zip(urls, filenames):
    thread = threading.Thread(target=download_one_file(url, filename, path), args=(url, filename, path))
    threads.append(thread)
    thread.start()

  # Wait for all threads to finish (blocking)
  for thread in threads:
    thread.join()

In [146]:
now1 = datetime.datetime.now()
print(now1)

download_files_via_threads(my_urls, my_filenames, 'threads')

now2 = datetime.datetime.now()
print(now2)

time_diff = now2 - now1
print(time_diff)

2024-07-15 15:30:24.180148
Total file size: 14944332 bytes
Downloaded Adrienne.mp4
Total file size: 10174706 bytes
Downloaded Pizigani_1367_Chart_10MB.jpg
Total file size: 8414449 bytes
Downloaded Kalimba.mp3
Total file size: 265136 bytes
Downloaded screen_matrix.jpeg
Total file size: 69856 bytes
Downloaded demo.zip
2024-07-15 15:30:25.888925
0:00:01.708777


# Conceitos chaves

## Chamadas síncronas

In [147]:
import time

def task1_sync():
  """Simulates a long-running task that takes 2 seconds."""
  print("Task 1 started")
  print(datetime.datetime.now())
  time.sleep(2)  # Simulate work for 2 seconds
  print("Task 1 finished")
  print(datetime.datetime.now())

def task2_sync():
  """Simulates a shorter task that takes 1 second."""
  print("Task 2 started")
  print(datetime.datetime.now())
  time.sleep(1)  # Simulate work for 1 second
  print("Task 2 finished")
  print(datetime.datetime.now())

def run_tasks_sinc():
  """Runs two tasks concurrently using asyncio."""
  task1_sync()
  task2_sync()

  print("All tasks finished")

In [148]:
print("#"*50)
print("Chamada síncrona")
now1 = datetime.datetime.now()
print(now1)
print("-"*50)

run_tasks_sinc()

now2 = datetime.datetime.now()
print(now2)
print("-"*50)
print(f"Tempo total: {now2-now1}")

##################################################
Chamada síncrona
2024-07-15 15:30:48.336812
--------------------------------------------------
Task 1 started
2024-07-15 15:30:48.339029
Task 1 finished
2024-07-15 15:30:50.341799
Task 2 started
2024-07-15 15:30:50.341869
Task 2 finished
2024-07-15 15:30:51.343135
All tasks finished
2024-07-15 15:30:51.343440
--------------------------------------------------
Tempo total: 0:00:03.006628


## Chamadas assíncronas

In [149]:
import asyncio

async def task1_async():
  """Simulates a long-running task that takes 2 seconds."""
  print("Task 1 started")
  print(datetime.datetime.now())
  await asyncio.sleep(2)  # Simulate work for 2 seconds
  print("Task 1 finished")
  print(datetime.datetime.now())

async def task2_async():
  """Simulates a shorter task that takes 1 second."""
  print("Task 2 started")
  print(datetime.datetime.now())
  await asyncio.sleep(1)  # Simulate work for 1 second
  print("Task 2 finished")
  print(datetime.datetime.now())

async def run_tasks_asinc():
  """Runs two tasks concurrently using asyncio."""
  task1_future = asyncio.create_task(task1_async())
  task2_future = asyncio.create_task(task2_async())

  # Wait for both tasks to complete concurrently (non-blocking)
  await task1_future
  await task2_future

  print("All tasks finished")

# Run the event loop in terminal
#loop = asyncio.get_event_loop()
#loop.run_until_complete(run_tasks())
#loop.close()

In [150]:
async def call_run_tasks():
  await run_tasks_asinc()

print("#"*50)
print("Chamada assíncrona")
now1 = datetime.datetime.now()
print(now1)
print("-"*50)

await call_run_tasks()

now2 = datetime.datetime.now()
print(now2)
print("-"*50)
print(f"Tempo total: {now2-now1}")

##################################################
Chamada assíncrona
2024-07-15 15:30:56.975527
--------------------------------------------------
Task 1 started
2024-07-15 15:30:56.976090
Task 2 started
2024-07-15 15:30:56.976223
Task 2 finished
2024-07-15 15:30:57.977664
Task 1 finished
2024-07-15 15:30:58.978045
All tasks finished
2024-07-15 15:30:58.978324
--------------------------------------------------
Tempo total: 0:00:02.002797


# Usando threads

In [151]:
import threading
import time

def task1():
  """Simulates a long-running task (2 seconds) using threading."""
  print("Task 1 started (Thread)")
  print(datetime.datetime.now())
  time.sleep(2)  # Simulate work for 2 seconds
  print("Task 1 finished (Thread)")
  print(datetime.datetime.now())

def task2():
  """Simulates a shorter task (1 second) using threading."""
  print("Task 2 started (Thread)")
  print(datetime.datetime.now())
  time.sleep(1)  # Simulate work for 1 second
  print("Task 2 finished (Thread)")
  print(datetime.datetime.now())

def run_tasks_via_threads():
  """Runs two tasks concurrently using threads."""
  thread1 = threading.Thread(target=task1)
  thread2 = threading.Thread(target=task2)

  # Start threads
  thread1.start()
  thread2.start()

  # Wait for threads to finish
  thread1.join()
  thread2.join()

  print("All tasks finished (Threads)")

In [129]:
print("#"*50)
print("Chamada assíncrona usando Threads")
now1 = datetime.datetime.now()
print(now1)
print("-"*50)

run_tasks_via_threads()

now2 = datetime.datetime.now()
print(now2)
print("-"*50)
print(f"Tempo total: {now2-now1}")

##################################################
Chamada assíncrona usando Threads
2024-07-15 15:14:44.332125
--------------------------------------------------
Task 1 started (Thread)Task 2 started (Thread)
2024-07-15 15:14:44.334087

2024-07-15 15:14:44.336096
Task 2 finished (Thread)
2024-07-15 15:14:45.335527
Task 1 finished (Thread)
2024-07-15 15:14:46.340564
All tasks finished (Threads)
2024-07-15 15:14:46.341758
--------------------------------------------------
Tempo total: 0:00:02.009633
