сайт для тестирования запросов

http://httpbin.org/


In [3]:
import requests
# from requests.models import Response

response = requests.get(url='http://httpbin.org/')
print(response, type(response))

<Response [200]> <class 'requests.models.Response'>


коды ответа HTTP

+ информационные 100 - 199
+ успешные 200 - 299
+ перенаправления 300 - 399
+ клиентские ошибки 400 - 499
+ серверные ошибки 500 - 599

HTTP заголовки

https://developer.mozilla.org/ru/docs/Web/HTTP/Headers

например, `user-agent`
> используется для отображения страницы под браузер и множественных фейковых запросов

> проверить текущее значение можно, введя в адресной строке `about:`

In [7]:
response = requests.get(url='http://httpbin.org/user-agent')
print(f'status: {response.status_code}\nuser-agent:\n{response.text}')

status: 200
user-agent:
{
  "user-agent": "python-requests/2.28.1"
}



In [11]:
headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.84 Safari/537.36'
}

response = requests.get(url='http://httpbin.org/user-agent', headers=headers)
print(f'status: {response.status_code}\nuser-agent:\n{response.text}')

status: 200
user-agent:
{
  "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.84 Safari/537.36"
}



In [31]:
# симуляция фейковых users через файл заголовков
def user_agent(n):
    with open('./headers/user_agent.txt') as file:
        i = 0
        while i < n:
            response = requests.get(url='http://httpbin.org/user-agent', 
                                headers={'user-agent': file.readline().strip('\n')})
            yield response
            i += 1


for res in user_agent(2): 
    print(f'status: {res.status_code}\nuser-agent:\n{res.text}')


status: 200
user-agent:
{
  "user-agent": "Mozilla/5.0 (Windows NT 10.0; WOW64; rv:45.0) Gecko/20100101 Firefox/45.0"
}

status: 200
user-agent:
{
  "user-agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.102 YaBrowser/20.9.3.136 Yowser/2.5 Safari/537.36"
}



In [23]:
# симуляция фейковых users через библиотеку
import fake_useragent as fu
import requests as r


ua = fu.UserAgent()

for i in range(2):
    response = r.get(url='http://httpbin.org/user-agent',
                     headers={'user-agent': ua.random})
    print(f'status: {response.status_code}\nuser-agent:\n{response.text}')


status: 200
user-agent:
{
  "user-agent": "Mozilla/5.0 (Windows NT 5.1; U) Opera 7.54  [de]"
}

status: 200
user-agent:
{
  "user-agent": "Mozilla/5.0 (Windows NT 6.2; WOW64; rv:63.0) Gecko/20100101 Firefox/63.0"
}



прокси `proxies`

> способы скрыть IP адрес, для предотвращения бана

In [38]:
import requests as r
import fake_useragent as fu


def proxy(n):
    with open('./proxies/fake_ip.txt') as file:
        i = 0
        ua = fu.UserAgent()
        while i < n:
            ip = file.readline().strip('\n')
            print(f'ip: {ip}')
            response = r.get(url='http://httpbin.org/ip',
                             headers={'user-agent': ua.random},
                             proxies={'http': f'http://{ip}', 'https': f'https://{ip}'})
            i += 1           
            if response.status_code != 200:
                continue

            yield response


for res in proxy(5):
    print(f'status: {res.status_code}\ntext:\n{res.json()}')

ip: 185.162.230.155:80
ip: 185.162.231.111:80
ip: 185.162.231.202:80
ip: 185.162.231.29:80
ip: 185.162.230.121:80


установка время ожидания ответа на запрос через параметр `timeout`

In [47]:
import requests as r
from fake_useragent import UserAgent
from time import perf_counter


def proxy(n):
    with open('./proxies/fake_ip.txt') as file:
        i = 0
        ua = UserAgent()
        while i < n:
            ip = file.readline().strip('\n')
            print(f'ip: {ip}')
            start = perf_counter()
            try:
                i += 1
                response = r.get(url='http://httpbin.org/ip',
                             headers={'user-agent': ua.random},
                             proxies={'http': f'http://{ip}', 'https': f'https://{ip}'},
                             timeout=2)
            except Exception as ex:
                print(f'error: {ex}\nseconds: {perf_counter() - start:.2f}')
                continue
            
            if response.status_code != 200:
                continue
            yield response


for res in proxy(5):
    print(f'status: {res.status_code}\ntext:\n{res.text}')

ip: 185.162.230.155:80
ip: 185.162.231.111:80
ip: 185.162.231.202:80
ip: 200.12.55.90:80
error: HTTPConnectionPool(host='200.12.55.90', port=80): Max retries exceeded with url: http://httpbin.org/ip (Caused by ConnectTimeoutError(<urllib3.connection.HTTPConnection object at 0x113fd3880>, 'Connection to 200.12.55.90 timed out. (connect timeout=2)'))
seconds: 2.00
ip: 185.162.231.29:80


добавление параметров к запросу

In [48]:
url = 'http://httpbin.org/ip'
params = {'key1': 'value1', 'key2': 'value2'}
response = r.get(url=url, 
                 headers={'user-agent': ua.random},
                 params=params,
                 timeout=2)
response.url

'http://httpbin.org/ip?key1=value1&key2=value2'

скачивание видео файла

In [55]:
import requests as r
from os.path import getsize
from time import time


def download_mp4():
    try:
        response = r.get(url=url, stream=True)
    except Exception as ex:
        return f'request error:\n{ex}'

    with open('./data/video_1.mp4', 'wb') as file_mp4:
        file_mp4.write(response.content)

    size = getsize('./data/video_1.mp4')
    return f'video is downloaded ({size / (1024 * 1024)})\npls, check video_1.mp4 file'


url = 'https://parsinger.ru/video_downloads/videoplayback.mp4'
s = time()
print(download_mp4())
e = time()

print(f'execution time: {(e - s):.3f} seconds')

video is downloaded (11.252302169799805)
pls, check video_1.mp4 file
execution time: 33.827 seconds


поиск статуса 200

In [59]:
for i in range(1, 501):
    response = r.get(f'https://parsinger.ru/task/1/{i}.html')
    if response.status_code == 200:
        print(response.text)
        break


<!DOCTYPE html>
<html lang="en">
<head>
	<meta charset="UTF-8">
	<meta name="viewport" content="width=device-width, initial-scale=1.0">
	<title>Document</title>
</head>
<body>
	9876316843187416358741341687416874165432
</body>
</html>


тусовочка с содержимым response

https://jsonplaceholder.typicode.com/todos/

In [61]:
import requests as r


response = r.get(url='https://jsonplaceholder.typicode.com/todos/')
response.json()[:2]

[{'userId': 1, 'id': 1, 'title': 'delectus aut autem', 'completed': False},
 {'userId': 1,
  'id': 2,
  'title': 'quis ut nam facilis et officia qui',
  'completed': False}]

скачивание изображений

In [62]:
import requests as r
from os.path import getsize

data = {}
for i in range(1, 161):
    response = r.get(url=f'https://parsinger.ru/img_download/img/ready/{i}.png')
    size = len(response.content)
    if size in data:
        data[size][0].append(1)
        data[size][1].append(i)
    else:
        data[size] = (list(), list())

print(sorted(data.items(), key=lambda x: x[0]))

TypeError: 'tuple' object does not support item assignment