# Requests:

To do: find some Open API â€“ try requests -> json/dict

In [None]:
pip install requests

Collecting requests
  Downloading requests-2.31.0-py3-none-any.whl.metadata (4.6 kB)
Collecting charset-normalizer<4,>=2 (from requests)
  Downloading charset_normalizer-3.3.2-cp312-cp312-win_amd64.whl.metadata (34 kB)
Collecting idna<4,>=2.5 (from requests)
  Downloading idna-3.7-py3-none-any.whl.metadata (9.9 kB)
Collecting urllib3<3,>=1.21.1 (from requests)
  Downloading urllib3-2.2.1-py3-none-any.whl.metadata (6.4 kB)
Collecting certifi>=2017.4.17 (from requests)
  Downloading certifi-2024.2.2-py3-none-any.whl.metadata (2.2 kB)
Downloading requests-2.31.0-py3-none-any.whl (62 kB)
   ---------------------------------------- 0.0/62.6 kB ? eta -:--:--
   ---------------------------------------- 62.6/62.6 kB 3.3 MB/s eta 0:00:00
Downloading certifi-2024.2.2-py3-none-any.whl (163 kB)
   ---------------------------------------- 0.0/163.8 kB ? eta -:--:--
   ---------------------------------------- 163.8/163.8 kB 4.8 MB/s eta 0:00:00
Downloading charset_normalizer-3.3.2-cp312-cp312-win_am

### Making a GET request:

In [20]:
import requests

# The API endpoint
url = "https://jsonplaceholder.typicode.com/posts/1"

# A GET request to the API
response = requests.get(url)

# Print the response
response_json = response.json()
response_json

{'userId': 1,
 'id': 1,
 'title': 'sunt aut facere repellat provident occaecati excepturi optio reprehenderit',
 'body': 'quia et suscipit\nsuscipit recusandae consequuntur expedita et cum\nreprehenderit molestiae ut ut quas totam\nnostrum rerum est autem sunt rem eveniet architecto'}

#### Passing an argument:

In [21]:
# The API endpoint
url = "https://jsonplaceholder.typicode.com/posts/"

# Adding a payload
payload = {
    "id": [1, 2, 3], 
    "userId": 1
    }

# A get request to the API
response = requests.get(url, params=payload)

# Print the response
response_json = response.json()

for i in response_json:
    print(i, "\n")

{'userId': 1, 'id': 1, 'title': 'sunt aut facere repellat provident occaecati excepturi optio reprehenderit', 'body': 'quia et suscipit\nsuscipit recusandae consequuntur expedita et cum\nreprehenderit molestiae ut ut quas totam\nnostrum rerum est autem sunt rem eveniet architecto'} 

{'userId': 1, 'id': 2, 'title': 'qui est esse', 'body': 'est rerum tempore vitae\nsequi sint nihil reprehenderit dolor beatae ea dolores neque\nfugiat blanditiis voluptate porro vel nihil molestiae ut reiciendis\nqui aperiam non debitis possimus qui neque nisi nulla'} 

{'userId': 1, 'id': 3, 'title': 'ea molestias quasi exercitationem repellat qui ipsa sit aut', 'body': 'et iusto sed quo iure\nvoluptatem occaecati omnis eligendi aut ad\nvoluptatem doloribus vel accusantium quis pariatur\nmolestiae porro eius odio et labore et velit aut'} 



### Making a POST request:

In [23]:
# Define new data to create
new_data = {
    "userID": 1,
    "id": 1,
    "title": "Making a POST request",
    "body": "This is the data we created."
}

# The API endpoint to communicate with
url_post = "https://jsonplaceholder.typicode.com/posts"

# A POST request to tthe API
post_response = requests.post(url_post, json=new_data)

# Print the response
post_response_json = post_response.json()
post_response_json

{'userID': 1,
 'id': 101,
 'title': 'Making a POST request',
 'body': 'This is the data we created.'}

### Authenticating requests:

In [24]:
from requests.auth import HTTPBasicAuth

private_url = "https://api.github.com/user"
github_username = "username"
token = "token"

private_url_response = requests.get(
    url=private_url,
    auth=HTTPBasicAuth(github_username, token)
)

private_url_response.status_code

401

### Handling errors:

In [25]:
# A deliberate typo is made in the endpoint "postz" instead of "posts":
url = "https://jsonplaceholder.typicode.com/postz"

# Attempt to GET data from provided endpoint:
try:
    response = requests.get(url)
    response.raise_for_status()
# If the request fails (404) then print the error:
except requests.exceptions.HTTPError as error:
  print(error)

404 Client Error: Not Found for url: https://jsonplaceholder.typicode.com/postz


### Dealing with too many redirects:

In [26]:
url = "https://jsonplaceholder.typicode.com/posts"

try:
  response = requests.get(url)
  response.raise_for_status()
except requests.exceptions.TooManyRedirects as error:
  print(error)

In [27]:
# Solution 2
url = "https://jsonplaceholder.typicode.com/posts"
session = requests.Session()
session.max_redirects = 3
response = session.get(url)

In [28]:
# Solution 3
url = "https://jsonplaceholder.typicode.com/posts"
session = requests.Session()
session.allow_redirects = False
response = session.get(url)

### Connection error:

In [31]:
url = "https://jsonplaceholder.typicode.com/posts"

try:
  response = requests.get(url)
except requests.ConnectionError as error:
  print(error)

### Timeout:

In [30]:
url = "https://jsonplaceholder.typicode.com/posts"

try:
  response = requests.get(url, timeout=0.0001)
except requests.Timeout as error:
  print(error)

HTTPSConnectionPool(host='jsonplaceholder.typicode.com', port=443): Read timed out. (read timeout=0.0001)


### Saving output to JSON file:

In [18]:
# save JSON to file for further inspection
import json
import requests

file = 'json_01' # local path to the file
url = 'https://httpbin.org/basic-auth/user/pass'
user = 'user'
password = 'pass'
token = 2
headers = {
    'Key1': 'Value1',
    'Key2': f'Value2 {token}',
    'Key3': '1.0.0'
    }

# Saving an API output to a variable:
api_output = requests.get(url=url, auth=(user, password)).json()

# Writing a JSON output to a file:
with open(file, 'w') as output:
    output.write(json.dumps(api_output, indent=2))

# Reading a JSON file:
with open(file, 'r') as file:
    print(file.read())

{
  "authenticated": true,
  "user": "user"
}


**1st example:**

In [6]:
import requests

x = requests.get('https://w3schools.com/python/demopage.htm')

print(x.text)

<!DOCTYPE html>
<html>
<body>

<h1>This is a Test Page</h1>

</body>
</html>


In [18]:
print(x.headers['content-type'])

text/html


Converting HTML to JSON:

In [21]:
! pip install html_to_json

Collecting html_to_json
  Downloading html_to_json-2.0.0-py2.py3-none-any.whl.metadata (5.2 kB)
Collecting bs4 (from html_to_json)
  Downloading bs4-0.0.2-py2.py3-none-any.whl.metadata (411 bytes)
Collecting beautifulsoup4 (from bs4->html_to_json)
  Downloading beautifulsoup4-4.12.3-py3-none-any.whl.metadata (3.8 kB)
Collecting soupsieve>1.2 (from beautifulsoup4->bs4->html_to_json)
  Downloading soupsieve-2.5-py3-none-any.whl.metadata (4.7 kB)
Downloading html_to_json-2.0.0-py2.py3-none-any.whl (6.4 kB)
Downloading bs4-0.0.2-py2.py3-none-any.whl (1.2 kB)
Downloading beautifulsoup4-4.12.3-py3-none-any.whl (147 kB)
   ---------------------------------------- 0.0/147.9 kB ? eta -:--:--
   --------------------------------- ------ 122.9/147.9 kB 3.6 MB/s eta 0:00:01
   ---------------------------------------- 147.9/147.9 kB 2.9 MB/s eta 0:00:00
Downloading soupsieve-2.5-py3-none-any.whl (36 kB)
Installing collected packages: soupsieve, beautifulsoup4, bs4, html_to_json
Successfully installe

In [25]:
import requests
import json
import html_to_json

username = 'datasets'
base_url = 'https://www.kaggle.com/'
url = base_url+str(username)

r = requests.get(url)
print(r.status_code)

html_string = r.text
output_json = html_to_json.convert(html_string)
output_json

200


{'_value': 'html',
 'html': [{'_attributes': {'lang': 'en'},
   'head': [{'title': [{'_value': 'Find Open Datasets and Machine Learning Projects | Kaggle'}],
     'meta': [{'_attributes': {'charset': 'utf-8'}},
      {'_attributes': {'name': 'robots', 'content': 'index, follow'}},
      {'_attributes': {'name': 'description',
        'content': 'Download Open Datasets on 1000s of Projects + Share Projects on One Platform. Explore Popular Topics Like Government, Sports, Medicine, Fintech, Food, More. Flexible Data Ingestion.'}},
      {'_attributes': {'name': 'turbolinks-cache-control',
        'content': 'no-cache'}},
      {'_attributes': {'name': 'viewport',
        'content': 'width=device-width, initial-scale=1.0, maximum-scale=5.0, minimum-scale=1.0'}},
      {'_attributes': {'name': 'theme-color', 'content': '#008ABC'},
       'script': [{'_attributes': {'nonce': 'cc2PXnky7nsji9VZAMESlw==',
          'type': 'text/javascript'},
         '_value': 'window["pageRequestStartTime"] =

**2nd example:**

In [28]:
import requests
r = requests.get('https://httpbin.org/basic-auth/user/pass', auth=('user', 'pass'))

In [29]:
# Status code:
print(r.status_code)

200


In [30]:
# Content type: 
print(r.headers['content-type'])

application/json


In [31]:
# Encoding:
print(r.encoding)

utf-8


In [32]:
output_text = r.text
print(output_text)

{
  "authenticated": true, 
  "user": "user"
}



In [46]:
# JSON:

In [35]:
output_json = r.json()
output_json

{'authenticated': True, 'user': 'user'}

In [36]:
output_dict = r.__dict__
output_dict

{'_content': b'{\n  "authenticated": true, \n  "user": "user"\n}\n',
 '_content_consumed': True,
 '_next': None,
 'status_code': 200,
 'headers': {'Date': 'Tue, 23 Apr 2024 15:04:36 GMT', 'Content-Type': 'application/json', 'Content-Length': '47', 'Connection': 'keep-alive', 'Server': 'gunicorn/19.9.0', 'Access-Control-Allow-Origin': '*', 'Access-Control-Allow-Credentials': 'true'},
 'raw': <urllib3.response.HTTPResponse at 0x2726a0c9ff0>,
 'url': 'https://httpbin.org/basic-auth/user/pass',
 'encoding': 'utf-8',
 'history': [],
 'reason': 'OK',
 'cookies': <RequestsCookieJar[]>,
 'elapsed': datetime.timedelta(seconds=1, microseconds=2777),
 'request': <PreparedRequest [GET]>,
 'connection': <requests.adapters.HTTPAdapter at 0x2726981f3e0>}

In [44]:
# Keys:
print("Keys:")
for keys in output_dict.keys():
    print(keys)

Keys:
_content
_content_consumed
_next
status_code
headers
raw
url
encoding
history
reason
cookies
elapsed
request
connection


In [45]:
# Values:
print("Values:")
for values in output_dict.values():
    print(values)

Values:
b'{\n  "authenticated": true, \n  "user": "user"\n}\n'
True
None
200
{'Date': 'Tue, 23 Apr 2024 15:04:36 GMT', 'Content-Type': 'application/json', 'Content-Length': '47', 'Connection': 'keep-alive', 'Server': 'gunicorn/19.9.0', 'Access-Control-Allow-Origin': '*', 'Access-Control-Allow-Credentials': 'true'}
<urllib3.response.HTTPResponse object at 0x000002726A0C9FF0>
https://httpbin.org/basic-auth/user/pass
utf-8
[]
OK
<RequestsCookieJar[]>
0:00:01.002777
<PreparedRequest [GET]>
<requests.adapters.HTTPAdapter object at 0x000002726981F3E0>


**3rd example:**

In [49]:
import requests

# The API endpoint:
url = "https://jsonplaceholder.typicode.com/posts/1"

# A GET request to the API:
response = requests.get(url)

# Print the response:
response_json = response.json()
response_json

{'userId': 1,
 'id': 1,
 'title': 'sunt aut facere repellat provident occaecati excepturi optio reprehenderit',
 'body': 'quia et suscipit\nsuscipit recusandae consequuntur expedita et cum\nreprehenderit molestiae ut ut quas totam\nnostrum rerum est autem sunt rem eveniet architecto'}