# cURL

In [16]:
# Shell escape: fetch httpbin's /ip endpoint, which echoes back the caller's origin IP.
!curl http://httpbin.org/ip

{
  "origin": "203.237.172.100"
}


In [17]:
# Plain GET: httpbin echoes back the query args, request headers, origin and URL it saw.
!curl http://httpbin.org/get

{
  "args": {}, 
  "headers": {
    "Accept": "*/*", 
    "Host": "httpbin.org", 
    "User-Agent": "curl/7.71.1", 
    "X-Amzn-Trace-Id": "Root=1-6136fed5-2f2af59837c12cb95d822723"
  }, 
  "origin": "203.237.172.100", 
  "url": "http://httpbin.org/get"
}


In [19]:
# GET with a query string. The backslashes escape ? and = for the shell so they
# are passed through literally; the server receives ?myname=js.
!curl http://httpbin.org/get\?myname\='js'

{
  "args": {
    "myname": "js"
  }, 
  "headers": {
    "Accept": "*/*", 
    "Host": "httpbin.org", 
    "User-Agent": "curl/7.71.1", 
    "X-Amzn-Trace-Id": "Root=1-6136fef7-6afdfcf9225c975b30a56827"
  }, 
  "origin": "203.237.172.100", 
  "url": "http://httpbin.org/get?myname=js"
}


# requests

In [23]:
import requests

# OPTIONS asks the server which HTTP methods the resource supports.
# The timeout keeps the cell from hanging forever if the server never answers.
r = requests.options("http://httpbin.org/", timeout=10)

# The permitted methods are reported in the Allow response header
# (header lookup on a requests response is case-insensitive).
r.headers["allow"]

'HEAD, OPTIONS, GET'

In [24]:
# HTTP status code of the response stored in r.
r.status_code

200

In [25]:
# /status/404 makes httpbin answer with that status code on purpose.
# requests does not raise for a 4xx answer here, so the code is read off the response.
# The timeout stops the cell from blocking indefinitely on an unresponsive server.
bad_r = requests.get('http://httpbin.org/status/404', timeout=10)
bad_r.status_code

404

# POST

- POST는 HTML form에서 입력을 받아 전송하는 경우에 사용한다.
- 아래와 같이 data에 전송할 데이터를 dictionary 형식으로 준다.

In [26]:
# A dict passed as data= is sent form-encoded; httpbin echoes it back under "form".
# The timeout stops the cell from blocking indefinitely on an unresponsive server.
r = requests.post("http://httpbin.org/post", data={'name': 'js'}, timeout=10)
r.json()

{'args': {},
 'data': '',
 'files': {},
 'form': {'name': 'js'},
 'headers': {'Accept': '*/*',
  'Accept-Encoding': 'gzip, deflate, br',
  'Content-Length': '7',
  'Content-Type': 'application/x-www-form-urlencoded',
  'Host': 'httpbin.org',
  'User-Agent': 'python-requests/2.26.0',
  'X-Amzn-Trace-Id': 'Root=1-6137005b-7581d85d3d0ad42167f9cdec'},
 'json': None,
 'origin': '203.237.172.100',
 'url': 'http://httpbin.org/post'}

In [27]:
# HEAD request: only the response headers are of interest here.
# The timeout stops the cell from blocking indefinitely on an unresponsive server.
r = requests.head("http://httpbin.org/", timeout=10)
r.headers

{'Date': 'Tue, 07 Sep 2021 06:06:22 GMT', 'Content-Type': 'text/html; charset=utf-8', 'Content-Length': '9593', 'Connection': 'keep-alive', 'Server': 'gunicorn/19.9.0', 'Access-Control-Allow-Origin': '*', 'Access-Control-Allow-Credentials': 'true'}

In [28]:
# Pick a single header out of the response stored in r.
r.headers['Server']

'gunicorn/19.9.0'

In [33]:
# Import the submodule explicitly: a bare `import urllib` does not load
# urllib.request, so the class statement below would only work if some other
# library had already imported it.
import urllib.request


class HeadRequest(urllib.request.Request):
    """A urllib Request that is always sent with the HTTP HEAD method."""

    def get_method(self):
        """Return the HTTP verb to use for this request."""
        return "HEAD"
    
# Send the HEAD request. The with-block closes the connection once the headers
# and final URL have been printed, and the timeout stops the cell from
# blocking indefinitely on an unresponsive server.
with urllib.request.urlopen(HeadRequest("http://httpbin.org"), timeout=10) as response:
    print(response.info())
    print(response.geturl())

Date: Tue, 07 Sep 2021 06:10:53 GMT
Content-Type: text/html; charset=utf-8
Content-Length: 9593
Connection: close
Server: gunicorn/19.9.0
Access-Control-Allow-Origin: *
Access-Control-Allow-Credentials: true


http://httpbin.org


In [48]:
# Sample XML document: a <students> root holding two <student> elements,
# each with an x attribute and <id>/<name> children.
xml_data = '''
<students>               
    <student x="1">      
        <id>001</id>     
        <name>Kim</name>
    </student>
    <student x="2">
        <id>002</id>
        <name>Lee</name>
    </student>
</students>
'''

In [49]:
import lxml.etree
# Parse the XML held in the xml_data string; fromstring() hands back the root element.
root = lxml.etree.fromstring(xml_data)
root

<Element students at 0x7fa0d886aac0>

In [50]:
from io import StringIO

# Same document, this time read through a file-like object. parse() yields a
# tree object, so the root element has to be fetched from it explicitly.
tree = lxml.etree.parse(StringIO(xml_data))
root = tree.getroot()
root

<Element students at 0x7fa0fb8aa940>

In [51]:
import os
# Show the working directory; the relative 'data' paths used in this notebook resolve against it.
os.getcwd()

'/Users/joono/Desktop/joono/Bigdata-Analysis'

In [52]:
%%writefile data/ds_open_hello.xml
<students>
<student x="1">
    <id>001</id>
    <name>Kim</name>
</student>
<student x="2">
    <id>002</id>
    <name>Lee</name>
</student>
</students>

Overwriting data/ds_open_hello.xml


In [53]:
import os

# Parse the XML file under data/; os.path.join keeps the path separator portable.
tree = lxml.etree.parse(
    os.path.join('data', 'ds_open_hello.xml')
)

In [54]:
# Take the root element of the parsed tree and show its tag name.
root = tree.getroot()
root.tag

'students'

In [55]:
# Iterating over an element yields its direct children: the outer loop visits
# each child of the root, the inner loop visits that child's own children.
for element in root:
    for elem in element:
        print(elem.tag)

id
name
id
name


In [56]:
# Walk the whole tree in document order, starting with the root itself.
# iter() is used instead of getiterator(), which lxml documents as deprecated.
for node in root.iter():
    print(f"tag: {node.tag}, attribute: {node.attrib}, text: {node.text}")

tag: students, attribute: {}, text: 

tag: student, attribute: {'x': '1'}, text: 
    
tag: id, attribute: {}, text: 001
tag: name, attribute: {}, text: Kim
tag: student, attribute: {'x': '2'}, text: 
    
tag: id, attribute: {}, text: 002
tag: name, attribute: {}, text: Lee


In [58]:
# find() returns only the first matching child, so this prints the text of
# the first <student>'s child elements.
std = root.find('student')
for node in std:
    print(node.text)

001
Kim


In [59]:
# XPath: collect the value of every attribute named x, anywhere in the document.
root.xpath('//@x')

['1', '2']

In [66]:
# XPath: text of the <id> child of any element whose x attribute equals 1.
# Because the 1 is unquoted, XPath compares the attribute value as a number.
root.xpath('//*[@x=1]/id/text()')


['001']

In [78]:
from lxml.cssselect import CSSSelector

# Build a selector object from the CSS expression "student".
sel = CSSSelector('student')
# Calling the selector on an element returns the elements that match.
nodes = sel(root)

for e in nodes:
    print(e.tag)

student
student


In [70]:
# The XPath expression that the CSS selector was translated into.
sel.path

'descendant-or-self::student'

In [72]:
# Apply the selector to the tree again and show the raw result, a list of elements.
nodes = sel(root)
print(nodes)

[<Element student at 0x7fa0fb8c8680>, <Element student at 0x7fa0d87bd440>]


In [73]:
# Rebind sel to a selector for <id> elements and collect the matches.
sel = CSSSelector('id')
nodes = sel(root)

In [74]:
# Print the text content of every element matched by the selector.
for e in nodes:
    print(e.text)

001
002


In [81]:
# Sample JSON document: an array of two objects with string-valued fields.
# Note that the second name, " Brent", carries a leading space inside the string.
json_data = '''
[
{"id": "001", "x": "2", "name": "Chunk"},
{"id": "009", "x": "7", "name": " Brent"}
]
'''

In [84]:
import json
# Decode the JSON text; the top-level array becomes a list of dicts.
info = json.loads(json_data)

# Print the id and name fields of each record.
for item in info:
    print(f"id: {item['id']}, name: {item['name']}")

id: 001, name: Chunk
id: 009, name:  Brent


In [85]:
import json

# Encode a nested Python structure as JSON text: the tuple is written out as
# an array and None as null.
my_json = json.dumps(["foo", {"bar": ("baz", None, 1.0, 2)}])

In [86]:
# Display the encoded JSON string.
my_json

'["foo", {"bar": ["baz", null, 1.0, 2]}]'