# Hail-a-TAXII Notebook
This notebook uses Python to interact with the TAXII 1.1 services at hailataxii.com. The equivalent discovery request issued with `curl` is:

```bash
curl --location --request POST 'http://hailataxii.com/taxii-discovery-service' \
--header 'X-TAXII-Content-Type: urn:taxii.mitre.org:message:xml:1.1' \
--header 'X-TAXII-Accept: urn:taxii.mitre.org:message:xml:1.1' \
--header 'X-TAXII-Services: urn:taxii.mitre.org:services:1.1' \
--header 'X-TAXII-Protocol: urn:taxii.mitre.org:protocol:http:1.0' \
--header 'Content-Type: application/xml' \
--data-raw '<Discovery_Request xmlns="http://taxii.mitre.org/messages/taxii_xml_binding-1.1" message_id="1"/>'
```

You may need to start Jupyter with a raised IOPub data-rate limit so that large responses can be displayed: `jupyter notebook --NotebookApp.iopub_data_rate_limit=1.0e10`

In [None]:
import datetime
import re, string
import xml.dom.minidom as dom
import xml.etree.ElementTree as ET
# Keep this after `import datetime`: the name `datetime` must end up bound to the class.
from datetime import datetime, timedelta
from xml.dom import minidom

import pandas as pandas
import requests
from pyspark import SparkContext, SparkConf
from pyspark.sql import SparkSession

In [None]:
# Create (or reuse) the Spark session for this notebook and expose its context.
spark = (
    SparkSession.builder
    .appName('TAXII 1.1 Notebook')
    .getOrCreate()
)
sc = spark.sparkContext

In [None]:
def get_discovery_url():
    """Return the URL of the Hail-a-TAXII discovery service endpoint."""
    discovery_endpoint = 'http://hailataxii.com/taxii-discovery-service'
    return discovery_endpoint

In [None]:
def get_poll_url():
    """Return the URL that this notebook posts collection and poll requests to."""
    data_endpoint = 'http://hailataxii.com/taxii-data'
    return data_endpoint

In [None]:
def remove_xmlns(xml):
    """Strip every ``{...}`` segment (braces included) from ``xml``.

    ElementTree spells qualified names as ``{namespace-uri}local``; this
    removes the braced part so only the local name remains.

    Args:
        xml: The string to clean.

    Returns:
        The string with each braced segment removed.
    """
    # First blank out whatever sits between a '{' and the next '}' (newlines
    # included), then drop the now-empty '{}' pairs left behind.
    emptied = re.sub("(?<={).*?(?=})", '', xml, flags=re.DOTALL)
    return emptied.replace('{}', '')


In [None]:
def get_req_headers():
    """Return a fresh dict of the HTTP headers sent with every TAXII 1.1 request."""
    header_pairs = [
        ('Content-type', 'application/xml'),
        ('X-TAXII-Content-Type', 'urn:taxii.mitre.org:message:xml:1.1'),
        ('X-TAXII-Accept', 'urn:taxii.mitre.org:message:xml:1.1'),
        ('X-TAXII-Services', 'urn:taxii.mitre.org:services:1.1'),
        ('X-TAXII-Protocol', 'urn:taxii.mitre.org:protocol:http:1.0'),
    ]
    return dict(header_pairs)

In [None]:
def get_discovery_data():
    """Build the TAXII 1.1 Discovery_Request message as pretty-printed XML.

    Returns:
        str: The serialized ``taxii_11:Discovery_Request`` element, carrying
        the ``xmlns:taxii_11`` declaration and a fixed ``message_id`` of ``'1'``.
    """
    taxii_ns = 'http://taxii.mitre.org/messages/taxii_xml_binding-1.1'
    document = dom.Document()
    # createElementNS takes the namespace URI first (not the 'xmlns:taxii_11'
    # attribute name), so the element is created in the real TAXII namespace.
    root = document.createElementNS(taxii_ns, 'taxii_11:Discovery_Request')
    # The prefix declaration is written as an explicit attribute so that it
    # appears in the serialized request.
    root.setAttribute('xmlns:taxii_11', taxii_ns)
    root.setAttribute('message_id', '1')
    return root.toprettyxml()

In [None]:
# <taxii_11:Collection_Information_Request xmlns:taxii_11="http://taxii.mitre.org/messages/taxii_xml_binding-1.1" 
# message_id="26300"/>
def get_collection_request(msg_id):
    """Build a TAXII 1.1 Collection_Information_Request as pretty-printed XML.

    Args:
        msg_id: Value for the ``message_id`` attribute; converted with ``str()``.

    Returns:
        str: The serialized ``taxii_11:Collection_Information_Request`` element.
    """
    taxii_ns = 'http://taxii.mitre.org/messages/taxii_xml_binding-1.1'
    document = dom.Document()
    # createElementNS takes the namespace URI first (not the 'xmlns:taxii_11'
    # attribute name), so the element is created in the real TAXII namespace.
    root = document.createElementNS(taxii_ns, 'taxii_11:Collection_Information_Request')
    # The prefix declaration is written as an explicit attribute so that it
    # appears in the serialized request.
    root.setAttribute('xmlns:taxii_11', taxii_ns)
    root.setAttribute('message_id', str(msg_id))
    return root.toprettyxml()

In [None]:
xmlns_binding = 'http://taxii.mitre.org/messages/taxii_xml_binding-1.1'
xmlns_taxii_11 = 'xmlns:taxii_11'
def get_poll_request(begin_time, end_time, collection_name, sync, resp_type):
    """Build a TAXII 1.1 Poll_Request message as pretty-printed XML.

    Args:
        begin_time: datetime for Exclusive_Begin_Timestamp; truncated to midnight.
        end_time: datetime for Inclusive_End_Timestamp; truncated to midnight.
        collection_name: Value of the ``collection_name`` attribute.
        sync: Written, lower-cased, to the ``allow_asynch`` attribute.
        resp_type: Text of the Response_Type element (e.g. ``'FULL'``).

    Returns:
        str: The serialized ``taxii_11:Poll_Request`` element.
    """
    document = dom.Document()

    def element(local_name):
        # Every element of the message lives under the taxii_11 prefix.
        return document.createElementNS(xmlns_binding, 'taxii_11:' + local_name)

    def text_element(local_name, text):
        # Element whose only child is a text node.
        node = element(local_name)
        node.appendChild(document.createTextNode(text))
        return node

    def midnight_stamp(moment):
        # Drop the time-of-day and append the literal 'Z' suffix.
        return moment.replace(hour=0, minute=0, second=0, microsecond=0).isoformat() + 'Z'

    poll = element('Poll_Request')
    root_attributes = (
        (xmlns_taxii_11, xmlns_binding),
        ('message_id', '1'),
        ('collection_name', collection_name),
    )
    for attr_name, attr_value in root_attributes:
        poll.setAttribute(attr_name, attr_value)

    poll.appendChild(text_element('Exclusive_Begin_Timestamp', midnight_stamp(begin_time)))
    poll.appendChild(text_element('Inclusive_End_Timestamp', midnight_stamp(end_time)))

    params = element('Poll_Parameters')
    params.setAttribute('allow_asynch', str(sync).lower())
    params.appendChild(text_element('Response_Type', resp_type))
    poll.appendChild(params)

    return poll.toprettyxml()

In [None]:
def _child_elements(parent, tag_name):
    """Return the direct child *elements* of ``parent`` whose tagName is ``tag_name``."""
    # Only ELEMENT_NODE children have a tagName; text, comment and
    # processing-instruction nodes are skipped instead of raising AttributeError.
    return [
        node for node in parent.childNodes
        if node.nodeType == node.ELEMENT_NODE and node.tagName == tag_name
    ]

def get_content_blocks(root):
    """Return the ``taxii_11:Content_Block`` elements directly under ``root``.

    Args:
        root: A minidom node (e.g. the poll response document element).

    Returns:
        list: Matching child elements, in document order (possibly empty).
    """
    return _child_elements(root, 'taxii_11:Content_Block')

def get_contents(content_block):
    """Return the ``taxii_11:Content`` elements directly under ``content_block``.

    Args:
        content_block: A minidom node, typically from ``get_content_blocks``.

    Returns:
        list: Matching child elements, in document order (possibly empty).
    """
    return _child_elements(content_block, 'taxii_11:Content')

def get_stixes(content):
    """Return the ``stix:STIX_Package`` elements directly under ``content``.

    Args:
        content: A minidom node, typically from ``get_contents``.

    Returns:
        list: Matching child elements, in document order (possibly empty).
    """
    return _child_elements(content, 'stix:STIX_Package')

In [None]:
# Build the discovery payload with the helper and show it.
# Hand-written alternative, kept for reference:
# discovery_data = '<Discovery_Request xmlns="http://taxii.mitre.org/messages/taxii_xml_binding-1.1" message_id="1"/>'
discovery_data = get_discovery_data()
req_headers = get_req_headers()
print(discovery_data)

In [None]:
# Invoke discovery service ...
resp = requests.post(
    get_discovery_url(),
    data=get_discovery_data(),
    headers=get_req_headers(),
)
# Show the response object (status) as the cell output.
resp

In [None]:
import pandas as pandas

# Snapshot of the cookies carried by the last response.
cookies = list(resp.cookies)

# Two-column [label, value] rows describing the response. Only the two
# encoding fields are stored as-is; every other value is stringified.
resp_list = [
    ['apparent_encoding', resp.apparent_encoding],
    ['content', str(resp.content)],
    ['cookies', str(cookies)],
]
for field in ('elapsed', 'headers'):
    resp_list.append([field, str(getattr(resp, field))])
resp_list.append(['encoding', resp.encoding])
for field in ('history', 'is_permanent_redirect', 'is_redirect', 'links',
              'next', 'ok', 'reason', 'request'):
    resp_list.append([field, str(getattr(resp, field))])
# Members of the prepared request that was sent (bound methods included).
for field in ('body', 'copy', 'deregister_hook', 'headers', 'hooks', 'path_url',
              'prepare', 'prepare_auth', 'prepare_body', 'prepare_content_length',
              'prepare_cookies', 'prepare_headers', 'prepare_hooks',
              'prepare_method', 'prepare_url', 'register_hook'):
    resp_list.append(['request.' + field, str(getattr(resp.request, field))])
for field in ('status_code', 'text', 'url'):
    resp_list.append([field, str(getattr(resp, field))])

# Round-trip through Spark to obtain a pandas frame, then display it.
spark_resp = spark.createDataFrame(resp_list)
pd_resp = spark_resp.toPandas()
pd_resp


In [None]:
# Parse the discovery response body and pretty-print its root element.
discovery_doc = dom.parseString(resp.content)
resp_xml = discovery_doc.documentElement
print(resp_xml.toprettyxml(indent='  '))


In [None]:
# Ask the data endpoint for its collection information (message id 26300).
data_url = get_poll_url()
request_data = get_collection_request(26300)
print(request_data, data_url, sep='\n')
resp = requests.post(
    data_url,
    data=request_data,
    headers=get_req_headers(),
)
resp

In [None]:
# Parse the collection-information response body and pretty-print its root element.
collection_doc = dom.parseString(resp.content)
resp_xml = collection_doc.documentElement
print(resp_xml.toprettyxml(indent='  '))

In [None]:
def get_poll_request(begin_time, end_time, collection_name, sync, resp_type):
    """Build a TAXII 1.1 Poll_Request message as pretty-printed XML.

    This redefinition shadows the earlier ``get_poll_request`` in this
    notebook and is the one in effect for the cells below.

    Args:
        begin_time: datetime for Exclusive_Begin_Timestamp; truncated to midnight.
        end_time: datetime for Inclusive_End_Timestamp; truncated to midnight.
        collection_name: Value of the ``collection_name`` attribute.
        sync: Flag written to the ``allow_asynch`` attribute as ``true``/``false``.
        resp_type: Text of the Response_Type element (e.g. ``'FULL'``).

    Returns:
        str: The serialized ``taxii_11:Poll_Request`` element.
    """
    taxii_ns = 'http://taxii.mitre.org/messages/taxii_xml_binding-1.1'
    document = dom.Document()

    def text_element(qualified_name, text):
        # createElementNS takes the namespace URI first, not the
        # 'xmlns:taxii_11' attribute name.
        node = document.createElementNS(taxii_ns, qualified_name)
        node.appendChild(document.createTextNode(text))
        return node

    def midnight_stamp(moment):
        # Drop the time-of-day and append the literal 'Z' suffix.
        return moment.replace(hour=0, minute=0, second=0, microsecond=0).isoformat() + 'Z'

    root = document.createElementNS(taxii_ns, 'taxii_11:Poll_Request')
    root.setAttribute('xmlns:taxii_11', taxii_ns)
    root.setAttribute('message_id', '1')
    root.setAttribute('collection_name', collection_name)

    root.appendChild(text_element('taxii_11:Exclusive_Begin_Timestamp', midnight_stamp(begin_time)))
    root.appendChild(text_element('taxii_11:Inclusive_End_Timestamp', midnight_stamp(end_time)))

    params = document.createElementNS(taxii_ns, 'taxii_11:Poll_Parameters')
    # XML booleans are lower-case; str(False) alone would emit "False".
    params.setAttribute('allow_asynch', str(sync).lower())
    params.appendChild(text_element('taxii_11:Response_Type', resp_type))
    root.appendChild(params)

    return root.toprettyxml()

In [None]:

# Poll window handed to get_poll_request, which truncates both ends to midnight.
today = datetime.today()
# NOTE: despite its name, `yesterday` is two days before `today`.
yesterday = today - timedelta(days=2)
print(yesterday, today)

poll_request = get_poll_request(
    begin_time=yesterday,
    end_time=today,
    collection_name='guest.dataForLast_7daysOnly',
    sync=False,
    resp_type='FULL',
)
print(poll_request)


In [None]:
# get content collection 
resp = requests.post(get_poll_url(), data=poll_request, headers=get_req_headers())
resp.content


In [None]:
# Parse the poll response body; `result` is its root element, used by the cells below.
poll_doc = dom.parseString(resp.content)
result = poll_doc.documentElement
print(result.toprettyxml(indent='  '))

In [None]:


# Print the tag name of each Content_Block found directly under the poll response root.
content_blocks = get_content_blocks(result)
for content_block in content_blocks:
    print(content_block.tagName)

In [None]:


# Flatten: every Content element of every Content_Block, in document order.
all_contents = [
    content
    for content_block in get_content_blocks(result)
    for content in get_contents(content_block)
]


In [None]:


# Collect every STIX_Package found under the Content elements, printing each one.
all_stixes = []
for content in all_contents:
    packages = get_stixes(content)
    all_stixes.extend(packages)
    for stix in packages:
        print(stix.toprettyxml())

In [None]:
# Index the packages by their `id` attribute; a repeated id keeps the last package.
stixes = {stix.getAttribute('id'): stix for stix in all_stixes}
len(stixes)

In [None]:
# Display the id -> STIX_Package element mapping built in the previous cell.
stixes

In [None]:
# Report how many STIX_Package elements were collected (duplicates by id included).
print(f'there are {len(all_stixes)} STIXes')
# Leftover ElementTree-style experiment, kept for reference:
# for child in root.getchildren():
#     print(child.tag)

In [None]:
# Save the raw poll response body. `resp.content` is bytes, so the file is
# opened in binary mode; str(bytes) would write the b'...' repr, not the XML.
with open('tmp.xml', 'wb') as writer:
    writer.write(resp.content)


## Scratch book

In [None]:

    
# Scratch: dump the first two levels of the poll response (tag names and attributes).
for node in result.childNodes:
    # Only element nodes have tagName/attributes; skipping everything else
    # (text, comments, ...) avoids an AttributeError on non-text, non-element nodes.
    if node.nodeType != node.ELEMENT_NODE:
        continue
    print(node.tagName + ":")
    for attrName, attrValue in node.attributes.items():
        print('\t', attrName, attrValue)
    for child_node in node.childNodes:
        if child_node.nodeType != child_node.ELEMENT_NODE:
            continue
        print('\t', child_node.tagName + ':')
        for attrName, attrValue in child_node.attributes.items():
            print('\t\t', attrName, attrValue)


In [None]:
# Scratch: walk the poll response with ElementTree, printing each top-level
# child's tag and attributes and collecting the attributes per tag name.
json_obj = {}

root = ET.fromstring(resp.content)
print(remove_xmlns(root.tag))
# Iterate the element directly: Element.getchildren() was removed in Python 3.9.
for child in root:
    tag_name = remove_xmlns(child.tag)
    print(f'  {tag_name}')
    # NOTE(review): this cell used to call xml_attribs_to_json(child), which is
    # not defined anywhere in this notebook. The attributes are recorded in
    # json_obj instead -- confirm this matches what that helper was meant to do.
    json_obj.setdefault(tag_name, []).append(dict(child.attrib))
    print(child.attrib)
#     print(remove_xmlns(child.tag), remove_xmlns(child.attrib))

In [None]:
# Scratch: list the attribute names available on the last response object.
dir(resp)

In [None]:
# Scratch: list the attribute names available on the parsed response root.
dir(root)

In [None]:
# Scratch: inspect the attribute mapping of the first child of the root.
# Index the element directly: Element.getchildren() was removed in Python 3.9.
dir(root[0].attrib)
# root[0].attrib

In [None]:
# Scratch: post a hand-written Poll_Request instead of the generated one.
# NOTE(review): the elements below are unprefixed while only the taxii_11
# prefix is declared; the commented-out variant uses the prefix -- confirm
# which form the server accepts.
data = '<Poll_Request collection_name="guest.dataForLast_7daysOnly" message_id="1" xmlns:taxii_11="http://taxii.mitre.org/messages/taxii_xml_binding-1.1"> \
  <Exclusive_Begin_Timestamp>2021-01-01T00:00:00Z</Exclusive_Begin_Timestamp> \
  <Inclusive_End_Timestamp>2021-01-10T23:00:00Z</Inclusive_End_Timestamp> \
  <Poll_Parameters allow_asynch="false"> \
    <Response_Type>FULL</Response_Type> \
  </Poll_Parameters> \
</Poll_Request>'

# data = '<taxii_11:Poll_Request \
#     xmlns:taxii_11="http://taxii.mitre.org/messages/taxii_xml_binding-1.1" \
#     message_id="1" \
#     collection_name="guest.dataForLast_7daysOnly"> \
#     <taxii_11:Exclusive_Begin_Timestamp>2021-01-01T00:00:00Z</taxii_11:Exclusive_Begin_Timestamp> \
#     <taxii_11:Inclusive_End_Timestamp>2021-01-10T23:00:00Z</taxii_11:Inclusive_End_Timestamp> \
#     <taxii_11:Poll_Parameters allow_asynch="false"> \
#         <taxii_11:Response_Type>FULL</taxii_11:Response_Type> \
#     </taxii_11:Poll_Parameters> \
# </taxii_11:Poll_Request>'


# `headers` was never defined in this notebook (NameError on a fresh kernel);
# use the same header helper as every other request.
resp = requests.post(get_poll_url(), data=data, headers=get_req_headers())
resp.content

In [None]:
misp_upload_stix_result = '{\n    "Event": {\n        "id": "475",\n        "orgc_id": "1",\n        "org_id": "1",\n        "date": "2021-01-20",\n        "threat_level_id": "4",\n        "info": "Imported from external STIX event",\n        "published": false,\n        "uuid": "8ef9f50b-8f75-4c97-987a-8a198a0eaea6",\n        "attribute_count": "2",\n        "analysis": "0",\n        "timestamp": "1611254908",\n        "distribution": "1",\n        "proposal_email_lock": false,\n        "user_id": "8",\n        "locked": false,\n        "publish_timestamp": "0",\n        "sharing_group_id": "0",\n        "disable_correlation": false,\n        "extends_uuid": "",\n        "event_creator_email": "kiet.tran@us-dde.net"\n    },\n    "User": {\n        "email": "kiet.tran@us-dde.net",\n        "id": "8"\n    },\n    "ThreatLevel": {\n        "name": "Undefined",\n        "id": "4"\n    },\n    "Attribute": [],\n    "ShadowAttribute": [],\n    "Object": [\n        {\n            "id": "27",\n            "name": "original-imported-file",\n            "meta-category": "file",\n            "description": "Object describing the original file used to import data in MISP.",\n            "template_uuid": "4cd560e9-2cfe-40a1-9964-7b2e797ecac5",\n            "template_version": "2",\n            "event_id": "475",\n            "uuid": "29c34a15-d46f-45b4-9d08-b3d3eb533329",\n            "timestamp": "1611254908",\n            "distribution": "5",\n            "sharing_group_id": "0",\n            "comment": "",\n            "deleted": false,\n            "first_seen": null,\n            "last_seen": null,\n            "ObjectReference": [],\n            "Attribute": [\n                {\n                    "id": "36591",\n                    "type": "attachment",\n                    "category": "External analysis",\n                    "to_ids": false,\n                    "uuid": "d33de582-0796-44fd-8e21-c8b2d75b2884",\n                    "event_id": "475",\n                 
   "distribution": "5",\n                    "timestamp": "1611254908",\n                    "comment": "",\n                    "sharing_group_id": "0",\n                    "deleted": false,\n                    "disable_correlation": true,\n                    "object_id": "27",\n                    "object_relation": "imported-sample",\n                    "first_seen": null,\n                    "last_seen": null,\n                    "value": "uploaded_stix_file.xml",\n                    "AttributeTag": [],\n                    "Galaxy": [],\n                    "ShadowAttribute": []\n                },\n                {\n                    "id": "36592",\n                    "type": "text",\n                    "category": "Other",\n                    "to_ids": false,\n                    "uuid": "095518ac-de7b-42ed-8177-fb8f8ddf1a8a",\n                    "event_id": "475",\n                    "distribution": "5",\n                    "timestamp": "1611254908",\n                    "comment": "",\n                    "sharing_group_id": "0",\n                    "deleted": false,\n                    "disable_correlation": true,\n                    "object_id": "27",\n                    "object_relation": "format",\n                    "first_seen": null,\n                    "last_seen": null,\n                    "value": "STIX 1.1",\n                    "AttributeTag": [],\n                    "Galaxy": [],\n                    "ShadowAttribute": []\n                }\n            ]\n        }\n    ],\n    "EventTag": [\n        {\n            "id": "2236",\n            "event_id": "475",\n            "tag_id": "14",\n            "local": false,\n            "Tag": {\n                "id": "14",\n                "name": "tlp:white",\n                "colour": "#ffffff",\n                "exportable": true,\n                "org_id": "0",\n                "user_id": "0",\n                "hide_tag": false,\n                "numerical_value": 
null,\n                "local": 0\n            }\n        }\n    ],\n    "Orgc": {\n        "id": "1",\n        "name": "CSS",\n        "uuid": "9b2f73af-e32f-43c0-8830-cee7414b9bb7",\n        "local": true\n    },\n    "Org": {\n        "id": "1",\n        "name": "CSS",\n        "uuid": "9b2f73af-e32f-43c0-8830-cee7414b9bb7",\n        "local": true\n    },\n    "Galaxy": [],\n    "RelatedEvent": [],\n    "Sighting": []\n}'

# Print the JSON text stored in misp_upload_stix_result (presumably a saved MISP STIX-upload response -- confirm).
print(misp_upload_stix_result)  