In [None]:
import json
import os

import requests

In [None]:
# Define the API endpoint for OpenCTI. Replace the <...> placeholder below with
# the URL or IP of your own OpenCTI instance before running the notebook; every
# request in this notebook is POSTed to this /graphql address.
url = 'https://<YOUR OPENCTI ENDPOINT URL/IP>/graphql'

In [None]:
# Define the headers sent with every GraphQL request.
# The API key is read from the OPENCTI_TOKEN environment variable so that the
# secret does not have to be saved inside the notebook. If the variable is not
# set, the placeholder below is used and must be replaced by hand.
headers = {
    'Content-Type': 'application/json',
    'Authorization': 'Bearer ' + os.environ.get('OPENCTI_TOKEN', '<YOU API KEY>')
}

In [None]:
# GraphQL query that lists the reports stored in OpenCTI.
# Variables: $first is passed as the `first` argument (page size) and $cursor
# as the `after` argument of `reports`.
# For every report it returns both identifiers (`id` and `standard_id`), plus
# `pageInfo` (endCursor / hasNextPage).
query1="""query getAllReports($first: Int, $cursor:ID) {
    reports(first:$first, after:$cursor) {
      edges {
        node {
          id
          standard_id
        }
      }
    pageInfo {
      endCursor
      hasNextPage
    }  
  }
}"""

In [None]:
# GraphQL query that, given a report's ID, obtains the report together with its
# associated MITRE techniques and external references.
# - $id selects the container to read.
# - For a Report it returns name, description, content, content_mapping and the
#   external references (ids, source_name, url, description, created,
#   representative).
# - `objects` lists the entities in the container, filtered by $types/$search,
#   paged with $count/$cursor and sorted with $orderBy/$orderMode; for
#   AttackPattern nodes only `x_mitre_id` is selected.
query2="""query ReportToTTP(
    $id: String!
    $search: String
    $types: [String]
    $count: Int!
    $cursor: ID
    $orderBy: StixObjectOrStixRelationshipsOrdering
    $orderMode: OrderingMode
  ) {
    container(id: $id) {
      ...ContainerStixDomainObjectsLines_container_4GmerJ
  }
  }
  fragment ContainerStixDomainObjectLine_node on StixDomainObject {
    ... on AttackPattern {
      x_mitre_id
    }
  }
  fragment ContainerStixDomainObjectsLines_container_4GmerJ on Container {
    ... on Report {
        name
        description
        content
        content_mapping
        externalReferences {
          edges {
            node {
              standard_id
              id
              source_name
              url
              description
              created
              representative {
                main
                secondary
              }
            }
          }
        }
      }
    standard_id
    objects(
      types: $types
      search: $search
      first: $count
      after: $cursor
      orderBy: $orderBy
      orderMode: $orderMode
    ) {
      edges {
        node {
          ...ContainerStixDomainObjectLine_node
        }
      }
      pageInfo {
        endCursor
        hasNextPage
        globalCount
      }
    }
  }"""

In [None]:
# Obtain the list of report IDs in OpenCTI.
# query1 is paginated: follow the cursor returned in pageInfo until the server
# reports that there is no further page, so that reports beyond the first page
# are not silently dropped.
all_edges = []
cursor = None
while True:
    variables = {"first": 25000}
    if cursor is not None:
        variables["cursor"] = cursor
    payload = {'query': query1 , 'variables': variables}

    # Send the request and get the response
    response = requests.post(url, headers=headers, json=payload)
    response_dict = response.json()

    # A failed GraphQL query still returns JSON, but without usable data
    reports_page = (response_dict.get('data') or {}).get('reports')
    if reports_page is None:
        raise RuntimeError(
            f"OpenCTI returned no report data: {response_dict.get('errors')}"
        )

    all_edges.extend(reports_page.get('edges') or [])

    page_info = reports_page.get('pageInfo') or {}
    next_cursor = page_info.get('endCursor')
    # Stop on the last page, or if the server does not advance the cursor
    # (guards against looping forever on a misbehaving endpoint).
    if not page_info.get('hasNextPage') or not next_cursor or next_cursor == cursor:
        break
    cursor = next_cursor

# Expose the merged pages in the same shape as a single response, so that
# response_dict['data']['reports']['edges'] holds every report.
response_dict['data']['reports']['edges'] = all_edges

In [None]:
# Collect the two identifiers that reference each report: every entry of
# report_list is [id, standard_id].
report_edges = response_dict['data']['reports']['edges']
report_list = [
    [node['id'], node['standard_id']]
    for report_edge in report_edges
    for node in report_edge.values()
]

In [None]:
# Number of reports collected in report_list (displayed as the cell output)
len(report_list)

In [None]:
# Go through the report list and perform query #2 for each report, which
# obtains the MITRE TTPs and external references attached to that report.
def _fetch_report_ttps(report_id):
    """Run query2 for one report ID and return the decoded JSON response.

    Raises requests.exceptions.RequestException on transport errors and
    ValueError when the response body is not valid JSON.
    """
    variables = {"id": report_id,
                 "search": "",
                 "types": ["Attack-Pattern"],
                 "count": 5500,
                 "orderBy": "name",
                 "orderMode": "desc"}
    payload = {'query': query2 , 'variables': variables}
    response = requests.post(url, headers=headers, json=payload)
    return response.json()


def _has_container(result):
    """Return True when the response actually resolved the requested container."""
    if not isinstance(result, dict):
        return False
    return (result.get('data') or {}).get('container') is not None


data = []
for report in report_list:
    result = None
    # Each report carries two IDs ([id, standard_id]); try the first one and
    # fall back to the second. A failed GraphQL lookup does not raise: it comes
    # back as JSON with a null container, so the response content is checked
    # instead of relying on an exception.
    for rep_id in report:
        try:
            result = _fetch_report_ttps(rep_id)
        except (requests.exceptions.RequestException, ValueError) as exc:
            print(f"Query failed for report ID {rep_id}: {exc}")
            continue
        if _has_container(result):
            break

    # Keep the last response received for this report (even if it only holds
    # errors); skip the report only when neither ID produced any response.
    if result is not None:
        data.append(result)
    else:
        print(f"No response obtained for report {report}; skipping")

In [None]:
# Save every collected response as pretty-printed JSON
# (4-space indent) in opencti_dataset_references.json.
serialized = json.dumps(data, indent=4)
with open('opencti_dataset_references.json', 'w') as out_file:
    out_file.write(serialized)