In [39]:
import requests
import json
import os


# Download every citation matching `search_query` from the NTRS search API,
# one page at a time, and store the combined result list in data.json.
url = r"https://ntrs.nasa.gov"
url_search = r"https://ntrs.nasa.gov/api/citations/search"
# NOTE(review): the query mixes ASCII quotes with typographic quotes (e.g.
# around textile/composite/z-), one of which is unbalanced before "component",
# and contains "fatique" (presumably "fatigue"). It is kept byte-identical
# here because changing it changes the result set - confirm what is intended.
search_query = '(3d|"3 d"|"3-d"|three*dimension|3*dimension)+("woven"|weav*)+(“textile”|fib*|”composite”|”component|”plastic”)+(torsi*|bend*|mechanic*|compres*|tensi*|flex*|impact)+( stress|load*|force*|strain|propert*|failure|fatique|damage)+(“z-“|orthogonal|angle|interlock)-(nonwoven|print*|*bio*|therm*|concrete)'

max_data_per_page = 100

# Seconds to wait for the server before giving up instead of hanging the kernel.
request_timeout_seconds = 60

counter_for_recieved_results = 0

results = []

while True:
    # Request the next page, starting after the results received so far.
    payload = {
        "page": {
            "size": max_data_per_page,
            "from": counter_for_recieved_results,
        },
        "q": search_query,
    }
    r = requests.post(url_search, json=payload, timeout=request_timeout_seconds)
    # Fail loudly on an HTTP error instead of trying to parse an error body.
    r.raise_for_status()

    data = r.json()

    # print some stats
    print(data.keys())
    print(data['stats'])

    page_results = data['results']

    # print the number of elements in data['results']
    print("num of results in response: " + str(len(page_results)))

    # add the number of elements in data['results'] to the counter
    counter_for_recieved_results += len(page_results)

    # print the counter
    print("counter: " + str(counter_for_recieved_results))

    # add the results to the results list
    results += page_results

    # Stop once everything has been received. An empty page also ends the
    # loop: without that guard a total that is never reached exactly (for
    # example because it changed between requests) would loop forever.
    if not page_results or counter_for_recieved_results >= data['stats']['total']:
        break

# save results to json file with 4-space indentation
with open('data.json', 'w') as outfile:
    json.dump(results, outfile, indent=4)

# print the number of results
print("total number of results: " + str(len(results)))


dict_keys(['stats', 'results', 'aggregations'])
{'took': 971, 'total': 483, 'estimate': False, 'maxScore': 440.5776}
num of results in response: 100
counter: 100
dict_keys(['stats', 'results', 'aggregations'])
{'took': 942, 'total': 483, 'estimate': False, 'maxScore': 440.55014}
num of results in response: 100
counter: 200
dict_keys(['stats', 'results', 'aggregations'])
{'took': 1021, 'total': 483, 'estimate': False, 'maxScore': 440.5776}
num of results in response: 100
counter: 300
dict_keys(['stats', 'results', 'aggregations'])
{'took': 973, 'total': 483, 'estimate': False, 'maxScore': 440.55014}
num of results in response: 100
counter: 400
dict_keys(['stats', 'results', 'aggregations'])
{'took': 1053, 'total': 483, 'estimate': False, 'maxScore': 440.55014}
num of results in response: 83
counter: 483
total number of results: 483


In [40]:
# Work on a shallow copy so the downloaded list itself is never rebound.
results_copy = list(results)

# Collect every distinct 'stiType' value that occurs in the results.
stiTypes = {entry['stiType'] for entry in results_copy}

# Show the distinct values.
print(stiTypes)

# Write a template file: each stiType (sorted) mapped to an empty string.
stiType_template = {sti_type: '' for sti_type in sorted(stiTypes)}
with open('stiTypes.json', 'w') as outfile:
    outfile.write(json.dumps(stiType_template, indent=4))



{'CONFERENCE_PAPER', 'ACCEPTED_MANUSCRIPT', 'CONFERENCE_PROCEEDINGS', 'PREPRINT', 'CONTRACTOR_OR_GRANTEE_REPORT', 'SPECIAL_PUBLICATION', 'REPRINT', 'THESIS_DISSERTATION', 'CONTRACTOR_REPORT', 'TECHNICAL_MEMORANDUM', 'BOOK_CHAPTER', 'OTHER', 'PRESENTATION', 'TECHNICAL_PUBLICATION'}


In [41]:
# Pick the first result of every stiType as a sample and dump the samples to
# a JSON file keyed by stiType.
results_per_stiType = {}
for stiType in stiTypes:
    first_match = next(
        (candidate for candidate in results_copy if candidate['stiType'] == stiType),
        None,
    )
    if first_match is not None:
        results_per_stiType[stiType] = first_match

with open('results_per_stiType.json', 'w') as outfile:
    json.dump(results_per_stiType, outfile, indent=4)
    
    

In [64]:
def authors(result):
    """Return all authors of a result as one '; '-separated string.

    Each entry of ``result['authorAffiliations']`` contributes the author's
    name, followed by ``" (organization name, location)"`` built from
    whichever of those two organization fields are present and non-empty.
    The parentheses are omitted entirely when neither is available.

    Raises KeyError if 'authorAffiliations' or an author's name is missing.

    NOTE(review): BibTeX conventionally separates authors with " and "; the
    '; ' separator used here is kept because callers already rely on it.
    """
    authors_list = []
    for affiliation in result['authorAffiliations']:
        meta = affiliation['meta']
        author_str = meta['author']['name']

        organization = meta.get('organization')
        if isinstance(organization, dict):
            # Keep only usable text parts so a missing name cannot produce
            # "(, location)" and a missing pair cannot produce "()".
            details = [
                organization.get(key)
                for key in ('name', 'location')
            ]
            details = [part for part in details if isinstance(part, str) and part]
            if details:
                author_str += " (" + ", ".join(details) + ")"

        authors_list.append(author_str)

    # join the per-author strings with a semicolon separator
    return '; '.join(authors_list)

def date_string(result):
    """Return the date string that best describes when a result was published.

    Uses the 'publicationDate' of the first entry in ``result['publications']``
    when that entry exists and carries a non-empty date; otherwise falls back
    to ``result['submittedDate']``.

    Raises KeyError if the fallback is needed and 'submittedDate' is missing.
    """
    # An absent or empty 'publications' list must fall back rather than fail
    # on the [0] lookup.
    publications = result.get('publications') or []
    if publications:
        publication_date = publications[0].get('publicationDate')
        if publication_date:
            return publication_date

    return result['submittedDate']

def year(result):
    """Return the text before the first '-' of the result's date string."""
    leading_field, _, _ = date_string(result).partition('-')
    return leading_field

def month(result):
    """Return the English month name for the result's date string.

    The month is taken from the second '-'-separated field of the date
    (e.g. '10' in '1994-10-01'). Returns '' when the date has no such field
    or the field is not a two-digit month number from '01' to '12'.
    """
    month_names = {
        '01': 'January',
        '02': 'February',
        '03': 'March',
        '04': 'April',
        '05': 'May',
        '06': 'June',
        '07': 'July',
        '08': 'August',
        '09': 'September',
        '10': 'October',
        '11': 'November',
        '12': 'December',
    }

    date_fields = date_string(result).split('-')
    # A date without a month part (no '-') has only one field.
    if len(date_fields) < 2:
        return ''

    return month_names.get(date_fields[1], '')


def categories(result):
    """Return the result's 'subjectCategories' joined into one string.

    The entries are separated by a semicolon followed by a space.
    """
    separator = '; '
    return separator.join(result['subjectCategories'])

def keywords(result):
    """Return the result's 'keywords' joined into one string.

    The entries are separated by a semicolon followed by a space.
    """
    separator = '; '
    return separator.join(result['keywords'])
        

def abstract(result):
    """Return the result's abstract with a few special characters replaced.

    U+02D9 becomes "ff" (presumably a mis-extracted ligature - confirm),
    U+2212 (minus sign) becomes "-", and U+0398 becomes "Theta".
    """
    # (character to look for, text that takes its place)
    replacements = (
        ("\u02d9", "ff"),
        ("\u2212", "-"),
        ("\u0398", "Theta"),
    )

    cleaned = result['abstract']
    for needle, substitute in replacements:
        cleaned = cleaned.replace(needle, substitute)

    return cleaned

def reportNumber(result):
    """Return the result's 'otherReportNumbers' joined with '; '.

    Returns '' when the key is missing or holds no entries, so callers do
    not have to check for the key first.
    """
    reportNumber_list = result.get('otherReportNumbers') or []

    # convert list to string with semicolon as separator and return
    return '; '.join(reportNumber_list)



In [66]:
# load stiTypes assigned to bibtex types from stiTypes_assigned.json
with open('stiTypes_assigned.json', encoding='utf-8') as json_file:
    stiTypes_assigned = json.load(json_file)


def _bibtex_field(name, value):
    """Format one tab-indented `name = "value"` BibTeX line (no trailing comma)."""
    # A bare double quote would terminate the quote-delimited value early;
    # BibTeX only accepts it inside the value when it is wrapped in braces.
    return '\t' + name + ' = "' + str(value).replace('"', '{"}') + '"'


def _first_sponsor_name(meeting):
    """Return the organization name of a meeting's first sponsor, or None."""
    sponsors = meeting.get('sponsors') or []
    if not sponsors:
        return None
    organization = (sponsors[0].get('meta') or {}).get('organization') or {}
    return organization.get('name')


# write the results to a BibTeX file
# utf-8 is set explicitly so non-ASCII text in titles/abstracts does not
# depend on the platform's default encoding.
with open('data.bib', 'w', encoding='utf-8') as outfile:
    for result in results_copy:
        # print ID and title
        print(result['id'], result['title'])

        # The bibtex type is looked up by the result's stiType; an unknown
        # stiType or an empty assignment falls back to 'misc' so the entry
        # never starts with a bare '@{'.
        bibtex_type = stiTypes_assigned.get(result.get('stiType')) or 'misc'

        # First publication / meeting, or {} when there is none, so every
        # lookup below is a plain key test.
        publications = result.get('publications') or []
        first_publication = publications[0] if publications else {}
        meetings = result.get('meetings') or []
        first_meeting = meetings[0] if meetings else {}

        # Fields are collected first and joined with ',\n' afterwards, so no
        # trailing comma ever has to be removed from the file again.
        fields = [_bibtex_field('title', result['title'])]

        # authors, if the key 'authorAffiliations' exists
        if 'authorAffiliations' in result:
            fields.append(_bibtex_field('author', authors(result)))

        # year and month
        fields.append(_bibtex_field('year', year(result)))
        fields.append(_bibtex_field('month', month(result)))

        # url, if key downloads exists and is not an empty list
        if 'downloads' in result and result['downloads']:
            fields.append(_bibtex_field('url', url + result['downloads'][0]['links']['original']))

        # abstract, keywords and categories, if the keys exist
        if 'abstract' in result:
            fields.append(_bibtex_field('abstract', abstract(result)))
        if 'keywords' in result:
            fields.append(_bibtex_field('keywords', keywords(result)))
        if 'subjectCategories' in result:
            fields.append(_bibtex_field('categories', categories(result)))

        # DocumentType, if the key exists
        if 'stiTypeDetails' in result:
            fields.append(_bibtex_field('documentType', result['stiTypeDetails']))

        # DOI, taken from the first source identifier only
        if 'sourceIdentifiers' in result and result['sourceIdentifiers']:
            identifier = result['sourceIdentifiers'][0]
            if 'doi' in identifier['number'] or 'DOI' in identifier['type']:
                fields.append(_bibtex_field('doi', identifier['number']))

        # booktitle: publication name for 'inbook'; meeting name (else
        # publication name) for conference-style entries
        if bibtex_type == 'inbook':
            if 'publicationName' in first_publication:
                fields.append(_bibtex_field('booktitle', first_publication['publicationName']))
        elif bibtex_type == 'inproceedings' or bibtex_type == 'conference':
            if 'name' in first_meeting:
                fields.append(_bibtex_field('booktitle', first_meeting['name']))
            elif 'publicationName' in first_publication:
                fields.append(_bibtex_field('booktitle', first_publication['publicationName']))

        # publisher: the first publication's publisher, otherwise the first
        # sponsor of the first meeting (read from the 'sponsors' key)
        publisher = first_publication.get('publisher') or _first_sponsor_name(first_meeting)
        if publisher:
            fields.append(_bibtex_field('publisher', publisher))

        # address, if the first meeting has a location
        if 'location' in first_meeting:
            fields.append(_bibtex_field('address', first_meeting['location']))

        # report number for 'techreport' entries that actually have one
        if bibtex_type == 'techreport' and result.get('otherReportNumbers'):
            fields.append(_bibtex_field('number', reportNumber(result)))

        # write the entry: header, comma-separated fields, closing bracket
        outfile.write('@' + bibtex_type + '{' + str(result['id']) + ',\n')
        outfile.write(',\n'.join(fields))
        outfile.write('\n}\n\n')


19940012378 Effect of tow alignment on the mechanical performance of 3D woven textile composites
19960015567 Strain Gage Selection Criteria for Textile Composite Materials
19960021804 Experimental Investigation of Textile Composite Materials Using Moire Interferometry
19960015568 Compression Testing of Textile Composite Materials
20210025430 Experimental Observations of Damage States in Unnotched and Notched 3D Orthogonal Woven Coupons Loaded in Tension
19920004634 Mechanical properties of 2D and 3D braided textile composites
20150022116 Incorporation of Plasticity and Damage Into an Orthotropic Three-Dimensional Model with Tabulated Input Suitable for Use in Composite Impact Problems
20150019390 Incorporation of Plasticity and Damage Into an Orthotropic Three-Dimensional Model with Tabulated Input Suitable for Use in Composite Impact Problems
19940012373 An overview of the NASA textile composites program
19960010541 Out-of-plane properties
20160010260 Analysis and Characterization of 