# Imports

In [1]:
import requests  # To get the data
from requests.adapters import HTTPAdapter
from requests.packages.urllib3.util.retry import Retry
import asyncio
import aiohttp
from aiohttp import ClientTimeout, ClientError # For timeouts and aiohttp specific errors

from pymongo import MongoClient  # Database to store the data
import json  # File IO
from time import time  # To time the duration of the requests
from time import sleep
from IPython.display import display, clear_output
import urllib3
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

# Load Program Data

In [2]:
# Get list of Jira sources
# The file is decoded as UTF-8 explicitly: without `encoding`, open() falls back to the
# platform's locale encoding, which is not UTF-8 on every system.
with open('../0. DataDefinition/jira_data_sources.json', encoding='utf-8') as f:
    jira_data_sources = json.load(f)

# Connect to the database
db = MongoClient()['JiraRepos']

# Now-Invalid Jiras (skipped by every check/download cell below)
INVALID_JIRAS = ['Mindville', 'MariaDB']

# Investigate Jira Data Accessibility

In [None]:
def check_jira_url(jira_url):
    """Print a short accessibility report for a single Jira server.

    Three checks are run in order, and the function returns at the first one that fails:
      1. the base URL answers a HEAD request,
      2. `/rest/api/2/issuetype` returns a successful status code,
      3. `/rest/api/2/search` returns a successful status code and an issue total.

    Parameters:
        jira_url: Base URL of the Jira server (the REST paths are appended to it).

    Returns:
        None. All results are reported with print().
    """
    request_timeout = 30  # seconds; keeps one unresponsive server from hanging the cell

    def get_or_report(url):
        """GET `url`; return the response, or None after printing why the request failed."""
        try:
            return requests.get(url, timeout=request_timeout)
        except requests.exceptions.RequestException as e:
            print(f'❌ Request failed: {e}')
            return None

    def report_status(response):
        """Print the outcome of an API response and return True when it was successful."""
        if response.status_code < 300:
            print('✅ Jira API returned a successful response')
            return True
        print(response.status_code)
        print(response.text)
        print(response.url)
        print('❌ Jira API did not return a successful response')
        return False

    print('')
    print(f"💡 Check Jira: {jira_url}")
    print('')

    ## CHECK PROVIDED JIRA URL AVAILABILITY ##
    print(f'Checking Jira url existence with HEAD: {jira_url}')
    try:
        requests.head(jira_url, timeout=request_timeout)
    # requests raises its own exception hierarchy; the builtin ConnectionError is not part of it,
    # so the requests base class has to be caught here.
    except requests.exceptions.RequestException as e:
        print(f'❌ Provided Jira base url is not reachable: {e}')
        return
    print('✅ Provided Jira base url is reachable')

    ## CHECK PROVIDED JIRA URL API AVAILABILITY ##
    print('')
    print(f"Checking Jira api with GET: {jira_url + '/rest/api/2/issuetype'}")
    response = get_or_report(jira_url + '/rest/api/2/issuetype')
    if response is None or not report_status(response):
        return

    ## CHECK NUMBER OF ISSUES ##
    print('')
    print(f"Retrieving total issue count with GET: {jira_url + '/rest/api/2/search?jql=&maxResults=0'}")
    response = get_or_report(jira_url + '/rest/api/2/search?jql=&maxResults=0')
    if response is None:
        return
    if response.status_code < 300:
        # A successful status does not guarantee a JSON body with a 'total' entry
        try:
            total = response.json()['total']
        except (ValueError, KeyError, TypeError) as e:
            print(f'❌ Jira API response did not contain an issue total: {e}')
            return
        print(f"Total Number of Issues: {total}")
    report_status(response)

In [None]:
# Run the accessibility check for every configured Jira source,
# leaving out the ones listed as unreachable or empty
for jira_name, jira_obj in jira_data_sources.items():
    if jira_name not in INVALID_JIRAS:
        check_jira_url(jira_obj['jira_url'])

# Download Jira Data

### Helper Functions

In [3]:
def format_duration(start_time, end_time):
    """Format the time between two timestamps (in seconds) as `HH:MM:SS.ffff`.

    The fractional part is the sub-second remainder truncated to four digits
    (ten-thousandths of a second).
    """
    elapsed = end_time - start_time
    # Whole minutes elapsed in total; hours are derived from this truncated value
    total_minutes = int(elapsed / 60)
    fraction = int((elapsed % 1) * 10000)

    return "{:02}:{:02}:{:02}.{:04}".format(
        int(total_minutes / 60),   # hours
        int(total_minutes % 60),   # minutes within the hour
        int(elapsed % 60),         # seconds within the minute
        fraction,
    )

### Download Jira Issue Type Information

In [8]:
# Issue type definitions of every reachable Jira, keyed by Jira name and then by issue type name
output_json = {}

for jira_name, jira_data in jira_data_sources.items():
    
    # Ignore Jiras that we know are now unreachable or empty
    if jira_name in INVALID_JIRAS:
        print(f"Skipping Issue Type info for {jira_name} (in INVALID_JIRAS).")
        continue

    # Build the URL to get the information from
    jira_issuetype_url = jira_data['jira_url'] + '/rest/api/2/issuetype'

    try:
        print(f"Fetching Issue Type info for {jira_name} from {jira_issuetype_url}")
        response = requests.get(jira_issuetype_url, timeout=30)
        response.raise_for_status() # Raise an exception for HTTP errors (4xx or 5xx)
        # Get the issuetype definitions
        documented_issuetypes = {
            issuetype['name']: issuetype
            for issuetype in response.json()
        }
        # Save the information
        output_json[jira_name] = documented_issuetypes
    # This handler must come before RequestException: since requests 2.27 the decode error raised
    # by response.json() subclasses both, and would otherwise be reported as a fetch failure.
    except json.JSONDecodeError as e:
        print(f"❌ Error decoding JSON for Issue Type info for {jira_name}: {e}")
        continue
    except requests.exceptions.RequestException as e:
        print(f"❌ Error fetching Issue Type info for {jira_name}: {e}")
        continue # Continue to the next Jira source

# Write JSON to file for later use. `output_json` is reassigned by the issue link type cell,
# so without this the collected definitions are lost.
# NOTE(review): file name follows the pattern of 'jira_field_information.json' — confirm.
with open('jira_issuetype_information.json', 'w', encoding='utf-8') as json_file:
    json.dump(output_json, json_file, ensure_ascii=False, indent=4)

Fetching Issue Type info for Apache from https://issues.apache.org/jira/rest/api/2/issuetype
Fetching Issue Type info for Hyperledger from https://jira.hyperledger.org/rest/api/2/issuetype
❌ Error fetching Issue Type info for Hyperledger: HTTPSConnectionPool(host='jira.hyperledger.org', port=443): Max retries exceeded with url: /rest/api/2/issuetype (Caused by NameResolutionError("<urllib3.connection.HTTPSConnection object at 0x000002453BB7B250>: Failed to resolve 'jira.hyperledger.org' ([Errno 11001] getaddrinfo failed)"))
Fetching Issue Type info for IntelDAOS from https://jira.hpdd.intel.com/rest/api/2/issuetype
❌ Error fetching Issue Type info for IntelDAOS: HTTPSConnectionPool(host='jira.hpdd.intel.com', port=443): Max retries exceeded with url: /rest/api/2/issuetype (Caused by SSLError(SSLCertVerificationError(1, "[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: Hostname mismatch, certificate is not valid for 'jira.hpdd.intel.com'. (_ssl.c:997)")))
Fetching Issue Type 

### Download Jira Issue Link Type Information

In [None]:
# Issue link type definitions of every reachable Jira, keyed by Jira name and then by link type name
output_json = {}

for jira_name, jira_data in jira_data_sources.items():
    
    # Ignore Jiras that we know are now unreachable or empty
    if jira_name in INVALID_JIRAS:
        print(f"Skipping Issue Link Type info for {jira_name} (in INVALID_JIRAS).")
        continue

    # Build the URL to get the information from
    jira_issuelinktype_url = jira_data['jira_url'] + '/rest/api/2/issueLinkType'

    try:
        print(f"Fetching Issue Link Type info for {jira_name} from {jira_issuelinktype_url}")
        response = requests.get(jira_issuelinktype_url, timeout=30)
        response.raise_for_status() # Raise an exception for HTTP errors
        # Get the issuelinktype definitions
        data = response.json()
        if 'issueLinkTypes' in data:
            documented_issuelinktypes = {
                issuelinktype['name']: issuelinktype
                for issuelinktype in data['issueLinkTypes']
            }
            # Save the information
            output_json[jira_name] = documented_issuelinktypes
        else:
            print(f"⚠️ 'issueLinkTypes' key not found in response for {jira_name}. Response: {data}")
            output_json[jira_name] = {} # Save empty if key missing but request succeeded

    # This handler must come before RequestException: since requests 2.27 the decode error raised
    # by response.json() subclasses both, and would otherwise be reported as a fetch failure.
    except json.JSONDecodeError as e:
        print(f"❌ Error decoding JSON for Issue Link Type info for {jira_name}: {e}")
        continue
    except requests.exceptions.RequestException as e:
        print(f"❌ Error fetching Issue Link Type info for {jira_name}: {e}")
        continue

# Write the result to a JSON file for later use
# NOTE(review): file name follows the pattern of 'jira_field_information.json' — confirm.
with open('jira_issuelinktype_information.json', 'w', encoding='utf-8') as json_file:
    json.dump(output_json, json_file, ensure_ascii=False, indent=4)

### Download Jira Issue Field Information

In [19]:
# Field definitions of every reachable Jira, keyed by Jira name
jiras_fields_information = {}

for jira_name, jira_data in jira_data_sources.items():
    
    # Ignore Jiras that we know are now unreachable or empty
    if jira_name in INVALID_JIRAS:
        print(f"Skipping Field info for {jira_name} (in INVALID_JIRAS).")
        continue
    
    field_url = f"{jira_data['jira_url']}/rest/api/2/field"
    try:
        print(f"Fetching Field info for {jira_name} from {field_url}")
        # Query Jira for field information
        response = requests.get(field_url, timeout=30)
        response.raise_for_status() # Raise an exception for HTTP errors
        # Store result in JSON
        jiras_fields_information[jira_name] = response.json()
    # This handler must come before RequestException: since requests 2.27 the decode error raised
    # by response.json() subclasses both, and would otherwise be reported as a fetch failure.
    except json.JSONDecodeError as e:
        print(f"❌ Error decoding JSON for Field info for {jira_name}: {e}")
        continue
    except requests.exceptions.RequestException as e:
        print(f"❌ Error fetching Field info for {jira_name}: {e}")
        continue

# Write JSON to file for later use
with open('jira_field_information.json', 'w', encoding='utf-8') as json_file:
    json.dump(jiras_fields_information, json_file, ensure_ascii=False, indent=4)

Fetching Field info for Apache from https://issues.apache.org/jira/rest/api/2/field
Fetching Field info for Hyperledger from https://jira.hyperledger.org/rest/api/2/field
❌ Error fetching Field info for Hyperledger: HTTPSConnectionPool(host='jira.hyperledger.org', port=443): Max retries exceeded with url: /rest/api/2/field (Caused by NameResolutionError("<urllib3.connection.HTTPSConnection object at 0x000002457DFA4BB0>: Failed to resolve 'jira.hyperledger.org' ([Errno 11001] getaddrinfo failed)"))
Fetching Field info for IntelDAOS from https://jira.hpdd.intel.com/rest/api/2/field
❌ Error fetching Field info for IntelDAOS: HTTPSConnectionPool(host='jira.hpdd.intel.com', port=443): Max retries exceeded with url: /rest/api/2/field (Caused by SSLError(SSLCertVerificationError(1, "[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: Hostname mismatch, certificate is not valid for 'jira.hpdd.intel.com'. (_ssl.c:997)")))
Fetching Field info for JFrog from https://jfrog.com/jira/rest/ap

### Download Jira Data Commands

In [4]:
async def fetch_json_with_retry(session, url, timeout_seconds, retries=5, backoff_factor=1, status_forcelist=None):
    """GET `url` through an aiohttp session and return its decoded JSON body, retrying on failure.

    Parameters:
        session: Session object whose `get()` is used for the request.
        url: URL to request.
        timeout_seconds: Total timeout applied to each individual attempt.
        retries: Maximum number of attempts.
        backoff_factor: Wait before the next attempt is `backoff_factor * 2 ** attempt` seconds.
        status_forcelist: HTTP status codes that are turned into a retry before the response is
            inspected any further. Defaults to {429, 500, 502, 503, 504}.

    Returns:
        The decoded JSON body, or None when `retries` is less than 1 (no attempt is made).

    Raises:
        The client error, timeout or JSON decode error of the final attempt.
    """
    if status_forcelist is None:
        status_forcelist = {429, 500, 502, 503, 504}

    for attempt in range(retries):
        is_last_attempt = attempt == retries - 1
        try:
            async with session.get(url, timeout=ClientTimeout(total=timeout_seconds)) as response:
                if response.status in status_forcelist and not is_last_attempt:
                    # Referenced through the aiohttp module: the bare name ClientResponseError
                    # is not imported, so using it directly raised NameError instead of retrying.
                    raise aiohttp.ClientResponseError(
                        response.request_info,
                        response.history,
                        status=response.status,
                        message=f"Retryable status: {response.status}",
                        headers=response.headers,
                    )
                response.raise_for_status()  # Raise an exception for bad status codes (4xx or 5xx)
                return await response.json()
        except (ClientError, asyncio.TimeoutError, json.JSONDecodeError) as e:
            if is_last_attempt:
                print(f"Final attempt failed for URL {url}: {e}")
                raise  # Re-raise the last exception after all retries

            sleep_time = backoff_factor * (2 ** attempt)
            print(f"Request to {url} failed (attempt {attempt + 1}/{retries}): {e}. Retrying in {sleep_time}s...")
            await asyncio.sleep(sleep_time)

    # Only reachable when the loop body never ran (retries < 1): every attempt returns or raises
    return None


async def download_and_write_data_mongo(
    jira_data_source,
    num_desired_results = None,  # Leave as "None" to download all, otherwise specify a number
    iteration_max = 250,  # Recommended to keep at or below 500
    start_index = 0,  # This allows you to start back up from a different place
    request_timeout = 30 # Timeout for individual requests in seconds
    ):
    """Download the issues of one Jira server (with changelog) and store them in MongoDB.

    Issues are requested page by page through `/rest/api/2/search`, buffered in memory and
    inserted into the collection named after the Jira source in batches.

    Parameters:
        jira_data_source: Mapping with at least the keys 'name' (collection name) and
            'jira_url' (base URL of the Jira server).
        num_desired_results: Upper bound on the number of issues to download; falsy means all.
        iteration_max: Maximum number of issues requested per page. Must be at least 1.
        start_index: Index of the first issue to request, for resuming an earlier run.
        request_timeout: Timeout in seconds for each individual request.

    Returns:
        None. Progress and the final number of stored issues are reported with print().

    Raises:
        ValueError: If `iteration_max` is less than 1 (the download could never progress).
    """
    if iteration_max < 1:
        raise ValueError(f"iteration_max must be at least 1, got {iteration_max}")

    def build_url(base_url, start_index, iteration_max=100):
        """Build the search URL for one page of issues, ordered by creation date."""
        # The ORDER BY clause must be part of the `jql` value. As a separate `&ORDER...`
        # parameter it is not part of the query, so consecutive pages have no guaranteed order.
        return (
            base_url +
            f"/rest/api/2/search?"
            f"jql=ORDER%20BY%20created%20ASC"
            f"&startAt={start_index}"
            f"&maxResults={iteration_max}"
            f"&expand=changelog"
            )

    collection = db[jira_data_source['name']] # Synchronous MongoDB client

    # How many issues to collect before writing to MongoDB
    num_issues_per_write = 10000
    issues = []
    issues_downloaded = 0

    def write_buffered_issues():
        """Insert the buffered issues into MongoDB and clear the buffer; keep them on failure."""
        nonlocal issues, issues_downloaded
        if not issues:
            return
        try:
            # Synchronous DB write
            collection.insert_many(issues)
        except Exception as e:
            # The buffer is kept so that a later write attempt can still store these issues
            print(f"Error writing issues to MongoDB: {e}")
            return
        print(f"... {len(issues)} issues written to database ...")
        issues_downloaded += len(issues)
        issues = []

    async with aiohttp.ClientSession() as session:
        # Available and requested number of results
        initial_url = build_url(jira_data_source['jira_url'], 0, 0)
        try:
            initial_response_json = await fetch_json_with_retry(session, initial_url, request_timeout)
        except Exception as e:
            print(f"Error fetching total number of results: {e}")
            return
        if not isinstance(initial_response_json, dict) or 'total' not in initial_response_json:
            print(f"Could not fetch total number of results from {initial_url}. Response: {initial_response_json}")
            return
        num_available_results = initial_response_json['total']

        print(f'Number of Desired Results   : {num_desired_results if num_desired_results else "All"}')
        print(f'Number of Available Results : {num_available_results}')
        print('')

        # Set the number of results to retrieve based on information from Jira server
        if not num_desired_results:
            num_remaining_results = num_available_results
        else:
            num_remaining_results = min(int(num_desired_results), num_available_results)
        # Adjust remaining results based on their start index
        num_remaining_results -= start_index

        max_count_width = len(str(num_available_results)) + 1
        print(f"Total Remaining:{num_remaining_results:< {max_count_width}}")

        while num_remaining_results > 0:
            start_time = time()
            num_items_to_retrieve = min(iteration_max, num_remaining_results)

            url = build_url(jira_data_source['jira_url'], start_index, num_items_to_retrieve)
            try:
                response_json = await fetch_json_with_retry(session, url, request_timeout)
            except Exception as e:
                print(f"Failed to fetch or process issues from {url} after retries: {e}")
                break

            if not response_json:
                print(f"Warning: Received no valid JSON response from {url} after retries.")
                break
            if 'issues' not in response_json:
                print(f"Warning: 'issues' key not found in response from {url}. Response JSON: {response_json}")
                break

            returned_issues = response_json['issues']
            num_returned_issues = len(returned_issues)
            if num_returned_issues == 0:
                # Without this exit the counters would never change and the loop would not end
                print(f'No issues returned in the last request (expected {num_items_to_retrieve}). Exiting download for {jira_data_source["name"]}.')
                break

            issues.extend(returned_issues)
            num_remaining_results -= num_returned_issues
            end_index = start_index + num_returned_issues - 1

            print(
                f"Total Remaining:{num_remaining_results:< {max_count_width}}  "
                f"Retrieved Items: {start_index:< {max_count_width}} - {end_index:< {max_count_width}} ({num_returned_issues} issues) "
                f"Duration: {format_duration(start_time, time())}")

            start_index += num_returned_issues

            if len(issues) >= num_issues_per_write:
                write_buffered_issues()

    # Store whatever is still buffered, regardless of how the loop ended, so that a failed
    # request does not discard the issues downloaded before it
    write_buffered_issues()

    print('')
    print(f"Number of Downloaded Issues for {jira_data_source['name']}: {issues_downloaded}")
    if len(issues) > 0:
        print(f"Warning: {len(issues)} issues remain in memory and were not written to the database.")


In [9]:
# Last download time: 6h 47m
await download_and_write_data_mongo(jira_data_sources['Apache'])

CancelledError: 

In [None]:
# Last download time: 0h 27m
download_and_write_data_mongo(jira_data_sources['Hyperledger'])

In [12]:
# Last download time: 0h 5m
download_and_write_data_mongo(jira_data_sources['IntelDAOS'])

SSLError: HTTPSConnectionPool(host='jira.hpdd.intel.com', port=443): Max retries exceeded with url: /rest/api/2/search?jql=&ORDER%20BY%20created%20ASC&startAt=0&maxResults=0&expand=changelog (Caused by SSLError(SSLCertVerificationError(1, "[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: Hostname mismatch, certificate is not valid for 'jira.hpdd.intel.com'. (_ssl.c:997)")))

In [13]:
# Last download time: 0h 12m
download_and_write_data_mongo(jira_data_sources['JFrog'])

JSONDecodeError: Expecting value: line 1 column 1 (char 0)

In [11]:
# Last download time: 6h 26m
download_and_write_data_mongo(jira_data_sources['Jira'])

Number of Desired Results   : All
Number of Available Results : 335613

Total Remaining: 335613
Total Remaining: 335363  Retrieved Items:  0      -  249     Duration: 00:00:08.2524
Total Remaining: 335113  Retrieved Items:  250    -  499     Duration: 00:00:08.2235
Total Remaining: 334863  Retrieved Items:  500    -  749     Duration: 00:00:09.7797
Total Remaining: 334613  Retrieved Items:  750    -  999     Duration: 00:00:09.5789


KeyboardInterrupt: 

In [15]:
# Last download time: 0h 30m
download_and_write_data_mongo(jira_data_sources['JiraEcosystem'], num_desired_results=1000)

Number of Desired Results   : 1000
Number of Available Results : 43130

Total Remaining: 1000
Total Remaining: 900   Retrieved Items:  0    -  99    Duration: 00:00:03.6841
Total Remaining: 800   Retrieved Items:  100  -  199   Duration: 00:00:02.9340
Total Remaining: 700   Retrieved Items:  200  -  299   Duration: 00:00:03.2498
Total Remaining: 600   Retrieved Items:  300  -  399   Duration: 00:00:02.9121
Total Remaining: 500   Retrieved Items:  400  -  499   Duration: 00:00:03.2263
Total Remaining: 400   Retrieved Items:  500  -  599   Duration: 00:00:04.0727
Total Remaining: 300   Retrieved Items:  600  -  699   Duration: 00:00:04.3523
Total Remaining: 200   Retrieved Items:  700  -  799   Duration: 00:00:03.4787
Total Remaining: 100   Retrieved Items:  800  -  899   Duration: 00:00:03.8095
Total Remaining: 0     Retrieved Items:  900  -  999   Duration: 00:00:04.1362
... Issues written to database ...

Number of Downloaded Issues: 1000


In [None]:
# download_and_write_data_mongo(jira_data_sources['MariaDB'])

In [None]:
# download_and_write_data_mongo(jira_data_sources['Mindville'])

In [None]:
# Last download time: 1h 26m
download_and_write_data_mongo(jira_data_sources['Mojang'])

In [None]:
# Last download time: 3h 23m
download_and_write_data_mongo(jira_data_sources['MongoDB'])

In [None]:
# Last download time: 0h 50m
download_and_write_data_mongo(jira_data_sources['Qt'])

In [10]:
# Last download time: 3h 58m
download_and_write_data_mongo(jira_data_sources['RedHat'])

Number of Desired Results   : All
Number of Available Results : 0

Total Remaining: 0

Number of Downloaded Issues: 0


In [None]:
# Last download time: 0h 24m
download_and_write_data_mongo(jira_data_sources['Sakai'])

In [None]:
# Last download time: 1h 25m
download_and_write_data_mongo(jira_data_sources['SecondLife'])

In [None]:
# Last download time: 1h 25m
download_and_write_data_mongo(jira_data_sources['Sonatype'])

In [None]:
# Last download time: 0h 20m
download_and_write_data_mongo(jira_data_sources['Spring'])

### Download Jira Issue Comments

In [None]:
def download_jira_issue_comments(jira_data_source, *, max_comments_per_query=50, resume_at_date='0', query_wait_time_minutes=None):
    """Download the comments of every stored issue of one Jira and attach them in MongoDB.

    Issues of the collection named after the Jira source that have no `fields.comments` yet are
    processed in order of creation date. For each issue all comment pages are requested and the
    combined list is written to `fields.comments` of that issue document.

    Parameters:
        jira_data_source: Mapping with at least the keys 'name' (collection name) and
            'jira_url' (base URL of the Jira server).
        max_comments_per_query: Page size used when requesting comments.
        resume_at_date: Only issues whose `fields.created` is greater than or equal to this
            value are processed.
        query_wait_time_minutes: Pause after every comment request, in minutes. Falsy disables
            the pause.

    Raises:
        Exception: If Jira answers with error messages other than the known, skippable ones.
    """

    def build_url(base_url, issue_key, comments_start_index=0, max_comments_per_query=max_comments_per_query):
        """Build the comment URL of one issue for the page starting at `comments_start_index`."""
        return (
            base_url +
            f"/rest/api/2/issue/{issue_key}/comment"
            f"?orderBy=created"
            f"&startAt={comments_start_index}"
            f"&maxResults={max_comments_per_query}"
            )

    # Ignore Jiras that we know are now unreachable or empty
    if jira_data_source['name'] in INVALID_JIRAS:
        print(f"Cannot download comments for {jira_data_source['name']} due to innaccessible Jira repo.")
        return

    # Error messages that mark an issue as inaccessible; such issues are skipped instead of aborting
    acceptable_failure_messages = [
        'Login Required',
        'Issue Does Not Exist',
        'Issue does not exist or you do not have permission to see it.',
    ]

    progress_bar_num_chunks = 100
    # Seconds allowed per request, so that a stalled connection cannot hang the download forever
    request_timeout_seconds = 30

    collection = db[jira_data_source['name']]

    print('Querying MongoDB for list of remaining issue keys to get comments for ...')

    # The data is downloaded per issue key, so we must get a complete list of all issue keys
    # This could be kept as a generator (by removing the explicit "list()" operator), but then we have to query the dataset twice.
    # This is a space-time tradeoff, and I have chosen time.
    jira_issue_keys = list(collection.aggregate([
        # Only get issues without comments already
        { '$match': { 'fields.comments': { '$eq': None } } },
        # We only need the issue id, key, created date
        { '$project': { 'key': 1, 'created': '$fields.created' } },
        # Only get issues at or after our "resume" date
        { '$match': { 'created': { '$gte': resume_at_date } } },
        # Sort the results by created date so we can resume if failure occurs
        { '$sort': { 'created': 1 } },
    ], allowDiskUse=True))

    # Get the total count of issues so we can get an understanding of progress
    jira_query_issue_count = len(jira_issue_keys)
    num_issues_complete = 0

    print(f"There are {jira_query_issue_count} remaining issues.")

    # The session is used as a context manager so its connections are released on every exit path
    with requests.Session() as session:
        # Configure retries to make the requests more stable
        retry = Retry(total=4, connect=4, backoff_factor=0.5)
        adapter = HTTPAdapter(max_retries=retry)
        session.mount('http://', adapter)
        session.mount('https://', adapter)

        # Loop through the issue keys, downloading the comments one issue at a time
        for issue in jira_issue_keys:

            issue_comments = []
            comments_index = 0

            # We don't know how many comments exist until we ask for the first page, so we loop until done
            while True:

                # Get the issue comments
                url = build_url(jira_data_source['jira_url'], issue['key'], comments_start_index=comments_index)
                # NOTE(review): verify=False disables TLS certificate verification for every Jira.
                response = session.get(url, verify=False, timeout=request_timeout_seconds)
                try:
                    response_json = response.json()
                except ValueError:
                    # Report where the download stopped before the decode error ends the run
                    print(f"\nWas working on {issue['key']} with creation date {issue['created']} ({num_issues_complete:,} / {jira_query_issue_count:,})")
                    print(f"Response from {url} (status {response.status_code}) was not valid JSON.")
                    raise

                # Check if the response is valid. Some issues are private, etc., so we skip them
                if 'errorMessages' in response_json:
                    # We don't want to skip all error messages, so only the known ones are tolerated;
                    # anything else stops the script so it can be investigated.
                    if any(message in response_json['errorMessages'] for message in acceptable_failure_messages):
                        break  # Stop collecting comments for this issue, and move on to the next issue
                    # Otherwise ...
                    print(f"\nWas working on {issue['key']} with creation date {issue['created']} ({num_issues_complete:,} / {jira_query_issue_count:,})")
                    print('\nLast response json:')
                    display(response_json)
                    raise Exception('Not sure why the network request has failed.')

                # Save this round of issue comments
                page_comments = response_json['comments']
                issue_comments.extend(page_comments)
                comments_index = len(issue_comments)

                # Wait a little as to not overload the number of requests being sent.
                # sleep() takes seconds, so the value given in minutes is converted first.
                if query_wait_time_minutes:
                    sleep(query_wait_time_minutes * 60)

                # Stop once all comments are collected. `>=` and the empty-page check also end the
                # loop when the reported total shrinks or a page comes back empty, which an
                # equality test alone would never detect.
                if comments_index >= response_json['total'] or not page_comments:
                    break

            # Write all of the comments to the MongoDB Jira issue at once
            collection.update_one(
                {'_id': issue['_id']},
                { '$set': { 'fields.comments': issue_comments } }
            )

            num_issues_complete += 1

            # Output progress
            clear_output(wait=True)
            print(f"Jira: {jira_data_source['name']}")
            print(f"Number of issues to download comments from: {jira_query_issue_count:,}")
            print(f"resume_at_date: {resume_at_date}")
            print('')
            print(f"Last confirmed issue {issue['key']} with creation date {issue['created']} ({num_issues_complete:,} / {jira_query_issue_count:,})")
            print(f"Progress: [{'#'*round((num_issues_complete/jira_query_issue_count)*progress_bar_num_chunks):.<{progress_bar_num_chunks}}]")

In [None]:
# Previous run took multiple weeks because of extreme rate-limiting.
download_jira_issue_comments(jira_data_sources['Apache'], query_wait_time_minutes=0.1)

In [None]:
# Previous run took 1h 40m (~290/m)
download_jira_issue_comments(jira_data_sources['Hyperledger'])

In [None]:
# Previous run took 3h 10m (~50/m)
download_jira_issue_comments(jira_data_sources['IntelDAOS'])

In [None]:
# Previous run took 1h 20m (~200/m)
download_jira_issue_comments(jira_data_sources['JFrog'])

In [None]:
# Previous run took 21h 20m (~215/m)
download_jira_issue_comments(jira_data_sources['Jira'])

In [None]:
# Previous run took 6h 40m (~105/m)
download_jira_issue_comments(jira_data_sources['JiraEcosystem'])

In [None]:
# # Last download time: h m (/m)
# download_jira_issue_comments(
#     jira_data_sources['MariaDB']
# )

In [None]:
# # Last download time: h m (/m)
# download_jira_issue_comments(
#     jira_data_sources['Mindville']
# )

In [None]:
# Previous run took 14h 20m (~490/m)
download_jira_issue_comments(jira_data_sources['Mojang'])

In [None]:
# Previous run took 18h 20m (~125/m)
download_jira_issue_comments(jira_data_sources['MongoDB'])

In [None]:
# Previous run took 6h 10m (~400/m)
download_jira_issue_comments(jira_data_sources['Qt'])

In [None]:
# Previous run took 18h 5m (~325/m)
download_jira_issue_comments(jira_data_sources['RedHat'])

In [None]:
# Previous run took 16h 50m (~50/m)
download_jira_issue_comments(jira_data_sources['Sakai'])

In [None]:
# Previous run took 0h 16m (~115/m)
download_jira_issue_comments(jira_data_sources['SecondLife'])

In [None]:
# Previous run took 10h 50m (~135/m)
download_jira_issue_comments(jira_data_sources['Sonatype'])

In [None]:
# Previous run took 8h 0m (~145/m)
download_jira_issue_comments(jira_data_sources['Spring'])