In [21]:
from mongoengine import connect
from datetime import datetime
import math
import json
import copy
import pandas as pd
import sys
sys.path.insert(0, '..\\mongo-db-scripts')
from Bug import Bug

# Register the default mongoengine connection to the local 'bug_reports_db' database.
connect('bug_reports_db', host='127.0.0.1', port=27017)

# File handles are deliberately not named `input`: binding that name at cell
# level would shadow the builtin input() for the rest of the kernel session.

# Field names whose changes are counted (membership is tested in count_changes).
with open('data\\fields_name_not_cf.json') as fields_file:
  fields = json.load(fields_file)

# Lookup keyed by str(bug_id); each value is handed to get_interval as the
# bug's creation timestamp.
with open('data\\bugs_id_creationtime_dict.json') as creation_times_file:
  bugs_creation_dict = json.load(creation_times_file)

In [22]:
# strptime layout used for both creation dates and history timestamps,
# e.g. 2020-01-31T08:15:00Z.
DATE_FORMAT = '%Y-%m-%dT%H:%M:%SZ'

# Bucket labels, narrowest window first; get_interval returns an index into
# this list.
intervals = [
  'up to 1h',
  'up to 12h',
  'up to 1 week',
  'total',
]

# Collects one dict of counters per processed bug.
bugs_total_changes = []

# Zeroed counter template with one entry per interval label.
temp_bug_init = dict.fromkeys(intervals, 0)

def get_interval(creation_date, when_changed):
  """Classify how long after creation a change happened.

  Both arguments are strings laid out as DATE_FORMAT.

  Returns 0 if the change is at most 1 hour after creation, 1 if at most
  12 hours, 2 if at most 7 days, and 3 otherwise. Callers use the result
  as an index into `intervals`.
  """
  created_at = datetime.strptime(creation_date, DATE_FORMAT)
  changed_at = datetime.strptime(when_changed, DATE_FORMAT)

  # Window lengths, narrowest first; a window's position is the value returned.
  window_offsets = (
    pd.DateOffset(hours=1),
    pd.DateOffset(hours=12),
    pd.DateOffset(days=7),
  )

  for index, offset in enumerate(window_offsets):
    if changed_at <= created_at + offset:
      return index

  # Later than every bounded window.
  return len(window_offsets)

def count_changes(bug):
  """Tally a bug's tracked field changes cumulatively per time interval.

  `bug` is a dict with a 'bug_id' and a 'history' list; every history entry
  carries a 'when' timestamp and a 'changes' list of dicts with a
  'field_name'. Only changes whose field name is in `fields` are counted.

  A change that falls in a given interval is also added to every later
  interval, so each counter includes the ones before it and 'total' counts
  every tracked change.

  Returns a dict keyed by the labels in `intervals`, or an empty dict when
  the bug has no entry in `bugs_creation_dict`.
  """
  bug_key = str(bug['bug_id'])

  # Without a known creation time the changes cannot be bucketed.
  if bug_key not in bugs_creation_dict:
    return {}

  created_at = bugs_creation_dict[bug_key]
  tallies = copy.deepcopy(temp_bug_init)

  for entry in bug['history']:
    first_bucket = get_interval(created_at, entry['when'])

    # All changes of one history entry share the entry's timestamp.
    tracked = sum(
      1 for change in entry['changes'] if change['field_name'] in fields
    )

    for label in intervals[first_bucket:]:
      tallies[label] += tracked

  return tallies

In [None]:
# Maximum number of bug documents fetched per query.
BUGS_LIMIT = 50000
total_bugs = Bug.objects.count()
# Despite its name, this is the number of pages (queries) needed to cover
# every bug; the name is kept so other cells that may read it still work.
bugs_per_iteration = math.ceil(total_bugs/BUGS_LIMIT)

# Start from an empty accumulator so that re-running this cell does not append
# a second copy of every bug to results left over from a previous run.
bugs_total_changes = []

# NOTE(review): pages are read with skip/limit and no explicit sort — confirm
# the collection is not modified while this runs, otherwise pages could
# overlap or miss documents.
for i in range(bugs_per_iteration):
  skip_quantity = i * BUGS_LIMIT
  result = Bug.objects.only('bug_id', 'history').limit(BUGS_LIMIT).skip(skip_quantity)

  # Convert the page's documents to plain dicts, the shape count_changes reads.
  bugs = [bug.to_mongo().to_dict() for bug in result]

  for bug in bugs:
    # count_changes returns {} for bugs without a known creation time, so
    # those records end up holding only their 'bug_id'.
    temp_bug = count_changes(bug)
    temp_bug['bug_id'] = bug['bug_id']
    bugs_total_changes.append(temp_bug)

In [24]:
# Persist the collected per-bug counters as one JSON array.
with open('data\\processed_bugs_total_changes_date.json', 'w') as results_file:
  json.dump(bugs_total_changes, fp=results_file)