# A Minute of Your Time
## Data analysis

In [None]:
import dateutil
import json

import pandas as pd

from scripts import text_helpers

In [None]:
# Set this to the location of your data file.
# The file is opened as UTF-8 text and parsed as JSON further down; a relative
# path like this one is resolved against the current working directory.
data_file_location = '../../data/mock-data.json'

In [None]:
# Load the data
def ensure_camel(s):
    """
    Return `s` as a camel-cased key name.
    Needed because some JSON properties in the Azure DevOps API response are not camel-cased.
    Names that are already camel case, and the explicitly exempted names, are returned unchanged.
    """
    # Keys that must keep their original spelling even though they are not camel case
    exempt_names = ('_links',)

    if text_helpers.iscamel(s):
        return s
    if s in exempt_names:
        return s
    return text_helpers.camel(s)

# Decode the JSON file, handing each decoded object to text_helpers.remap_keys
# together with ensure_camel
with open(data_file_location, encoding='utf-8') as pull_requests_json_file:
    pull_requests_raw = json.loads(
        pull_requests_json_file.read(),
        object_hook=lambda decoded: text_helpers.remap_keys(ensure_camel, decoded),
    )

In [None]:
def get_data_from_pull_request(pull_request):
    """
    Extract the information we want to process from a pull request API object.

    Parameters:
        pull_request: mapping that provides the keys 'pullRequestId',
            'createdBy' (itself a mapping with 'displayName'), 'creationDate',
            'closedDate' and 'reviewers'.

    Returns:
        A list of five items, in this order: the pull request id, the author's
        display name, the parsed creation datetime, the parsed closed datetime,
        and the number of reviewers.

    Raises:
        KeyError: if any of the keys listed above is missing.
    """
    # Import the submodule explicitly: a bare `import dateutil` is not
    # guaranteed to load `dateutil.parser`, so relying on it only works when
    # some other package happens to have imported the submodule already.
    import dateutil.parser

    return [
        pull_request['pullRequestId'],
        pull_request['createdBy']['displayName'],
        dateutil.parser.parse(pull_request['creationDate']),
        dateutil.parser.parse(pull_request['closedDate']),
        len(pull_request['reviewers'])
    ]

In [None]:
# Build the pull request data frame: one row per raw pull request object
pull_requests = pd.DataFrame(
    data=list(map(get_data_from_pull_request, pull_requests_raw)),
    columns=['id', 'author', 'created_time', 'merged_time', 'num_reviewers'],
)

In [None]:
# Wall-clock time to complete: merged time minus created time, stored as 'ttl'
pull_requests['ttl'] = pull_requests['merged_time'].sub(pull_requests['created_time'])
pull_requests['ttl']

In [None]:
# Preview the first rows of the data frame to sanity-check the extracted columns
pull_requests.head()

In [None]:
# Show the dtype pandas assigned to each column
pull_requests.dtypes