In [1]:
from tqdm.notebook import tqdm
from time import sleep
import numpy as np
import pandas as pd
import requests
import json

%load_ext autoreload
%autoreload 2
sys.path.append('../')
from src.reviews import *

In [2]:
# Name of the OpenStack project used to build file paths and review URLs below.
project = "cinder"

In [3]:
# Read the commit table for the selected project from the data directory.
commits_csv = f'../data/commits/commits_{project}.csv'
df_commits = pd.read_csv(commits_csv)

In [4]:
# Number of commits loaded (row count of the table).
df_commits.shape[0]

8535

Change IDs

In [5]:
# URL prefix of the per-commit endpoint; a commit hash is appended to it
# for each request.
get_prefix = (
    f"https://review.opendev.org/projects/openstack%2F{project}/commits/"
)

In [6]:
# Add (or reset) the 'change_id' column; '' marks "no change id known yet".
df_commits['change_id'] = ''

In [7]:
# For every commit, fetch its metadata from the review server and store the
# change id extracted from the commit message in df_commits['change_id'].
# Rows for which no change id could be obtained keep the empty string.
#
# One Session is shared by all requests so the HTTP connection is reused
# instead of being re-established for each commit.
with requests.Session() as session, tqdm(total = len(df_commits)) as pbar:
    for i, row in df_commits.iterrows():
        pbar.set_postfix({'hash' :  row['hash']})
        get_addr = get_prefix + row['hash']
        # The timeout keeps one stalled connection from hanging the whole run.
        response = session.get(get_addr, timeout = 30)
        if response.status_code == 404:
            # Commit unknown to the server: report it and leave the id empty
            # rather than failing while parsing the error body as JSON.
            print("---hash {} had invalid response".format(row['hash']))
            change_id = ''
        else:
            # Surface any other HTTP error as a clear HTTPError.
            response.raise_for_status()
            # Gerrit prefixes JSON bodies with the 5-character XSSI guard
            # ")]}'" + newline, which must be dropped before parsing.
            response_json = json.loads(response.text[5:])
            message = response_json['message']
            change_id = get_change_id(message)
            if change_id == '':
                print("---hash {} does not contain a change id in message:\n{}".format(
                    row['hash'], message))
        df_commits.at[i, 'change_id'] = change_id
        # Progress bar
        pbar.update(1)
        # Brief pause between consecutive requests.
        sleep(0.001)

  0%|          | 0/8535 [00:00<?, ?it/s]

---hash c53d8e343e50d4cf7ea9a6a81258848c2d893bfb does not contain a change id in message:
Initial fork out of Nova.

---hash 9c55720129df12780d1d6480942d6d96abf54a01 does not contain a change id in message:
Revert "Add action extensions to support nova integration."

This reverts commit 8d8903f466cd7d4270e46ad896817fe94c99b575
---hash ea716377a8a04937acfe637086b3657c4527f2ed does not contain a change id in message:
Revert "Don't zero out snapshot volume on snapshot_delete"

This reverts commit 1b3322d45fe2c5ed72cc7f8674e5e319928065ad

Turns out that although it's less likely we can still see this issue when zeroing out regular volumes.
---hash 444cd5429cc00f3a4ddb5cde4b8b1c974b886e19 does not contain a change id in message:
Revert "use O_DIRECT when copying from /dev/zero too"

It turns out running oflag=direct against the LVM snapshot fails for an IO error every time.  Works fine against the regular volume, but fails on snapshot.

This reverts commit 1405a6440d646524d41adfed4fc1344948

In [8]:
# How many commits ended up without a change id.
(df_commits['change_id'] == '').sum()

6

In [9]:
# Keep only the commits for which a change id was found.
has_change_id = df_commits['change_id'] != ''
df_commits = df_commits.loc[has_change_id]

Change Messages

In [10]:
# URL prefix of the per-change endpoint (master branch); the change id and
# "/messages" are appended to it for each request.
get_prefix = (
    f'https://review.opendev.org/changes/openstack%2F{project}~master~'
)

In [11]:
# Accumulator for the review records collected across all changes.
reviews = list()

In [12]:
# For every commit, fetch the messages of its change from the review server,
# turn them into review records via get_review_info, tag each record with the
# commit hash and change id, and append them to `reviews`.
#
# NOTE: `reviews` is only appended to here, so re-running this cell without
# re-initialising the list first will duplicate records.
#
# One Session is shared by all requests so the HTTP connection is reused.
with requests.Session() as session, tqdm(total = len(df_commits)) as pbar:
    for i, row in df_commits.iterrows():
        pbar.set_postfix({'hash' :  row['hash']})
        get_addr = get_prefix + row['change_id'] + "/messages"
        # The timeout keeps one stalled connection from hanging the whole run.
        response = session.get(get_addr, timeout = 30)
        if response.status_code == 404:
            # Change not found under this URL: report it and move on.
            print("---hash {} with change id {} had invalid response".format(
                row['hash'], row['change_id']))
        else:
            # Surface any other HTTP error as a clear HTTPError instead of a
            # JSON decoding failure on the error body.
            response.raise_for_status()
            # Gerrit prefixes JSON bodies with the 5-character XSSI guard
            # ")]}'" + newline, which must be dropped before parsing.
            response_json = json.loads(response.text[5:])
            reviews_for_single_change = get_review_info(project, response_json, row['author_name'])
            # Named `review` rather than `re` to avoid shadowing the stdlib
            # regex module name in the notebook namespace.
            for review in reviews_for_single_change:
                review['hash'] = row['hash']
                review['change_id'] = row['change_id']
            reviews.extend(reviews_for_single_change)
        # Progress bar
        pbar.update(1)
        # Brief pause between consecutive requests.
        sleep(0.001)

  0%|          | 0/8529 [00:00<?, ?it/s]

---hash 365a9908b89688fa609ba86c84941848628346d9 with change id I70790aa39c0774726de71e5fa5751c45e7e34ffd had invalid response
---hash 900d49723f65e87658381ff955559f54ac98c487 with change id I73f3bdccb4be98df95fa853864e465f4d83a8884 had invalid response
---hash ae6c589c52557988e24b5ee8005d8f81564a6068 with change id I64a4fc6af6f83b2d8a6bb5fe9aa0dc3acd58e4ce had invalid response
---hash 523080113d15bc5a207ebf89e82c4d4da486cbe0 with change id Ib472c156a7007b2166c6857ae1fa2bb72a2e0c1a had invalid response
---hash badedd29ea61d94d742ba71a3b0d54f3ab2cd0bd with change id Iba0e4f9545fc9ba82f080a0fec672761dcfeaeec had invalid response
---hash cc0e6118f60964c162d6482dc96454ffec30e061 with change id I7131dbf556627c5ad6fc237bab1e6b867f55c7f8 had invalid response
---hash 7d8369099180bb65449685755b0f69f1378bc8ae with change id I9fb29667964a4180af822b811345fa1148dbcd66 had invalid response
---hash e784fcfe1b92cd2b5a2697691edcffae947a24ea with change id I887e604f813292239c9128862cf6c014ad73e286 had i

Save to output

In [15]:
# Turn the collected review records into a table (one row per record).
df_reviews = pd.DataFrame(data = reviews)

In [16]:
# Write the review table for this project to disk, without the index column.
reviews_csv = f"../data/reviews/reviews_{project}.csv"
df_reviews.to_csv(reviews_csv, index = False)