In [13]:
import json
import os
import re
from urllib.parse import quote_plus

import pandas as pd
import requests
from dotenv import load_dotenv
from mongoengine import connect, disconnect
from pycoshark.mongomodels import People, CodeReview, CodeReviewChangeLog, CodeReviewRevision, CodeReviewComment

In [14]:
# Load environment variables from .env file
load_dotenv('.env')

# Retrieve the connection settings from the environment
user = os.getenv('DB_USER')
password = os.getenv('DB_PW')
db_host = os.getenv('DB_HOST')
db_port = os.getenv('DB_PORT')
db_name = os.getenv('DB_NAME')

# Fail early with a clear message instead of building a URI that contains the text "None"
missing_settings = [
    name
    for name, value in (
        ('DB_USER', user),
        ('DB_PW', password),
        ('DB_HOST', db_host),
        ('DB_PORT', db_port),
        ('DB_NAME', db_name),
    )
    if not value
]
if missing_settings:
    raise RuntimeError(f'Missing database settings in environment/.env: {", ".join(missing_settings)}')

# Close the default connection (if any) so that re-running this cell reconnects cleanly
disconnect()

# Percent-encode the credentials so characters such as "@", ":" or "/" cannot corrupt the URI
address = f'mongodb://{quote_plus(user)}:{quote_plus(password)}@{db_host}:{db_port}/{db_name}?authSource=admin'

# Never print the real password: cell output is stored inside the notebook file
print(f'mongodb://{user}:***@{db_host}:{db_port}/{db_name}?authSource=admin')
connect(host=address)

mongodb://root:***@localhost:27017/smartshark?authSource=admin


MongoClient(host=['localhost:27017'], document_class=dict, tz_aware=False, connect=True, authsource='admin', read_preference=Primary())

In [None]:
# People documents that were already looked up, keyed by their author id.
author_cache: dict = dict()

# Module-level reply counter, initialised to zero.
cnt: int = 0



def get_comment_data(comment: CodeReviewComment, lines_map, cnt):
    """Build the CSV row (as a dict) for a single review comment.

    Args:
        comment: The comment document to convert.
        lines_map: Mapping of comment external id -> code context text,
            as returned by ``get_file_chunks``.
        cnt: Counter value appended to the ``source`` label of the row.

    Returns:
        A dict with the keys ``source``, ``message``, ``lines``, ``author``
        and ``date``. ``lines`` is ``None`` for replies; for top-level
        comments it is the context text, or ``''`` when no context is known.
    """
    # Look in the cache first. Passing the query as the default of dict.get()
    # would evaluate it on every call and make the cache pointless.
    author = author_cache.get(comment.author_id)
    if author is None:
        author = People.objects.get(id=comment.author_id)
        author_cache[comment.author_id] = author

    # Only top-level comments (not replies) carry the code context.
    lines = None
    if comment.in_reply_to_id is None:
        lines = lines_map.get(comment.external_id, '')

    return {
        'source': f'p{comment.patch_set_number} {comment.file_path} {comment.line} {cnt}',
        'message': comment.message,
        'lines': lines,
        'author': author.name,
        'date': comment.updated_at
    }



def get_file_chunks(review: CodeReview, timeout: float = 30):
    """Fetch the code context of every inline comment of a review from Gerrit.

    Args:
        review: The review whose ``change_id`` is used to build the request URL.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A dict mapping the Gerrit comment id to the context lines joined with
        newlines. The dict is empty when the response body is not valid JSON
        (for example an error page).

    Raises:
        requests.RequestException: If the HTTP request itself fails or times out.
    """
    url = f'https://review.opendev.org/changes/openstack%2Fnova~master~{review.change_id}/comments?enable-context=true&context-padding=5'

    print(url)

    lines_map = {}

    # Without a timeout a stalled connection would block the whole batch forever.
    response = requests.get(url, timeout=timeout)

    # The first line of the body is dropped before parsing (Gerrit prefixes its
    # JSON responses with a ")]}'" guard line).
    body = ''.join(response.text.split('\n')[1:])

    try:
        data = json.loads(body)
    except ValueError:
        # json.JSONDecodeError is a ValueError; anything unparseable is treated
        # as "no context available" rather than aborting the export.
        return lines_map

    if not isinstance(data, dict):
        return lines_map

    for path, comments in data.items():
        for comment in comments:
            # A context entry without 'context_line' contributes an empty line
            # instead of crashing str.join() with None.
            lines_map[comment.get('id')] = '\n'.join(
                line.get('context_line') or '' for line in comment.get('context_lines', [])
            )

    return lines_map

def _get_cached_person(person_id):
    """Return the People document for ``person_id``, querying only on a cache miss."""
    person = author_cache.get(person_id)
    if person is None:
        person = People.objects.get(id=person_id)
        author_cache[person_id] = person
    return person


def store_csv(review_external_number):
    """Export the description, comments and change log of one review to a CSV file.

    The file is written to ``review_comments/review_comments_<number>.csv`` with
    the columns ``source``, ``message``, ``lines``, ``author`` and ``date``.

    Args:
        review_external_number: External number of the review to export.
    """
    review = CodeReview.objects.get(external_number=review_external_number)

    lines_map = get_file_chunks(review)

    result = [{
        'source': 'description',
        'message': review.description,
        'lines': None,
        'author': _get_cached_person(review.author_id).name,
        'date': review.created_at
    }]

    def append_children(parent: CodeReviewComment, cnt):
        """Append all replies below ``parent`` to ``result``, depth first."""
        # NOTE(review): cnt is passed by value, so increments made inside a
        # nested reply chain are not visible to later siblings and counter
        # values can repeat within one thread. Kept as in the original;
        # confirm whether a strictly sequential counter was intended.
        children = CodeReviewComment.objects.filter(in_reply_to_id=parent.external_id)
        for child in children:
            cnt += 1
            result.append(get_comment_data(child, lines_map, cnt))
            append_children(child, cnt)

    review_comments_parent = CodeReviewComment.objects.filter(
        code_review_id=review.id, in_reply_to_id=None
    ).order_by('patch_set_number')

    # Every top-level comment starts its own thread with counter 0.
    for parent in review_comments_parent:
        result.append(get_comment_data(parent, lines_map, 0))
        append_children(parent, 0)

    # Accounts (CI systems and similar) whose change-log entries are left out.
    excluded_names = [
        "trivial rebase", "Intel PCI CI", "VMware NSX CI", "SmokeStack CI",
        "Microsoft Hyper-V CI", "Citrix XenServer CI", "IBM PowerKVM CI",
        "DB Datasets CI", "Jenkins",
    ]
    # "(?:...)" is the non-capturing group; the original "(:?" matched an optional colon.
    excluded_names_regex = re.compile(f'(?:{"|".join(excluded_names)})', re.IGNORECASE)
    excluded_authors = People.objects.filter(name=excluded_names_regex)

    review_logs = CodeReviewChangeLog.objects.filter(
        code_review_id=review.id,
        author_id__nin=[author.id for author in excluded_authors]
    ).order_by('created_at')
    revision_cache = {}

    for log in review_logs:
        # Check the cache before querying; a query used as dict.get() default
        # would run on every iteration.
        revision = revision_cache.get(log.revision_id)
        if revision is None:
            revision = CodeReviewRevision.objects.get(id=log.revision_id)
            revision_cache[log.revision_id] = revision

        # Skip entries that only announce an upload or the number of inline comments.
        if (re.search(rf'uploaded patch set {revision.revision_number}', log.message, flags=re.IGNORECASE)
                or re.search(rf'^patch set {revision.revision_number}: \(\d+ inline comments?\)\n+$', log.message, flags=re.IGNORECASE)):
            continue

        author = _get_cached_person(log.author_id)

        result.append({
            'source': f'p{revision.revision_number} log {author.name}',
            'message': log.message,
            'lines': None,
            'author': author.name,
            'date': log.created_at
        })

    df = pd.DataFrame(result)

    # to_csv does not create missing directories.
    os.makedirs('review_comments', exist_ok=True)
    df.to_csv(f'review_comments/review_comments_{review_external_number}.csv', index=False)
    print(f'review_comments_{review_external_number}.csv saved!')

In [16]:
# External numbers of the reviews to export, one CSV file per review.
reviews = [
    1324, 1934, 10903, 14805, 14806, 17839, 17946, 18864, 19457, 19679,
    22897, 24363, 26113, 28774, 28880, 29062, 30201, 31593, 34464, 40426,
    40467, 42389, 44239, 53069, 59034, 59560, 63178, 63612, 64289, 65042,
    67338, 70215, 70747, 74537, 93754, 94294, 94811, 102324, 107009, 107602,
    109489, 113545, 113741, 113772, 119396, 119446, 120675, 123682, 124714, 129235,
    132231, 136717, 140337, 142282, 144792, 145007, 150293, 151664, 155678, 155853,
    164940, 175551,
]

# Export the reviews one after another, in list order.
for review in reviews:
    store_csv(review)

https://review.opendev.org/changes/openstack%2Fnova~master~I034404892018e99987f80789d7f7e406ff31658c/comments?enable-context=true&context-padding=5
review_comments_1324.csv saved!
https://review.opendev.org/changes/openstack%2Fnova~master~I8877fad3d41ae055c15b1adff99e535c34e9ce92/comments?enable-context=true&context-padding=5
review_comments_1934.csv saved!
https://review.opendev.org/changes/openstack%2Fnova~master~I51645687249c75e7776a684f19529a1e78f33a41/comments?enable-context=true&context-padding=5
review_comments_10903.csv saved!
https://review.opendev.org/changes/openstack%2Fnova~master~I0d4a7dc5836d39e405824528de214f23b214849f/comments?enable-context=true&context-padding=5
review_comments_14805.csv saved!
https://review.opendev.org/changes/openstack%2Fnova~master~Ieaa33efd50bb84f1b475c437b26af1302fdfd19f/comments?enable-context=true&context-padding=5
review_comments_14806.csv saved!
https://review.opendev.org/changes/openstack%2Fnova~master~Ia89b613d7d934a0072f262add039fe14ef236