In [8]:
import os
import json
import openai
import time
from tqdm import tqdm
import polars as pl
from dotenv import dotenv_values

config = dotenv_values("../.env") 

In [18]:
def get_issue_reponses(title, body):
    cost = 0
    response = openai.ChatCompletion.create(
        engine=config["OPENAI_ENGINE"], 
        messages=[
            {"role": "system", "content": "You are a helpful assistant that is python master and have years of experience with github moderation."},
            {"role": "user", "content": f"Is the description of issue good? \ntitle: {title}\nbody: {body}\nAnswer with Yes/No as single word"},
        ]
    )
    description_quality = response['choices'][0]['message']['content']
    cost += response['usage']["total_tokens"]

    response = openai.ChatCompletion.create(
        engine=config["OPENAI_ENGINE"], 
        messages=[
            {"role": "system", "content": "You are a helpful assistant that is python master and have years of experience with github moderation."},
            {"role": "user", "content": f"Give me one sentence summary of following issue, use ten words or less: \ntitle: {title}\nbody: {body}"},
        ]
    )
    summary = response['choices'][0]['message']['content']
    cost += response['usage']["total_tokens"]

    response = openai.ChatCompletion.create(
        engine=config["OPENAI_ENGINE"], 
        messages=[
            {"role": "system", "content": "You are a helpful assistant that is python master and have years of experience with github moderation. You are maintainer of shap package"},
            {"role": "user", "content": f"Suggest short label for this issue. Label should be of maximum two words: \ntitle: {title}\nbody: {body}"},
        ]
    )
    short_label = response['choices'][0]['message']['content']
    cost += response['usage']["total_tokens"]

    issue_labels = [
        "bug",
        "documentation",
        "enhancement",
        "good first issue",
        "stale"
    ]
    response = openai.ChatCompletion.create(
        engine=config["OPENAI_ENGINE"], 
        messages=[
            {"role": "system", "content": "You are a helpful assistant that is python master and have years of experience with github moderation. You are maintainer of shap package"},
            {"role": "user", "content": f"Select label from {issue_labels}, where stale label is for issues that are too short and not clear: \ntitle: {title}\nbody: {body}\nAnswer with label name from list and nothing else"},
        ]
    )
    label = response['choices'][0]['message']['content']
    cost += response['usage']["total_tokens"]
    # response = openai.ChatCompletion.create(
    #     engine=config["OPENAI_ENGINE"], 
    #     messages=[
    #         {"role": "system", "content": "You are a helpful assistant that is python master and have years of experience with github moderation. You are maintainer of shap package"},
    #         {"role": "user", "content": f"Write first comment for: \ntitle: {title}\nbody: {body}\n"},
    #     ]
    # )
    # comment = response['choices'][0]['message']['content']

    return description_quality, summary, short_label, label, cost

In [21]:
openai.api_type = "azure"
openai.api_base = config["OPENAI_ENDPOINT"]
openai.api_version = "2023-05-15"
openai.api_key = config["OPENAI_KEY"]

data = pl.read_json("../data/issues_v1.json").filter(pl.col("number").is_not_null()).sort("number")
data = data.with_columns([
    pl.lit("").alias("description_quality"),
    pl.lit("").alias("summary"),
    pl.lit("").alias("short_label"),
    pl.lit("").alias("label"),
])

In [22]:
total_tokens = 0

In [28]:

for i in tqdm(range(len(data))):
    if data[i, "summary"] != "":
        continue
    row = data[i]
    title = row["title"]
    body = row["body"]
    try:
        description_quality, summary, short_label, label, tokens = get_issue_reponses(title, body)
        data[i, "description_quality"] = description_quality
        data[i, "summary"] = summary
        data[i, "short_label"] = short_label
        data[i, "label"] = label
        total_tokens += tokens
    except openai.error.RateLimitError:
        time.sleep(20)
        description_quality, summary, short_label, label, tokens = get_issue_reponses(title, body)
        data[i, "description_quality"] = description_quality
        data[i, "summary"] = summary
        data[i, "short_label"] = short_label
        data[i, "label"] = label
        total_tokens += tokens
    print(f"Total tokens: {total_tokens}")
    print(f"Average tokens per issue: {total_tokens / (i + 1)}")
    print(f"Estimated cost: {total_tokens / 1000 * 0.028}")

  3%|▎         | 88/2955 [00:02<01:14, 38.37it/s]

Total tokens: 38714
Average tokens per issue: 439.9318181818182
Estimated cost: 1.083992
Total tokens: 39141
Average tokens per issue: 439.7865168539326
Estimated cost: 1.095948


  3%|▎         | 90/2955 [00:27<19:56,  2.40it/s]

Total tokens: 39574
Average tokens per issue: 439.7111111111111
Estimated cost: 1.108072


  3%|▎         | 91/2955 [00:29<21:21,  2.24it/s]

Total tokens: 39999
Average tokens per issue: 439.54945054945057
Estimated cost: 1.1199720000000002


  3%|▎         | 92/2955 [00:51<53:47,  1.13s/it]

Total tokens: 40436
Average tokens per issue: 439.5217391304348
Estimated cost: 1.132208


  3%|▎         | 93/2955 [00:53<54:56,  1.15s/it]

Total tokens: 40867
Average tokens per issue: 439.4301075268817
Estimated cost: 1.1442759999999998


  3%|▎         | 94/2955 [01:16<1:52:59,  2.37s/it]

Total tokens: 41307
Average tokens per issue: 439.43617021276594
Estimated cost: 1.1565960000000002


  3%|▎         | 95/2955 [01:18<1:50:33,  2.32s/it]

Total tokens: 41731
Average tokens per issue: 439.2736842105263
Estimated cost: 1.168468


  3%|▎         | 96/2955 [01:41<3:25:25,  4.31s/it]

Total tokens: 42166
Average tokens per issue: 439.2291666666667
Estimated cost: 1.180648


  3%|▎         | 97/2955 [01:42<3:10:47,  4.01s/it]

Total tokens: 42592
Average tokens per issue: 439.0927835051546
Estimated cost: 1.192576


  3%|▎         | 98/2955 [02:05<5:21:52,  6.76s/it]

Total tokens: 43028
Average tokens per issue: 439.0612244897959
Estimated cost: 1.204784


  3%|▎         | 99/2955 [02:07<4:39:39,  5.88s/it]

Total tokens: 43471
Average tokens per issue: 439.1010101010101
Estimated cost: 1.217188


  3%|▎         | 100/2955 [02:30<7:21:42,  9.28s/it]

Total tokens: 43911
Average tokens per issue: 439.11
Estimated cost: 1.229508


  3%|▎         | 101/2955 [02:32<6:04:54,  7.67s/it]

Total tokens: 44351
Average tokens per issue: 439.1188118811881
Estimated cost: 1.241828


  3%|▎         | 102/2955 [02:55<8:59:55, 11.35s/it]

Total tokens: 44796
Average tokens per issue: 439.1764705882353
Estimated cost: 1.254288


  3%|▎         | 103/2955 [02:57<7:02:42,  8.89s/it]

Total tokens: 45229
Average tokens per issue: 439.11650485436894
Estimated cost: 1.266412


  4%|▎         | 104/2955 [03:19<9:57:36, 12.58s/it]

Total tokens: 45659
Average tokens per issue: 439.02884615384613
Estimated cost: 1.278452


  4%|▎         | 105/2955 [03:21<7:33:53,  9.56s/it]

Total tokens: 46085
Average tokens per issue: 438.9047619047619
Estimated cost: 1.29038


  4%|▎         | 106/2955 [03:44<10:37:14, 13.42s/it]

Total tokens: 46520
Average tokens per issue: 438.8679245283019
Estimated cost: 1.3025600000000002


  4%|▎         | 107/2955 [03:47<8:07:00, 10.26s/it] 

Total tokens: 46959
Average tokens per issue: 438.86915887850466
Estimated cost: 1.3148520000000001


  4%|▎         | 108/2955 [04:09<11:01:43, 13.95s/it]

Total tokens: 47408
Average tokens per issue: 438.962962962963
Estimated cost: 1.3274240000000002


  4%|▎         | 109/2955 [04:11<8:10:51, 10.35s/it] 

Total tokens: 47866
Average tokens per issue: 439.13761467889907
Estimated cost: 1.340248


  4%|▎         | 110/2955 [04:35<11:15:16, 14.24s/it]

Total tokens: 48306
Average tokens per issue: 439.1454545454545
Estimated cost: 1.352568


  4%|▍         | 111/2955 [04:36<8:17:11, 10.49s/it] 

Total tokens: 48748
Average tokens per issue: 439.1711711711712
Estimated cost: 1.364944


  4%|▍         | 112/2955 [04:59<11:13:27, 14.21s/it]

Total tokens: 49189
Average tokens per issue: 439.1875
Estimated cost: 1.377292


  4%|▍         | 113/2955 [05:01<8:16:25, 10.48s/it] 

Total tokens: 49627
Average tokens per issue: 439.17699115044246
Estimated cost: 1.389556


  4%|▍         | 114/2955 [05:25<11:23:11, 14.43s/it]

Total tokens: 50091
Average tokens per issue: 439.39473684210526
Estimated cost: 1.4025480000000001


  4%|▍         | 115/2955 [05:26<8:21:54, 10.60s/it] 

Total tokens: 50539
Average tokens per issue: 439.4695652173913
Estimated cost: 1.415092


  4%|▍         | 116/2955 [05:49<11:17:53, 14.33s/it]

Total tokens: 50956
Average tokens per issue: 439.2758620689655
Estimated cost: 1.426768


  4%|▍         | 117/2955 [05:51<8:18:17, 10.53s/it] 

Total tokens: 51385
Average tokens per issue: 439.1880341880342
Estimated cost: 1.43878


  4%|▍         | 118/2955 [06:14<11:12:20, 14.22s/it]

Total tokens: 51828
Average tokens per issue: 439.22033898305085
Estimated cost: 1.451184


  4%|▍         | 119/2955 [06:16<8:16:08, 10.50s/it] 

Total tokens: 52263
Average tokens per issue: 439.1848739495798
Estimated cost: 1.4633639999999999


  4%|▍         | 120/2955 [06:39<11:22:45, 14.45s/it]

Total tokens: 52680
Average tokens per issue: 439.0
Estimated cost: 1.4750400000000001


  4%|▍         | 121/2955 [06:41<8:23:17, 10.66s/it] 

Total tokens: 53105
Average tokens per issue: 438.88429752066116
Estimated cost: 1.48694


  4%|▍         | 122/2955 [07:04<11:17:47, 14.36s/it]

Total tokens: 53539
Average tokens per issue: 438.844262295082
Estimated cost: 1.499092


  4%|▍         | 123/2955 [07:06<8:21:39, 10.63s/it] 

Total tokens: 54002
Average tokens per issue: 439.0406504065041
Estimated cost: 1.512056


  4%|▍         | 124/2955 [07:29<11:13:41, 14.28s/it]

Total tokens: 54428
Average tokens per issue: 438.93548387096774
Estimated cost: 1.523984


  4%|▍         | 125/2955 [07:31<8:17:25, 10.55s/it] 

Total tokens: 54869
Average tokens per issue: 438.952
Estimated cost: 1.536332


  4%|▍         | 126/2955 [07:54<11:11:53, 14.25s/it]

Total tokens: 55306
Average tokens per issue: 438.93650793650795
Estimated cost: 1.548568


  4%|▍         | 127/2955 [07:55<8:15:32, 10.51s/it] 

Total tokens: 55762
Average tokens per issue: 439.07086614173227
Estimated cost: 1.561336


  4%|▍         | 128/2955 [08:19<11:15:08, 14.33s/it]

Total tokens: 56214
Average tokens per issue: 439.171875
Estimated cost: 1.573992


  4%|▍         | 129/2955 [08:20<8:18:51, 10.59s/it] 

Total tokens: 56658
Average tokens per issue: 439.2093023255814
Estimated cost: 1.586424


  4%|▍         | 130/2955 [08:44<11:18:03, 14.40s/it]

Total tokens: 57091
Average tokens per issue: 439.16153846153844
Estimated cost: 1.598548


  4%|▍         | 131/2955 [08:45<8:16:35, 10.55s/it] 

Total tokens: 57532
Average tokens per issue: 439.17557251908397
Estimated cost: 1.6108959999999999


  4%|▍         | 132/2955 [09:09<11:23:43, 14.53s/it]

Total tokens: 57977
Average tokens per issue: 439.219696969697
Estimated cost: 1.623356


  5%|▍         | 133/2955 [09:11<8:22:13, 10.68s/it] 

Total tokens: 58409
Average tokens per issue: 439.16541353383457
Estimated cost: 1.635452


  5%|▍         | 134/2955 [09:34<11:21:01, 14.48s/it]

Total tokens: 58859
Average tokens per issue: 439.24626865671644
Estimated cost: 1.648052


  5%|▍         | 135/2955 [09:36<8:17:48, 10.59s/it] 

Total tokens: 59279
Average tokens per issue: 439.10370370370373
Estimated cost: 1.659812


  5%|▍         | 136/2955 [09:59<11:19:13, 14.46s/it]

Total tokens: 59711
Average tokens per issue: 439.0514705882353
Estimated cost: 1.671908


  5%|▍         | 137/2955 [10:01<8:19:48, 10.64s/it] 

Total tokens: 60166
Average tokens per issue: 439.1678832116788
Estimated cost: 1.684648


  5%|▍         | 138/2955 [10:24<11:17:10, 14.42s/it]

Total tokens: 60597
Average tokens per issue: 439.10869565217394
Estimated cost: 1.6967160000000001


  5%|▍         | 139/2955 [10:26<8:19:59, 10.65s/it] 

Total tokens: 61030
Average tokens per issue: 439.0647482014389
Estimated cost: 1.7088400000000001


  5%|▍         | 140/2955 [10:49<11:15:24, 14.40s/it]

Total tokens: 61453
Average tokens per issue: 438.95
Estimated cost: 1.720684


  5%|▍         | 141/2955 [10:51<8:17:44, 10.61s/it] 

Total tokens: 61892
Average tokens per issue: 438.9503546099291
Estimated cost: 1.732976


  5%|▍         | 142/2955 [11:14<11:14:40, 14.39s/it]

Total tokens: 62335
Average tokens per issue: 438.9788732394366
Estimated cost: 1.7453800000000002


  5%|▍         | 143/2955 [11:16<8:19:38, 10.66s/it] 

Total tokens: 62780
Average tokens per issue: 439.02097902097904
Estimated cost: 1.75784


  5%|▍         | 144/2955 [11:39<11:10:46, 14.32s/it]

Total tokens: 63209
Average tokens per issue: 438.9513888888889
Estimated cost: 1.7698520000000002


  5%|▍         | 145/2955 [11:41<8:15:07, 10.57s/it] 

Total tokens: 63637
Average tokens per issue: 438.8758620689655
Estimated cost: 1.781836


  5%|▍         | 146/2955 [12:04<11:09:40, 14.30s/it]

Total tokens: 64078
Average tokens per issue: 438.8904109589041
Estimated cost: 1.7941840000000002


  5%|▍         | 147/2955 [12:06<8:14:04, 10.56s/it] 

Total tokens: 64513
Average tokens per issue: 438.86394557823127
Estimated cost: 1.806364


  5%|▌         | 148/2955 [12:29<11:11:18, 14.35s/it]

Total tokens: 64968
Average tokens per issue: 438.97297297297297
Estimated cost: 1.819104


  5%|▌         | 149/2955 [12:31<8:17:58, 10.65s/it] 

Total tokens: 65383
Average tokens per issue: 438.81208053691273
Estimated cost: 1.830724


  5%|▌         | 150/2955 [12:54<11:10:26, 14.34s/it]

Total tokens: 65829
Average tokens per issue: 438.86
Estimated cost: 1.8432119999999999


  5%|▌         | 151/2955 [12:56<8:19:22, 10.69s/it] 

Total tokens: 66262
Average tokens per issue: 438.8211920529801
Estimated cost: 1.855336


  5%|▌         | 152/2955 [13:19<11:10:54, 14.36s/it]

Total tokens: 66697
Average tokens per issue: 438.79605263157896
Estimated cost: 1.8675160000000002


  5%|▌         | 153/2955 [13:20<8:10:32, 10.50s/it] 

Total tokens: 67130
Average tokens per issue: 438.75816993464053
Estimated cost: 1.87964


  5%|▌         | 154/2955 [13:43<11:02:30, 14.19s/it]

Total tokens: 67569
Average tokens per issue: 438.7597402597403
Estimated cost: 1.8919320000000002


  5%|▌         | 155/2955 [13:45<8:05:31, 10.40s/it] 

Total tokens: 68005
Average tokens per issue: 438.741935483871
Estimated cost: 1.90414


  5%|▌         | 156/2955 [14:08<11:03:49, 14.23s/it]

Total tokens: 68435
Average tokens per issue: 438.68589743589746
Estimated cost: 1.91618


  5%|▌         | 157/2955 [14:10<8:13:18, 10.58s/it] 

Total tokens: 68860
Average tokens per issue: 438.5987261146497
Estimated cost: 1.92808


  5%|▌         | 158/2955 [14:33<11:04:03, 14.24s/it]

Total tokens: 69295
Average tokens per issue: 438.5759493670886
Estimated cost: 1.94026


  5%|▌         | 159/2955 [14:34<8:07:22, 10.46s/it] 

Total tokens: 69731
Average tokens per issue: 438.55974842767296
Estimated cost: 1.9524679999999999


  5%|▌         | 160/2955 [14:57<11:03:07, 14.24s/it]

Total tokens: 70185
Average tokens per issue: 438.65625
Estimated cost: 1.9651800000000001


  5%|▌         | 161/2955 [14:59<8:11:50, 10.56s/it] 

Total tokens: 70630
Average tokens per issue: 438.69565217391306
Estimated cost: 1.9776399999999998


  5%|▌         | 162/2955 [15:23<11:07:26, 14.34s/it]

Total tokens: 71084
Average tokens per issue: 438.7901234567901
Estimated cost: 1.9903520000000001


  6%|▌         | 163/2955 [15:24<8:12:06, 10.58s/it] 

Total tokens: 71527
Average tokens per issue: 438.81595092024537
Estimated cost: 2.002756


  6%|▌         | 164/2955 [15:48<11:11:14, 14.43s/it]

Total tokens: 71956
Average tokens per issue: 438.7560975609756
Estimated cost: 2.014768


  6%|▌         | 165/2955 [15:50<8:15:23, 10.65s/it] 

Total tokens: 72404
Average tokens per issue: 438.8121212121212
Estimated cost: 2.027312


  6%|▌         | 166/2955 [16:13<11:08:57, 14.39s/it]

Total tokens: 72837
Average tokens per issue: 438.77710843373495
Estimated cost: 2.0394360000000002


  6%|▌         | 167/2955 [16:15<8:13:55, 10.63s/it] 

Total tokens: 73281
Average tokens per issue: 438.80838323353294
Estimated cost: 2.0518680000000002


  6%|▌         | 168/2955 [16:37<11:03:47, 14.29s/it]

Total tokens: 73712
Average tokens per issue: 438.76190476190476
Estimated cost: 2.063936


  6%|▌         | 169/2955 [16:39<8:12:53, 10.62s/it] 

Total tokens: 74141
Average tokens per issue: 438.70414201183434
Estimated cost: 2.0759480000000003


  6%|▌         | 170/2955 [17:02<11:03:25, 14.29s/it]

Total tokens: 74572
Average tokens per issue: 438.6588235294118
Estimated cost: 2.088016


  6%|▌         | 171/2955 [17:04<8:13:42, 10.64s/it] 

Total tokens: 75018
Average tokens per issue: 438.70175438596493
Estimated cost: 2.100504


  6%|▌         | 172/2955 [17:27<11:04:54, 14.34s/it]

Total tokens: 75459
Average tokens per issue: 438.7151162790698
Estimated cost: 2.112852


  6%|▌         | 173/2955 [17:29<8:09:44, 10.56s/it] 

Total tokens: 75884
Average tokens per issue: 438.63583815028903
Estimated cost: 2.124752


  6%|▌         | 174/2955 [17:52<10:58:39, 14.21s/it]

Total tokens: 76332
Average tokens per issue: 438.6896551724138
Estimated cost: 2.137296


  6%|▌         | 175/2955 [17:54<8:04:14, 10.45s/it] 

Total tokens: 76764
Average tokens per issue: 438.6514285714286
Estimated cost: 2.1493919999999997


  6%|▌         | 176/2955 [18:16<10:55:03, 14.14s/it]

Total tokens: 77192
Average tokens per issue: 438.59090909090907
Estimated cost: 2.1613759999999997


  6%|▌         | 177/2955 [18:18<8:06:19, 10.50s/it] 

Total tokens: 77633
Average tokens per issue: 438.6045197740113
Estimated cost: 2.173724


  6%|▌         | 178/2955 [18:42<11:02:00, 14.30s/it]

Total tokens: 78071
Average tokens per issue: 438.6011235955056
Estimated cost: 2.185988


  6%|▌         | 179/2955 [18:43<8:07:46, 10.54s/it] 

Total tokens: 78522
Average tokens per issue: 438.6703910614525
Estimated cost: 2.1986160000000003


  6%|▌         | 180/2955 [19:07<11:03:38, 14.35s/it]

Total tokens: 78970
Average tokens per issue: 438.72222222222223
Estimated cost: 2.21116


  6%|▌         | 181/2955 [19:09<8:12:54, 10.66s/it] 

Total tokens: 79400
Average tokens per issue: 438.6740331491713
Estimated cost: 2.2232000000000003


  6%|▌         | 182/2955 [19:32<11:06:19, 14.42s/it]

Total tokens: 79843
Average tokens per issue: 438.6978021978022
Estimated cost: 2.235604


  6%|▌         | 183/2955 [19:34<8:10:49, 10.62s/it] 

Total tokens: 80284
Average tokens per issue: 438.7103825136612
Estimated cost: 2.247952


  6%|▌         | 183/2955 [19:41<4:58:23,  6.46s/it]


KeyboardInterrupt: 

In [29]:
data.write_json("../data/issues_v1_gpt.json", row_oriented=True, pretty=True)