In [29]:
# Load the GitHub and Sourcegraph API tokens from local files, which keeps the
# secrets out of the notebook source. Surrounding whitespace (e.g. a trailing
# newline left by an editor) is stripped so the value can go straight into an
# Authorization header.
with open("gh_access_token", "r") as file:
    access_token = file.read().strip()
with open("sg_access_token", "r") as file:
    sg_access_token = file.read().strip()

import csv
import json
import re

import pandas as pd
import requests


# Input table of repositories to scan. The driver loops further down read its
# `nameWithOwner` column, so that column must be present.
csv_path = 'repos_with_datetime.csv'
df = pd.read_csv(csv_path)

In [35]:
# GraphQL document that search_issues posts to the GitHub endpoint.
# Variables: $q (required search string) and $cursor (pagination cursor, null
# for the first page). Each request asks for up to 100 search hits and returns:
#   - rateLimit: remaining / cost / used
#   - search.pageInfo: hasNextPage / endCursor, used to drive pagination
#   - for nodes that are Issues: title, bodyHTML, url, activeLockReason and the
#     names of up to 100 labels
gh_query = """
query($q: String!, $cursor: String) {
  rateLimit {
    remaining
    cost
    used
  }
  search(query:$q, type: ISSUE, first: 100, after:$cursor) {
		pageInfo {
      hasNextPage
      endCursor
    }
    nodes {
      ... on Issue {
        
        title
        bodyHTML
        url
        activeLockReason
        
        
        labels (first:100) {
          nodes {
            name
          }
        }
        
      }
    }
  }
}
"""

# GraphQL document that search_diffs posts to the Sourcegraph endpoint.
# Variable: $q, the search query string, interpreted with patternType keyword.
# Returns the overall matchCount and, for results that are CommitSearchResult,
# the result url, its label text and the diffPreview text.
sg_query = """
query ($q: String) {
  search(patternType:keyword, query: $q) {
    results {
      matchCount
      results {
        ... on CommitSearchResult {
          url
          label {
            text
          }
          diffPreview {
            value
          }
        }
      }
    }
  }
}
"""


In [26]:
# Start issues.csv from scratch with only the header row; search_issues appends
# the data rows afterwards.
# newline="" hands line-ending control to the csv writer, so the explicit
# lineterminator="\n" is honoured instead of being turned into "\r\n" by
# text-mode translation on Windows. The explicit utf-8 keeps the file's
# encoding independent of the platform default.
with open("issues.csv", "w", newline="", encoding="utf-8") as file:
  writer = csv.writer(file, lineterminator="\n")

  row = ["repoName", "title", "bodyHtml", "url", "lockReason", "labels"]
  writer.writerow(row)

In [32]:
# GitHub GraphQL endpoint and bearer-token auth header. search_issues reads
# both as module-level globals at call time.
# NOTE(review): the Sourcegraph cell further down reassigns these same two
# names, so re-run this cell before calling search_issues again after that
# cell has executed.
url = "https://api.github.com/graphql"
headers = {"Authorization": f"Bearer {access_token}"}

def search_issues(nameWithOwner):
  """Append every closed issue of one repository that matches the search to issues.csv.

  Pages through GitHub's GraphQL search (up to 100 hits per request) for
  `repo:<nameWithOwner> is:issue is:closed "datetime" AND "bug"`, following
  `endCursor` until `hasNextPage` is false. One CSV row is written per issue
  (repo name, title, a placeholder instead of the HTML body, url, lock reason,
  list of label names) and one progress line is printed per request.

  Relies on the module-level `url`, `headers` and `gh_query`.

  Args:
    nameWithOwner: Repository in "owner/name" form, interpolated into the
      `repo:` search qualifier.

  Raises:
    requests.HTTPError: The endpoint answered with a 4xx/5xx status.
    requests.Timeout: No response arrived within the timeout.
    RuntimeError: The response carried no `data` (GraphQL-level failure); the
      message includes the `errors` array returned by the server.
  """
  q = f"repo:{nameWithOwner} is:issue is:closed \"datetime\" AND \"bug\""
  cursor = None
  count = 0

  while True:
    # Named `payload` so the imported `json` module is not shadowed.
    payload = {"query": gh_query, "variables": {"q": q, "cursor": cursor}}
    # A timeout keeps one stalled connection from hanging the whole crawl.
    reply = requests.post(url, json=payload, headers=headers, timeout=60)
    # Surface bad tokens / server errors as HTTP errors instead of an opaque
    # KeyError on "data" further down.
    reply.raise_for_status()
    body = reply.json()
    response = body.get("data")
    if response is None:
      raise RuntimeError(
        f"GitHub GraphQL query failed for {nameWithOwner}: {body.get('errors')}"
      )

    rateLimit = response["rateLimit"]
    pageInfo = response["search"]["pageInfo"]
    hasNextPage = pageInfo["hasNextPage"]
    cursor = pageInfo["endCursor"]

    # The file is re-opened for every page so rows already fetched are on disk
    # if a later request fails. newline="" leaves line endings to the csv
    # writer; utf-8 because issue titles are arbitrary Unicode text.
    with open("issues.csv", "a", newline="", encoding="utf-8") as file:
      writer = csv.writer(file, lineterminator="\n")

      for issue in response["search"]["nodes"]:
        if not issue:
          # Presumably search hits that are not Issues come back as null or an
          # empty object (the query only selects fields `... on Issue`); skip
          # them rather than fail on a missing key.
          continue

        labels = [label["name"] for label in issue["labels"]["nodes"]]

        # The label list is written as its Python repr, as before.
        row = [nameWithOwner, issue["title"], "<html redacted>", issue["url"], # issue["bodyHTML"]
               issue["activeLockReason"], labels
        ]
        writer.writerow(row)

    # `count` is the zero-based index of the request that just completed.
    print(f"Requests: {count}, endCursor: {cursor}, remaining: {rateLimit['remaining']}")
    count += 1

    if not hasNextPage:
      print("done")
      break

# Run the GitHub issue search once for every repository listed in the input table.
for nameWithOwner in df["nameWithOwner"]:
  search_issues(nameWithOwner)


Requests: 0, endCursor: Y3Vyc29yOjE=, remaining: 4999
done
Requests: 0, endCursor: Y3Vyc29yOjE=, remaining: 4998
done
Requests: 0, endCursor: None, remaining: 4997
done
Requests: 0, endCursor: Y3Vyc29yOjEy, remaining: 4996
done
Requests: 0, endCursor: Y3Vyc29yOjE1, remaining: 4995
done
Requests: 0, endCursor: Y3Vyc29yOjEx, remaining: 4994
done


In [44]:
# Start diffs.csv from scratch with only the header row; search_diffs appends
# the data rows afterwards.
# newline="" hands line-ending control to the csv writer, so the explicit
# lineterminator="\n" is honoured instead of being turned into "\r\n" by
# text-mode translation on Windows. The explicit utf-8 keeps the file's
# encoding independent of the platform default.
with open("diffs.csv", "w", newline="", encoding="utf-8") as file:
  writer = csv.writer(file, lineterminator="\n")

  row = ["repoName", "label", "url", "diffPreview"]
  writer.writerow(row)

In [45]:
# Sourcegraph GraphQL endpoint and token auth header. search_diffs reads both
# as module-level globals at call time.
# NOTE(review): this overwrites the `url` / `headers` globals that the GitHub
# cell set up for search_issues; re-run that cell before using search_issues
# again.
url = "https://sourcegraph.com/.api/graphql"
headers = {"Authorization": f"token {sg_access_token}"}

def search_diffs(nameWithOwner):
  """Append the Sourcegraph diff-search hits for one repository to diffs.csv.

  Runs a single `type:diff message:bug datetime` search restricted to
  `github.com/<nameWithOwner>` and writes one CSV row per result: repo name,
  result label text, result url and a placeholder instead of the diff text.

  Relies on the module-level `url`, `headers` and `sg_query`.

  Args:
    nameWithOwner: Repository in "owner/name" form. It is regex-escaped before
      being placed inside the anchored `repo:^...$` filter.

  Raises:
    requests.HTTPError: The endpoint answered with a 4xx/5xx status.
    requests.Timeout: No response arrived within the timeout.
    RuntimeError: The response carried no `data` (GraphQL-level failure); the
      message includes the `errors` array returned by the server.
  """
  # The repo: filter is a regular expression, so escape the name: otherwise a
  # "." in a repository name (e.g. "next.js") would match any character.
  repo_pattern = re.escape(nameWithOwner)
  # NOTE(review): no `count:` filter is set, so whatever default result limit
  # Sourcegraph applies is in effect — confirm that is intended.
  q = f"context:global repo:^github\\.com/{repo_pattern}$ type:diff message:bug datetime"

  # Named `payload` so the imported `json` module is not shadowed.
  payload = {"query": sg_query, "variables": {"q": q}}
  # A timeout keeps one stalled connection from hanging the whole crawl.
  reply = requests.post(url, json=payload, headers=headers, timeout=60)
  # Surface bad tokens / server errors as HTTP errors instead of an opaque
  # KeyError on "data" further down.
  reply.raise_for_status()
  body = reply.json()
  response = body.get("data")
  if response is None:
    raise RuntimeError(
      f"Sourcegraph GraphQL query failed for {nameWithOwner}: {body.get('errors')}"
    )

  diffs = response["search"]["results"]["results"]
  # newline="" leaves line endings to the csv writer; utf-8 because commit
  # labels are arbitrary Unicode text.
  with open("diffs.csv", "a", newline="", encoding="utf-8") as file:
    writer = csv.writer(file, lineterminator="\n")
    for diff in diffs:
      if not diff:
        # Presumably results that are not CommitSearchResult come back as an
        # empty object (the query only selects fields on that type); skip
        # them rather than fail on a missing key.
        continue
      row = [nameWithOwner, diff["label"]["text"], diff["url"], "<diff redacted>"] #diff["diffPreview"]["value"]
      writer.writerow(row)

# Run the Sourcegraph diff search once for every repository listed in the input table.
for nameWithOwner in df["nameWithOwner"]:
  search_diffs(nameWithOwner)
