In [1]:
import requests
import json
import csv
import pandas as pd

from global_paths import *

# Read both API tokens from their files (paths presumably supplied by the
# global_paths star import), dropping surrounding whitespace/newlines.
with open(GH_ACCESS_TOKEN, "r") as gh_token_file:
    gh_access_token = gh_token_file.read().strip()
with open(SG_ACCESS_TOKEN, "r") as sg_token_file:
    sg_access_token = sg_token_file.read().strip()

# Repository table; the search loops below read its "nameWithOwner" column.
df = pd.read_csv(REPOS_WITH_DATETIME_PATH)

In [2]:
# GraphQL document that search_issues posts to the GitHub endpoint.
# Variables: $q (the search string) and $cursor (pagination cursor; null on the first page).
# Each page requests up to 100 ISSUE-type search results, the pageInfo needed to
# continue paging, and the current rateLimit figures. For every issue it selects
# title, bodyHTML, url, activeLockReason and the names of up to 100 labels.
gh_query = """
query($q: String!, $cursor: String) {
  rateLimit {
    remaining
    cost
    used
  }
  search(query:$q, type: ISSUE, first: 100, after:$cursor) {
		pageInfo {
      hasNextPage
      endCursor
    }
    nodes {
      ... on Issue {
        
        title
        bodyHTML
        url
        activeLockReason
        
        
        labels (first:100) {
          nodes {
            name
          }
        }
        
      }
    }
  }
}
"""

# GraphQL document that search_diffs posts to the Sourcegraph endpoint.
# Variable: $q (the search string, interpreted with patternType keyword).
# It selects the overall matchCount and, for results that are CommitSearchResult
# objects, the url, the label text and the diffPreview value.
sg_query = """
query ($q: String) {
  search(patternType:keyword, query: $q) {
    results {
      matchCount
      results {
        ... on CommitSearchResult {
          url
          label {
            text
          }
          diffPreview {
            value
          }
        }
      }
    }
  }
}
"""


In [3]:
# Start ISSUES_PATH afresh (mode "w" truncates) containing only the header line.
with open(ISSUES_PATH, "w") as issues_csv:
  header = ["repoName", "title", "bodyHtml", "url", "lockReason", "labels"]
  csv.writer(issues_csv, lineterminator="\n").writerow(header)

In [4]:
# GitHub GraphQL endpoint and bearer-token header; search_issues reads both as globals.
url = "https://api.github.com/graphql"
# The token loaded from GH_ACCESS_TOKEN is named `gh_access_token`; a bare
# `access_token` is never defined in this notebook and fails on a fresh kernel.
headers = {"Authorization": f"Bearer {gh_access_token}"}

def search_issues(nameWithOwner):
  """Append one repository's matching closed issues to ISSUES_PATH.

  Searches GitHub for closed issues matching "datetime" AND "bug" in the given
  repository, following the pagination cursor until no page is left. Uses the
  module-level ``gh_query``, ``url`` and ``headers``. One CSV row is appended
  per issue and a progress line is printed after every request.

  Args:
    nameWithOwner: Repository identifier interpolated into the ``repo:``
      search qualifier (presumably ``owner/name``).

  Raises:
    requests.HTTPError: If the API answers with an HTTP error status.
    RuntimeError: If the JSON reply carries no ``data`` object.
  """
  count = 0
  q = f"repo:{nameWithOwner} is:issue is:closed \"datetime\" AND \"bug\""
  cursor = None
  while True:
    # Named `payload` so the imported `json` module is not shadowed.
    payload = {"query": gh_query, "variables": {"q": q, "cursor": cursor}}
    reply = requests.post(url, json=payload, headers=headers)
    # Fail with the real cause (bad token, rate limit, ...) instead of a
    # KeyError on "data" further down.
    reply.raise_for_status()
    body = reply.json()
    data = body.get("data")
    if not data:
      raise RuntimeError(
        f"GitHub GraphQL query failed for {nameWithOwner}: {body.get('errors')}"
      )

    rateLimit = data["rateLimit"]
    pageInfo = data["search"]["pageInfo"]
    cursor = pageInfo["endCursor"]

    # UTF-8 is what pd.read_csv assumes by default, and newline="" leaves line
    # endings entirely to the csv writer (as the csv module docs require).
    with open(ISSUES_PATH, "a", encoding="utf-8", newline="") as file:
      writer = csv.writer(file, lineterminator="\n")

      for issue in data["search"]["nodes"]:
        labels = [label["name"] for label in issue["labels"]["nodes"]]

        # The bodyHtml column receives a placeholder; `labels` is a list, so
        # csv stores its str() form.
        row = [nameWithOwner, issue["title"], "<html redacted>", issue["url"], # issue["bodyHTML"]
               issue["activeLockReason"], labels
        ]
        writer.writerow(row)

    print(f"Requests: {count}, endCursor: {cursor}, remaining: {rateLimit['remaining']}")
    count += 1

    if not pageInfo["hasNextPage"]:
      print("done")
      break

# Run the issue search once for every repository listed in df.
for nameWithOwner in df["nameWithOwner"]:
  search_issues(nameWithOwner)


Requests: 0, endCursor: Y3Vyc29yOjE=, remaining: 4989
done
Requests: 0, endCursor: Y3Vyc29yOjE=, remaining: 4988
done
Requests: 0, endCursor: None, remaining: 4987
done
Requests: 0, endCursor: Y3Vyc29yOjEy, remaining: 4986
done
Requests: 0, endCursor: Y3Vyc29yOjE1, remaining: 4985
done
Requests: 0, endCursor: Y3Vyc29yOjEx, remaining: 4984
done
Requests: 0, endCursor: Y3Vyc29yOjI3, remaining: 4983
done
Requests: 0, endCursor: None, remaining: 4982
done
Requests: 0, endCursor: Y3Vyc29yOjE=, remaining: 4981
done
Requests: 0, endCursor: Y3Vyc29yOjYw, remaining: 4980
done
Requests: 0, endCursor: None, remaining: 4979
done
Requests: 0, endCursor: Y3Vyc29yOjg=, remaining: 4978
done
Requests: 0, endCursor: Y3Vyc29yOjk=, remaining: 4977
done
Requests: 0, endCursor: Y3Vyc29yOjQy, remaining: 4976
done
Requests: 0, endCursor: Y3Vyc29yOjEwMA==, remaining: 4975
Requests: 1, endCursor: Y3Vyc29yOjIwMA==, remaining: 4974
Requests: 2, endCursor: Y3Vyc29yOjMwMA==, remaining: 4973
Requests: 3, endCursor: 

In [5]:
# Load the issues CSV back into a DataFrame.
# NOTE(review): this rebinds `df`, which until here held the repository table
# read from REPOS_WITH_DATETIME_PATH. The issues file has a `repoName` column,
# not `nameWithOwner`.
df = pd.read_csv(ISSUES_PATH)

In [44]:
# Start DIFFS_PATH afresh (mode "w" truncates) containing only the header line.
with open(DIFFS_PATH, "w") as diffs_csv:
  header = ["repoName", "label", "url", "diffPreview"]
  csv.writer(diffs_csv, lineterminator="\n").writerow(header)

In [45]:
# Sourcegraph GraphQL endpoint and token header; search_diffs reads both as globals.
# NOTE(review): these reuse the names `url` and `headers` that search_issues also
# reads, so calling search_issues after this cell would target Sourcegraph.
url = "https://sourcegraph.com/.api/graphql"
headers = {"Authorization": f"token {sg_access_token}"}

def search_diffs(nameWithOwner):
  """Append one repository's matching Sourcegraph diff hits to DIFFS_PATH.

  Sends a single ``type:diff`` search (commit message containing "bug", diff
  containing "datetime") restricted to ``github.com/<nameWithOwner>``. Uses the
  module-level ``sg_query``, ``url`` and ``headers``. One CSV row is appended
  per returned result.

  Args:
    nameWithOwner: Repository path below ``github.com/`` (presumably
      ``owner/name``).

  Raises:
    requests.HTTPError: If the API answers with an HTTP error status.
    RuntimeError: If the JSON reply carries no ``data`` object.
  """
  # The repo: filter is a regex (note the ^ ... $ anchors), so a "." in the
  # repository name must be escaped or "owner/lib.js" also matches "owner/libxjs".
  repo_pattern = nameWithOwner.replace(".", "\\.")
  q = f"context:global repo:^github\\.com/{repo_pattern}$ type:diff message:bug datetime"
  # Named `payload` so the imported `json` module is not shadowed.
  payload = {"query": sg_query, "variables": {"q": q}}
  reply = requests.post(url, json=payload, headers=headers)
  # Fail with the real cause instead of a KeyError on "data" further down.
  reply.raise_for_status()
  body = reply.json()
  data = body.get("data")
  if not data:
    raise RuntimeError(
      f"Sourcegraph query failed for {nameWithOwner}: {body.get('errors')}"
    )

  diffs = data["search"]["results"]["results"]
  # UTF-8 is what pd.read_csv assumes by default, and newline="" leaves line
  # endings entirely to the csv writer (as the csv module docs require).
  with open(DIFFS_PATH, "a", encoding="utf-8", newline="") as file:
    writer = csv.writer(file, lineterminator="\n")
    for diff in diffs:
      # The diffPreview column receives a placeholder rather than the fetched value.
      row = [nameWithOwner, diff["label"]["text"], diff["url"], "<diff redacted>"] #diff["diffPreview"]["value"]
      writer.writerow(row)

# Iterate over the repository list itself rather than whatever `df` currently
# holds: an earlier cell rebinds `df` to the issues CSV, which has no
# "nameWithOwner" column, so a top-to-bottom run would raise KeyError here.
repos_df = pd.read_csv(REPOS_WITH_DATETIME_PATH)
for nameWithOwner in repos_df["nameWithOwner"]:
  search_diffs(nameWithOwner)
