In [None]:
import requests
from requests.auth import HTTPBasicAuth
import json
import time
import itertools
import numpy as np
import pandas as pd

In [None]:
def grab_issues(page, timeout=30):
    """Request one page of issues from the rust-lang/rust GitHub repository.

    Parameters
    ----------
    page : int
        Value sent as the ``page`` query parameter.
    timeout : float or tuple, optional
        Passed straight to ``requests.get``. Without it a stalled connection
        would block the notebook indefinitely; with it ``requests`` raises a
        timeout exception instead. Defaults to 30 seconds.

    Returns
    -------
    requests.Response
        The unmodified response object; status checking and body decoding
        are left to the caller.
    """
    query = {
        "state": "all",
        "filter": "all",
        "per_page": 100,
        "page": page
    }
    # NOTE(review): "user"/"password" look like scrubbed placeholders -- real
    # credentials should be injected at run time, not committed in the notebook.
    return requests.get(
        "https://api.github.com/repos/rust-lang/rust/issues",
        params=query,
        auth=HTTPBasicAuth("user", "password"),
        timeout=timeout,
    )

In [None]:
# Accumulator for the response bodies collected by the paging loop.
docs = list()

In [None]:
# Walk the paginated issue listing, storing each raw response body in `docs`,
# until a (near-)empty page is returned or a request fails.
#
# Page numbering starts at 1: GitHub documents `page` as 1-based (default 1).
# Starting at 0 presumably just returns the first page a second time, which
# costs an extra request and duplicates its issues -- TODO confirm.
i = 1
while True:
    request = grab_issues(i)
    if not request.ok:
        # Stop on the first HTTP error; pages collected so far stay in `docs`.
        print(i, "Failed")
        break
    if len(request.text) < 10:
        # A body this short is treated as the end of the listing
        # (presumably the bare empty JSON array "[]").
        print("Done")
        break
    docs.append(request.text)
    print(i, "Sucessful")
    i += 1

In [None]:
# Decode every raw JSON page body into Python objects, rebinding `docs`.
# Entries that are no longer text (i.e. already decoded by an earlier run of
# this cell) are passed through untouched, so re-running the cell is safe
# instead of raising TypeError inside json.loads.
docs = [
    json.loads(x) if isinstance(x, (str, bytes, bytearray)) else x
    for x in docs
]

In [None]:
# Flatten the per-page collections in `docs` into one list,
# preserving page order and the order of entries within each page.
issues = [entry for page_entries in docs for entry in page_entries]

In [None]:
# Collect the label names of every distinct issue.
#   ids    -- issue ids in first-seen order
#   labels -- for the issue at the same position in `ids`, its label names
# Issues whose id was already encountered are skipped.
labels = []
ids = []
seen_ids = set()  # constant-time membership test; scanning `ids` was O(n) per issue
for issue in issues:
    issue_id = issue["id"]
    if issue_id in seen_ids:
        continue
    seen_ids.add(issue_id)
    ids.append(issue_id)
    labels.append([x['name'] for x in issue['labels']])

In [None]:
# For each issue's label list, emit every unordered pair of its labels,
# including each label paired with itself, as an alphabetically ordered tuple.
# The combined list is sorted so equal pairs end up adjacent.
pairs = sorted(
    tuple(sorted(combo))
    for issue_labels in labels
    for combo in itertools.combinations_with_replacement(issue_labels, 2)
)

In [None]:
# Occurrence count per distinct pair. groupby only merges adjacent equal
# items, so this relies on `pairs` being sorted.
counts = {pair: len(list(run)) for pair, run in itertools.groupby(pairs)}

In [None]:
# Similarity per label pair: shared count divided by
# (count of first label's self-pair + count of second label's self-pair - shared count),
# i.e. an intersection-over-union ratio built from the pair counts.
sims = {}
for key, count in counts.items():
    first, second = key[0], key[1]
    union_size = counts[(first, first)] + counts[(second, second)] - count
    sims[key] = count / union_size

In [None]:
# Tabulate the pair counts as (source, target, value) rows and write them to CSV.
count_keys = list(counts)
df_counts = pd.DataFrame({
    "source": [pair[0] for pair in count_keys],
    "target": [pair[1] for pair in count_keys],
    "value": list(counts.values()),
})
df_counts.to_csv("../../hw3/data/counts.csv", index=False)

In [None]:
# Tabulate the pair similarities as (source, target, value) rows and write them to CSV.
sim_keys = list(sims)
df_sims = pd.DataFrame({
    "source": [pair[0] for pair in sim_keys],
    "target": [pair[1] for pair in sim_keys],
    "value": list(sims.values()),
})
df_sims.to_csv("../../hw3/data/sims.csv", index=False)

In [None]:
# One row per distinct label appearing as a source or target in df_sims.
df_triage = pd.DataFrame(np.union1d(df_sims["source"], df_sims["target"]), columns=["id"])

# group = the text before the first "-" in the label name.
# `n` must be passed by keyword: it is keyword-only in current pandas, so the
# positional form str.split("-", 1) raises TypeError there.
df_triage["group"] = df_triage["id"].str.split("-", n=1).str[0]

# Prefixes longer than two characters are replaced by the literal string "None".
# A single .loc assignment replaces the chained df["group"][mask] = ... form,
# which warns (SettingWithCopyWarning) and does not write through under
# copy-on-write.
df_triage.loc[df_triage["group"].str.len() > 2, "group"] = "None"

df_triage.to_csv("../../hw3/data/triage.csv", index=False)

In [None]:
# Build graph.json from the CSV files written earlier in the notebook:
#   links -- pair counts, excluding rows where source equals target
#   nodes -- triage rows whose id occurs in at least one remaining link
#
# keep_default_na=False makes read_csv keep every cell as written. With the
# default NA handling, current pandas parses the literal group "None" (and
# any label spelled like "NA"/"null") as NaN, which would reach the JSON as null.
df_counts = pd.read_csv("../../hw3/data/counts.csv", keep_default_na=False).query("source != target")
df_triage = pd.read_csv("../../hw3/data/triage.csv", keep_default_na=False)

is_linked = np.isin(df_triage["id"], df_counts["source"]) | np.isin(df_triage["id"], df_counts["target"])
df_triage = df_triage[is_linked]

with open("../../hw3/data/graph.json", "w") as file:
    # Round-trip through DataFrame.to_json so values are emitted as plain
    # JSON-compatible Python objects before json.dump serializes them.
    json.dump({
        "nodes": json.loads(df_triage.to_json(orient="records")),
        "links": json.loads(df_counts.to_json(orient="records"))
    }, file)