# Generate insights on GitHub PRs in the last 6 months

In [1]:
import os

import pandas as pd
from dotenv import load_dotenv
from py_github.py_github import PyGithub
import sqlite3

In [2]:
# Populate the process environment from the local .env file, then read the
# GitHub credentials out of it (either value is None when the variable is unset).
load_dotenv()
pygh_username = os.environ.get("PYGH_USER")
pygh_token = os.environ.get("PYGH_TOKEN")

# GitHub API client used by every later cell.
pygh = PyGithub(pygh_username, pygh_token)

# SQLite database that the fetched data is written to.
sql_conn = sqlite3.connect("./data/github.db")

## Get all the repos for the user specified in the .env file

In [3]:
# Fetch the repositories for the configured user; the result is passed
# straight to pd.DataFrame in the next cell.
repos = pygh.get_repos()

In [4]:
# Tabulate the raw repository records returned by the GitHub client.
repos_df = pd.DataFrame(repos)

# Keep only the columns that are persisted. `filter(items=...)` skips any
# label that is absent from the frame instead of raising.
repos_slim_df = repos_df.filter(items=['id', 'name', 'full_name', 'description', 'url'])

# (Re)create the "repos" table in SQLite. With pandas' default `index=True`
# the frame's index is stored as an extra column.
repos_slim_df.to_sql("repos", sql_conn, if_exists="replace")


143

# Get all closed pull requests for the repos from the last 6 months

In [11]:
# Fetch the closed pull requests of the last 6 months for every repository in
# `repos_slim_df` (one request per repository, run on a thread pool) and store
# one row per pull request in the SQLite table 'prs'.
import concurrent.futures

# Column layout of the 'prs' table. Listing it explicitly means an empty result
# still produces a frame with a 'number' column to index on.
# NOTE(review): 'closed_dat' looks like a typo for 'closed_at'. The name is kept
# because queries outside this cell may already select that column - confirm
# and rename together with its consumers.
PR_COLUMNS = [
    'number',
    'title',
    'repo_id',
    'created_at',
    'updated_at',
    'closed_dat',
    'merged_at',
]

# Drop the table up front so a stale copy never survives a failed refresh.
cur = sql_conn.cursor()
try:
    cur.execute("DROP TABLE IF EXISTS 'prs'")
finally:
    cur.close()

prs = []
with concurrent.futures.ThreadPoolExecutor(max_workers=30) as executor:
    # Submit every request before collecting any result. Draining
    # `as_completed` inside the submission loop would wait for each request in
    # turn and would re-read earlier futures, appending their rows again.
    future_to_repo_id = {
        executor.submit(
            pygh.get_repo_pull_requests,
            repo_full_name=repo['full_name'],
            state='closed',
            length_in_months=6,
        ): repo['id']
        for _, repo in repos_slim_df.iterrows()
    }

    for future in concurrent.futures.as_completed(future_to_repo_id):
        # Use the id of the repository the request was made for, so rows always
        # join against repos.id. pr['head']['repo'] is the source repository:
        # a fork's id for fork PRs, and null in the GitHub REST API once that
        # fork has been deleted.
        repo_id = future_to_repo_id[future]
        # A failed request re-raises here; a falsy result means "no PRs".
        for pr in future.result() or []:
            prs.append({
                'number': pr['number'],
                'title': pr['title'],
                'repo_id': repo_id,
                'created_at': pr['created_at'],
                'updated_at': pr['updated_at'],
                'closed_dat': pr['closed_at'],
                'merged_at': pr['merged_at'],
            })

# Persist once, after all workers have finished.
prs_df = pd.DataFrame(prs, columns=PR_COLUMNS).set_index('number')
prs_df.to_sql('prs', sql_conn, if_exists='replace')
