In [None]:
import json
import re
import requests
import pyquery
import time
from tqdm.notebook import tqdm
from pyquery import PyQuery

In [None]:
from config import gitcoin_cookies, github_token

# grant

In [None]:
# Accumulator for crawled grant data: one entry per grant id that returned a result.
grant_profiles = dict()

In [None]:
# Probe grant ids one at a time through the grants API and keep the first
# element of every non-empty list response, keyed by the probed id.
GRANT_ID_STOP = 3000          # exclusive upper bound of the ids that are probed
GRANT_REQUEST_TIMEOUT = 30    # seconds; prevents a stalled connection from hanging the crawl

for grant_id in tqdm(range(1, GRANT_ID_STOP)):
    try:
        res = requests.get(
            f"https://gitcoin.co/api/v0.1/grants/?pk={grant_id}",
            timeout=GRANT_REQUEST_TIMEOUT,
        )
        data = res.json()
    except (requests.RequestException, ValueError) as err:
        # A network failure or a non-JSON body for one id must not abort the
        # whole crawl; report it and move on to the next id.
        print(f"grant {grant_id}: request failed ({err})")
        data = None

    # Only a non-empty list carries a grant record; anything else (empty list,
    # error object, failed request) is skipped.
    if isinstance(data, list) and data:
        grant_profiles[grant_id] = data[0]

    # Throttle: one request per second.
    time.sleep(1)

In [None]:
# Persist the crawled grant profiles as JSON.
# ensure_ascii=False writes non-ASCII characters verbatim, so the file is
# opened as UTF-8 explicitly; relying on the platform default encoding can
# raise UnicodeEncodeError where that default is not UTF-8.
with open('grants_profile.json', 'w', encoding='utf-8') as file:
    json.dump(grant_profiles, file, ensure_ascii=False)

# contribution history

In [None]:
# Reload the saved grant profiles so this section can run without re-crawling.
# The file is decoded as UTF-8 explicitly so the result does not depend on the
# platform's default encoding.
# Note: JSON object keys are strings, so the grant ids loaded here are str.
with open('grants_profile.json', 'r', encoding='utf-8') as file:
    grant_profiles = json.load(file)

In [None]:
# Flat accumulator for contribution records collected across all grants.
contributions = list()

In [None]:
# Walk the paginated contributions endpoint of every grant and collect all
# records, tagging each one with the grant id it was fetched for.
CONTRIBUTION_MAX_FAILURES = 5      # consecutive failures tolerated per page before giving up on a grant
CONTRIBUTION_RETRY_DELAY = 10      # seconds to wait before retrying a failed page
CONTRIBUTION_REQUEST_TIMEOUT = 30  # seconds; prevents a stalled connection from hanging the crawl

for grant_id in tqdm(grant_profiles.keys()):
    page = 1
    failures = 0

    # The API reports the next page number; a falsy value ends the pagination.
    while page:
        try:
            res = requests.get(
                f"https://gitcoin.co/grants/v1/api/grant/{grant_id}/contributions?page={page}&limit=100",
                timeout=CONTRIBUTION_REQUEST_TIMEOUT,
            )
            data = res.json()
            page_items = data['contributions']
            next_page = data['next_page_number']
        except (requests.RequestException, ValueError, KeyError, TypeError) as err:
            # Retry the same page after a pause, but only a bounded number of
            # times: a page that never yields a usable payload would otherwise
            # keep this loop spinning forever.
            failures += 1
            if failures >= CONTRIBUTION_MAX_FAILURES:
                print(f"grant {grant_id}: giving up on page {page} ({err!r})")
                break
            time.sleep(CONTRIBUTION_RETRY_DELAY)
            continue

        failures = 0

        for contribution in page_items:
            contribution['grant'] = grant_id

        contributions.extend(page_items)
        page = next_page

In [None]:
# Drop repeated contributions: keep the first record seen for each 'id' and
# preserve the original order of the survivors.
added_ids = set()
unique_contributions = []
for record in contributions:
    record_id = record['id']
    if record_id in added_ids:
        continue
    added_ids.add(record_id)
    unique_contributions.append(record)
contributions = unique_contributions

In [None]:
# Persist the de-duplicated contribution records as JSON.
# ensure_ascii=False writes non-ASCII characters verbatim, so the file is
# opened as UTF-8 explicitly; relying on the platform default encoding can
# raise UnicodeEncodeError where that default is not UTF-8.
with open('contributions.json', 'w', encoding='utf-8') as file:
    json.dump(contributions, file, ensure_ascii=False)

# gitcoin account & github account

In [None]:
# Reload the saved contributions so this section can run without re-crawling.
# The file is decoded as UTF-8 explicitly so the result does not depend on the
# platform's default encoding.
with open('contributions.json', 'r', encoding='utf-8') as file:
    contributions = json.load(file)

In [None]:
# Distinct contributor profile handles referenced by the contributions
# (set-based, so the resulting order is arbitrary).
unique_contributors = {
    record['subscription']['contributor_profile'] for record in contributions
}
account_ids = list(unique_contributors)

In [None]:
# Scrape the Gitcoin profile page of every contributor and extract the
# introduction text, interest tags, skill tags and follower count.
ACCOUNT_REQUEST_TIMEOUT = 30  # seconds; prevents a stalled connection from hanging the crawl

account_profiles = []
for account_id in tqdm(account_ids):
    try:
        res = requests.get(
            f"https://gitcoin.co/{account_id}",
            cookies=gitcoin_cookies,
            timeout=ACCOUNT_REQUEST_TIMEOUT,
        )
        res.raise_for_status()
    except requests.RequestException:
        # Best effort: a profile that cannot be fetched is skipped.
        continue

    html = PyQuery(res.text)

    # Pages without the profile header element are not treated as profiles.
    if not html.find(".profile-header__handle"):
        continue

    introduction = html.find(".review-comment").text() or ''
    interests = list(set([x.text for x in html.find(".my-2 + .mt-1 .tag-list__item")]))
    # Always a list (possibly empty), so the field has the same type as
    # 'interests' in every record.
    skills = list(set([x.text for x in html.find(".card-body .tag-list__item")]))

    # The first run of digits in the follower element is the count. If the
    # element is missing or contains no digits, record None rather than
    # crashing the whole crawl.
    followers_match = re.search(r'(\d+)', html.find("#follower_count").text())
    followers_count = int(followers_match.group()) if followers_match else None

    account_profiles.append({
        'id': account_id,
        'introduction': introduction,
        'interests': interests,
        'skills': skills,
        'followers_count': followers_count,
    })

In [None]:
# Persist the scraped Gitcoin account profiles as JSON.
# ensure_ascii=False writes non-ASCII characters verbatim, so the file is
# opened as UTF-8 explicitly; relying on the platform default encoding can
# raise UnicodeEncodeError where that default is not UTF-8.
with open('account_profiles.json', 'w', encoding='utf-8') as file:
    json.dump(account_profiles, file, ensure_ascii=False)

In [None]:
# Accumulator for the GitHub user objects returned by the GraphQL API.
github_profiles = list()

In [None]:
# Look up every account handle as a GitHub login through the GraphQL API and
# collect the returned user objects (login, creation date and several counts).
#
# The login is passed as a GraphQL variable instead of being formatted into
# the query text, so a handle containing quotes or other special characters
# cannot break or alter the query. The query is built once, outside the loop.
GITHUB_USER_QUERY = """
    query($login: String!) {
      user(login: $login) {
        login
        createdAt
        repositories{
          totalCount
        }
        followers{
          totalCount
        }
        following{
          totalCount
        }
        starredRepositories{
          totalCount
        }
        gists{
          totalCount
        }
        projects{
          totalCount
        }
      }
    }
"""
GITHUB_REQUEST_TIMEOUT = 30  # seconds; prevents a stalled connection from hanging the crawl

for account_id in tqdm(account_ids):
    try:
        data = requests.post(
            "https://api.github.com/graphql",
            headers={'Authorization': f'bearer {github_token}'},
            json={"query": GITHUB_USER_QUERY, "variables": {"login": account_id}},
            timeout=GITHUB_REQUEST_TIMEOUT,
        ).json()
    except (requests.RequestException, ValueError) as err:
        # A failed request or non-JSON body for one login must not abort the run.
        print(f"github {account_id}: request failed ({err})")
        data = None

    # Responses that report errors, or that carry no user object at all, are
    # skipped instead of raising KeyError.
    user = None
    if isinstance(data, dict) and 'errors' not in data:
        user = (data.get('data') or {}).get('user')

    if user:
        github_profiles.append(user)

    # Throttle every iteration, including the skipped ones.
    time.sleep(1)

In [None]:
# Persist the collected GitHub profiles as JSON.
# ensure_ascii=False writes non-ASCII characters verbatim, so the file is
# opened as UTF-8 explicitly; relying on the platform default encoding can
# raise UnicodeEncodeError where that default is not UTF-8.
with open('github_profiles.json', 'w', encoding='utf-8') as file:
    json.dump(github_profiles, file, ensure_ascii=False)