# 0. Importing packages

In [1]:
# Load the autoreload extension to automatically reload modules before executing code (to avoid restarting the kernel)
%load_ext autoreload 

# Enable autoreload for all modules
%autoreload 2

In [2]:
import pandas as pd
from pathlib import Path
from itertools import chain
from tqdm import tqdm

# Custom functions
from resources.github_functions import GithubScraper

## 0.1 File Paths

In [3]:
fp_main = Path('/Volumes/SAM-SODAS-DISTRACT/Coding Distraction/github_as_a_market_device')
fp_main_output = Path(fp_main / 'output')

# 1 Filtering users and making (named-user,company)-list

## 1.1 Loading in the data on first tier users
> For scrape and sorting of first tier see the notebooks:
* 1. Collecting first-tier users.ipynb
* 2. Sorting first-tier users.ipynb


In [4]:
first_tier_info = pd.read_parquet(fp_main_output / 'first_tier_ties_sorted.parquet.gzip')

### 1.2 Creating a dataframe where a row is a company with a list of potential second tier users

In [5]:
# Aggregate potential second users for each company in the second tier
second_tier_users_and_company = (
    first_tier_info.groupby('search_with_company', as_index=False)['unique_ties']
    .agg(lambda x: list(chain.from_iterable(x)))
)

# Calculate total number of potential second-tier users
numb_of_second_tier_users = second_tier_users_and_company['unique_ties'].str.len().sum()

print(numb_of_second_tier_users)

27752


# 2.2 Instantiating the GithubScraper and scrape

In [6]:
import json
from pathlib import Path

# Initialize sets for tracking
users_already_scraped = set()
companies_already_scraped = set()
users_attempted_scraped = set()

# Paths
first_tier_user_log_file = 'first_tier_userinfo_user_log.jsonl'
second_tier_user_log_file = 'second_tier_userinfo_user_log.jsonl'
users_attempted_scrape_file = 'users_attempted_scrape.jsonl'

first_tier_user_log_path = fp_main_output / first_tier_user_log_file
second_tier_user_log_path = fp_main_output / second_tier_user_log_file
users_attempted_scrape_path = fp_main_output / users_attempted_scrape_file

def ensure_file_exists(path: Path):
    if not path.exists():
        print(f"[INFO] File does not exist. Creating: {path.name}")
        path.touch(exist_ok=True)
    else:
        print(f"[INFO] File exists: {path.name}")

def load_users_from_log(path: Path):
    users = set()
    if path.exists():
        with open(path, "r", encoding="utf-8") as f:
            for line in f:
                try:
                    user_info = json.loads(line)
                    users.add(user_info["user_login"])
                except (json.JSONDecodeError, KeyError) as err:
                    print(f"[WARNING] Skipping malformed user line in {path.name}: {err}")
    return users

# Ensure all files exist
for path in [first_tier_user_log_path, second_tier_user_log_path, users_attempted_scrape_path]:
    ensure_file_exists(path)

# Populate sets
users_already_scraped |= load_users_from_log(first_tier_user_log_path)
users_already_scraped |= load_users_from_log(second_tier_user_log_path)
users_already_attempted = load_users_from_log(users_attempted_scrape_path)

[INFO] File exists: first_tier_userinfo_user_log.jsonl
[INFO] File exists: second_tier_userinfo_user_log.jsonl
[INFO] File exists: users_attempted_scrape.jsonl


In [7]:
# 1. Create instance of GithubScraper
gs = GithubScraper(
    users_already_scraped=users_already_scraped,
    companies_already_scraped=companies_already_scraped,
    users_already_attempted=users_already_attempted,
    repo_limit=300
)

second_tier_users_to_scrape = {
    user: row['search_with_company']
    for _, row in second_tier_users_and_company.iterrows()
    for user in row['unique_ties']
}

print(f'GitHub REST API ratelimit reset time for token {gs.current_token_key} is {gs.reset_time_point}. '
      f'That will be in a little less than {gs.reset_time_in_minutes} minutes.')

# 2. Define output file name
file_name = 'second_tier_userinfo'

# 3. Loop through company queries
for user, search_with_company in tqdm(second_tier_users_to_scrape.items()):

    # 3.3 Check if user is already scraped
    if user in gs.users_already_attempted:
        print(f'[INFO] User {user} already scraped. Skipping.')
        continue

    # Log user to the set of already attempted users
    gs.log_user_scrape_attempt(user, users_attempted_scrape_path)
    gs.users_already_attempted.add(user)

    # 3.1 Get user from the flattened dictionary
    named_user = gs.get_user(user)

    # 3.2 Check if user is None (e.g., if user is not found)
    if named_user is None:
        continue

    # 3.5 Check if user is a relevant user (DK + company)
    user_row = gs.get_user_info(named_user, search_with_company)
    if user_row is None:
        continue  # Skip user if they don't meet scraping criteria

    # 3.3.3 Extract match data
    location_match = user_row.location
    inferred_company = user_row.inferred_company
    matched_company_strings = user_row.matched_company_strings

    # 3.3.4 Save user info and log result
    gs.save_file(user_row, file_name, remove_existing_file=True)
    gs.log_user_w_match(named_user.login, inferred_company, matched_company_strings, location_match, second_tier_user_log_path)
    
    print(f'[INFO] {gs.USERS_SCRAPED} users scraped so far.')

GithubScraper initialized with 3 tokens.
First token in cycle. Initiating ACCESS_TOKEN_1.
GithubScraper initialized with 0 companies and 380 users already scraped.
GitHub REST API ratelimit reset time for token ACCESS_TOKEN_1 is 2025-06-10 12:03:52. That will be in a little less than 5 minutes.


  0%|          | 0/25924 [00:00<?, ?it/s]

[INFO] User karuncs already scraped. Skipping.
[INFO] User heatherm already scraped. Skipping.
[INFO] User runephilosof already scraped. Skipping.
[INFO] User jekyll already scraped. Skipping.
[INFO] User GeoffAbtion already scraped. Skipping.
[INFO] User Aberen already scraped. Skipping.
[INFO] User runephilosof-abtion already scraped. Skipping.
[INFO] User RobWu already scraped. Skipping.
[INFO] User jeppester already scraped. Skipping.
[INFO] User synth already scraped. Skipping.
[INFO] User slamidtfyn already scraped. Skipping.
[INFO] User adionditsak already scraped. Skipping.
[INFO] User bokh already scraped. Skipping.
[INFO] User namuit already scraped. Skipping.
[INFO] User GuldbekLEGO already scraped. Skipping.
[INFO] User martinvintherp already scraped. Skipping.
[INFO] User allowishus-dev already scraped. Skipping.
[INFO] User finnpedersenkazes already scraped. Skipping.
[INFO] User ozf already scraped. Skipping.
[INFO] User MadsZeneli already scraped. Skipping.
[INFO] User 

 33%|███▎      | 8518/25924 [00:02<00:05, 2998.34it/s]

User match torbenholm logged.
[INFO] 381 users scraped so far.
[INFO] User CasperHCH already scraped. Skipping.
[INFO] User Rasmus-Rosendal already scraped. Skipping.
[INFO] User nschlemm already scraped. Skipping.
[INFO] User gahms already scraped. Skipping.
[INFO] User glshank-usgs already scraped. Skipping.
[INFO] User frederikPN already scraped. Skipping.
[INFO] User andersbohn already scraped. Skipping.
[INFO] User henhoy already scraped. Skipping.
[INFO] User jakobhj already scraped. Skipping.
[INFO] User hnvn already scraped. Skipping.
[INFO] User frapontillo already scraped. Skipping.
[INFO] User SimonTob already scraped. Skipping.
[INFO] User morten-egan already scraped. Skipping.
[INFO] User dorotawalden already scraped. Skipping.
[INFO] User mstMiracle already scraped. Skipping.
[INFO] User mRemoteNG already scraped. Skipping.
[INFO] User uknudsen already scraped. Skipping.
[INFO] User noskillahh already scraped. Skipping.
[INFO] User KrestenKjaer already scraped. Skipping.


 33%|███▎      | 8518/25924 [00:20<00:05, 2998.34it/s]

[INFO] User younthu already scraped. Skipping.
[INFO] User larssg already scraped. Skipping.
[INFO] User kfn already scraped. Skipping.


 33%|███▎      | 8587/25924 [00:23<01:03, 271.77it/s] 

User match hellehyllestedlarsen logged.
[INFO] 383 users scraped so far.
[INFO] User 14g0n1 already scraped. Skipping.
[INFO] User martinbaehrenzbjerregaard already scraped. Skipping.
[INFO] User mortenolsson already scraped. Skipping.
[INFO] User RedHatTraining already scraped. Skipping.
[INFO] User kjeldpaw already scraped. Skipping.
[INFO] User CedricGatay already scraped. Skipping.
[INFO] User bryanasdev000 already scraped. Skipping.
[INFO] User juliansteenbakker already scraped. Skipping.
[INFO] User helo already scraped. Skipping.


 33%|███▎      | 8597/25924 [00:24<01:08, 252.33it/s]

User match mbhlawn logged.
[INFO] 384 users scraped so far.
[INFO] User AFNetworking already scraped. Skipping.
[INFO] User fluttercommunity already scraped. Skipping.
[INFO] User mgjhl already scraped. Skipping.
[INFO] User Centralize already scraped. Skipping.
[INFO] User ansible-awx already scraped. Skipping.
[INFO] User adithyaxx already scraped. Skipping.
User match ahoeffner-zz logged.
[INFO] 385 users scraped so far.


 33%|███▎      | 8611/25924 [00:43<03:05, 93.56it/s] 

User technologyarts has more than 300 repos, skipping.


 33%|███▎      | 8647/25924 [01:42<30:21,  9.49it/s]

User sangjiexun has more than 300 repos, skipping.


 33%|███▎      | 8655/25924 [02:02<59:35,  4.83it/s]

User match clknap logged.
[INFO] 386 users scraped so far.


 33%|███▎      | 8665/25924 [02:29<2:10:19,  2.21it/s]

User royswale has more than 300 repos, skipping.


 33%|███▎      | 8675/25924 [02:54<4:25:48,  1.08it/s]

User danielbayley has more than 300 repos, skipping.


 34%|███▎      | 8729/25924 [04:58<6:42:01,  1.40s/it] 

User simonw has more than 300 repos, skipping.


 34%|███▎      | 8749/25924 [05:42<5:37:17,  1.18s/it] 

User knownasilya has more than 300 repos, skipping.


 34%|███▍      | 8766/25924 [06:27<8:21:58,  1.76s/it] 

User alexlafroscia has more than 300 repos, skipping.


 34%|███▍      | 8767/25924 [06:27<6:29:23,  1.36s/it]

User lucylow has more than 300 repos, skipping.


 34%|███▍      | 8776/25924 [06:40<4:33:31,  1.04it/s]

User influx6 has more than 300 repos, skipping.


 34%|███▍      | 8808/25924 [07:50<6:48:48,  1.43s/it] 

User Felix660 has more than 300 repos, skipping.


 34%|███▍      | 8824/25924 [08:35<8:35:30,  1.81s/it] 

User dephora has more than 300 repos, skipping.


 34%|███▍      | 8838/25924 [09:15<11:12:18,  2.36s/it]

User ff6347 has more than 300 repos, skipping.


 34%|███▍      | 8859/25924 [09:56<6:49:29,  1.44s/it] 

User puppycodes has more than 300 repos, skipping.


 34%|███▍      | 8870/25924 [10:30<16:07:53,  3.41s/it]

User alphagov has more than 300 repos, skipping.


 34%|███▍      | 8907/25924 [12:20<9:58:13,  2.11s/it] 

User RobLoach has more than 300 repos, skipping.


 34%|███▍      | 8912/25924 [12:36<10:45:02,  2.28s/it]

User jean has more than 300 repos, skipping.


 34%|███▍      | 8914/25924 [12:37<7:10:28,  1.52s/it] 

User fstfwd has more than 300 repos, skipping.


 34%|███▍      | 8940/25924 [13:36<6:50:32,  1.45s/it] 

User xandrkat has more than 300 repos, skipping.


 35%|███▍      | 9002/25924 [15:54<7:25:22,  1.58s/it] 

User eiriklv has more than 300 repos, skipping.


 35%|███▍      | 9033/25924 [16:58<5:18:42,  1.13s/it] 

User Rud5G has more than 300 repos, skipping.


 35%|███▍      | 9035/25924 [16:59<4:13:05,  1.11it/s]

User denji has more than 300 repos, skipping.


 35%|███▍      | 9062/25924 [18:05<6:22:32,  1.36s/it] 

User nullnotfound has more than 300 repos, skipping.


 35%|███▌      | 9120/25924 [20:11<8:18:40,  1.78s/it] 

User opencollective has more than 300 repos, skipping.


 35%|███▌      | 9122/25924 [20:12<5:41:26,  1.22s/it]

User sambacha has more than 300 repos, skipping.


 36%|███▌      | 9216/25924 [23:57<6:27:50,  1.39s/it] 

User aredridel has more than 300 repos, skipping.


 36%|███▌      | 9248/25924 [25:00<6:02:00,  1.30s/it] 

User christophermoura has more than 300 repos, skipping.


 36%|███▌      | 9250/25924 [25:03<5:47:05,  1.25s/it]

User DevenLu has more than 300 repos, skipping.


 36%|███▌      | 9263/25924 [25:22<7:19:45,  1.58s/it] 

User Brugarolas has more than 300 repos, skipping.


 36%|███▌      | 9277/25924 [25:45<4:32:16,  1.02it/s] 

User MarQuisKnox has more than 300 repos, skipping.


 36%|███▌      | 9282/25924 [25:55<7:06:11,  1.54s/it]

User kt3k has more than 300 repos, skipping.


 36%|███▌      | 9349/25924 [28:34<11:44:09,  2.55s/it]

User barbagrigia has more than 300 repos, skipping.


 36%|███▌      | 9389/25924 [29:57<5:45:22,  1.25s/it] 

User cdaringe has more than 300 repos, skipping.


 37%|███▋      | 9464/25924 [32:46<8:06:36,  1.77s/it] 

[INFO] User magick93 already scraped. Skipping.


 37%|███▋      | 9518/25924 [34:18<4:30:18,  1.01it/s] 

User GerHobbelt has more than 300 repos, skipping.


 37%|███▋      | 9561/25924 [36:00<9:48:29,  2.16s/it] 

User xeronith has more than 300 repos, skipping.


 37%|███▋      | 9568/25924 [36:14<9:42:44,  2.14s/it] 

User bmorelax has more than 300 repos, skipping.


 37%|███▋      | 9584/25924 [36:39<4:55:04,  1.08s/it] 

User thomasdarimont has more than 300 repos, skipping.


 37%|███▋      | 9653/25924 [39:12<6:35:22,  1.46s/it] 

User javafun has more than 300 repos, skipping.


 37%|███▋      | 9702/25924 [41:07<6:13:36,  1.38s/it] 

User plainspace has more than 300 repos, skipping.


 37%|███▋      | 9703/25924 [41:08<4:57:52,  1.10s/it]

User caraya has more than 300 repos, skipping.


 37%|███▋      | 9718/25924 [41:46<9:52:54,  2.20s/it] 

User pombredanne has more than 300 repos, skipping.


 38%|███▊      | 9771/25924 [43:42<8:16:30,  1.84s/it] 

User zoosky has more than 300 repos, skipping.


 38%|███▊      | 9817/25924 [46:23<72:28:32, 16.20s/it]

User match danlars logged.
[INFO] 387 users scraped so far.


 38%|███▊      | 9821/25924 [46:27<20:19:53,  4.55s/it]

User thanhtoan1196 has more than 300 repos, skipping.


 38%|███▊      | 9918/25924 [50:27<6:56:11,  1.56s/it] 

User th3architect has more than 300 repos, skipping.


 39%|███▊      | 10009/25924 [54:10<4:52:26,  1.10s/it] 

User rmatam has more than 300 repos, skipping.


 39%|███▊      | 10024/25924 [54:46<8:55:54,  2.02s/it] 

[INFO] User crudbug already scraped. Skipping.


 39%|███▉      | 10081/25924 [56:57<5:25:25,  1.23s/it] 

User BoxPistols has more than 300 repos, skipping.


 39%|███▉      | 10164/25924 [1:00:12<11:17:03,  2.58s/it]

User byteshiva has more than 300 repos, skipping.


 39%|███▉      | 10175/25924 [1:00:36<6:32:52,  1.50s/it] 

User sa-tasche has more than 300 repos, skipping.


 39%|███▉      | 10178/25924 [1:00:42<8:04:53,  1.85s/it]

[INFO] User lewisbrown already scraped. Skipping.


 39%|███▉      | 10230/25924 [1:02:52<10:11:53,  2.34s/it]

User ichoake has more than 300 repos, skipping.


 40%|███▉      | 10240/25924 [1:03:10<6:29:05,  1.49s/it] 

User ionic-team has more than 300 repos, skipping.


 40%|███▉      | 10262/25924 [1:04:02<9:53:24,  2.27s/it] 

Token cycled to ACCESS_TOKEN_2.
Cycle


 40%|███▉      | 10317/25924 [1:06:35<6:25:13,  1.48s/it] 

User zpaav has more than 300 repos, skipping.


 40%|████      | 10394/25924 [1:09:13<6:21:02,  1.47s/it] 

User limerentfox has more than 300 repos, skipping.


 40%|████      | 10438/25924 [1:10:53<5:26:16,  1.26s/it] 

User edb-c has more than 300 repos, skipping.


 40%|████      | 10483/25924 [1:12:21<5:34:39,  1.30s/it] 

User jasondavis has more than 300 repos, skipping.


 41%|████      | 10508/25924 [1:13:23<4:44:30,  1.11s/it] 

User MULXCODE has more than 300 repos, skipping.


 41%|████      | 10550/25924 [1:14:53<7:31:37,  1.76s/it] 

User wesbos has more than 300 repos, skipping.


 41%|████      | 10689/25924 [1:20:11<7:30:57,  1.78s/it] 

User sergeylukin has more than 300 repos, skipping.


 42%|████▏     | 10790/25924 [1:24:48<10:30:10,  2.50s/it]

User forklifters has more than 300 repos, skipping.


 42%|████▏     | 10842/25924 [1:26:50<7:43:22,  1.84s/it] 

User davgit has more than 300 repos, skipping.


 42%|████▏     | 10848/25924 [1:27:05<8:34:19,  2.05s/it] 

User kreo has more than 300 repos, skipping.


 42%|████▏     | 10849/25924 [1:27:05<6:36:01,  1.58s/it]

User wildcard has more than 300 repos, skipping.


 42%|████▏     | 10860/25924 [1:28:00<49:45:30, 11.89s/it]

User match Phhansen logged.
[INFO] 388 users scraped so far.


 42%|████▏     | 10873/25924 [1:28:33<7:54:47,  1.89s/it] 

User rhdeck has more than 300 repos, skipping.


 42%|████▏     | 10958/25924 [1:31:49<6:34:16,  1.58s/it] 

User NaszvadiG has more than 300 repos, skipping.


 42%|████▏     | 10967/25924 [1:32:18<11:01:39,  2.65s/it]

User manuelramireztech has more than 300 repos, skipping.


 42%|████▏     | 10992/25924 [1:33:25<6:57:43,  1.68s/it] 

User laravel-shift has more than 300 repos, skipping.


 43%|████▎     | 11085/25924 [1:37:32<7:24:46,  1.80s/it] 

User markwu has more than 300 repos, skipping.


 43%|████▎     | 11103/25924 [1:38:23<12:27:46,  3.03s/it]

User vestigegroup has more than 300 repos, skipping.


 43%|████▎     | 11139/25924 [1:39:26<4:43:32,  1.15s/it] 

User algolia has more than 300 repos, skipping.


 43%|████▎     | 11169/25924 [1:40:33<6:10:19,  1.51s/it] 

User Shelob9 has more than 300 repos, skipping.


 43%|████▎     | 11216/25924 [1:43:10<9:37:02,  2.35s/it] 

User mdsohelmia has more than 300 repos, skipping.


 43%|████▎     | 11221/25924 [1:43:21<8:10:34,  2.00s/it] 

User riffus has more than 300 repos, skipping.


 43%|████▎     | 11250/25924 [1:44:38<8:28:53,  2.08s/it] 

User wuwx has more than 300 repos, skipping.


 44%|████▎     | 11289/25924 [1:46:29<6:35:21,  1.62s/it] 

User ManojKiranA has more than 300 repos, skipping.


 44%|████▎     | 11292/25924 [1:46:32<4:33:03,  1.12s/it]

User spatie has more than 300 repos, skipping.


 44%|████▎     | 11331/25924 [1:48:06<7:43:48,  1.91s/it] 

User yhbyun has more than 300 repos, skipping.


 44%|████▎     | 11339/25924 [1:48:22<7:08:40,  1.76s/it] 

User rubythonode has more than 300 repos, skipping.


 44%|████▍     | 11355/25924 [1:48:56<6:08:37,  1.52s/it] 

User od3n has more than 300 repos, skipping.


 44%|████▍     | 11367/25924 [1:49:19<8:51:46,  2.19s/it]

[INFO] User josefeg already scraped. Skipping.


 44%|████▍     | 11421/25924 [1:51:19<8:53:33,  2.21s/it] 

[INFO] User EmmanuelOga already scraped. Skipping.


 44%|████▍     | 11433/25924 [1:51:50<10:05:28,  2.51s/it]

User sjqzhang has more than 300 repos, skipping.


 44%|████▍     | 11508/25924 [1:55:24<8:17:02,  2.07s/it] 

User orinocoz has more than 300 repos, skipping.


 44%|████▍     | 11511/25924 [1:55:28<6:33:01,  1.64s/it]

User jiegec has more than 300 repos, skipping.


 45%|████▍     | 11543/25924 [1:56:53<8:32:25,  2.14s/it] 

User mul14 has more than 300 repos, skipping.


 45%|████▍     | 11565/25924 [1:57:56<6:16:22,  1.57s/it] 

User jumping has more than 300 repos, skipping.


 45%|████▍     | 11595/25924 [1:59:20<5:48:28,  1.46s/it] 

User binRick has more than 300 repos, skipping.


 45%|████▍     | 11597/25924 [1:59:21<4:07:28,  1.04s/it]

User jevinskie has more than 300 repos, skipping.


 45%|████▍     | 11614/25924 [2:00:12<7:41:43,  1.94s/it] 

User nxbdi has more than 300 repos, skipping.


 45%|████▍     | 11649/25924 [2:01:38<15:40:37,  3.95s/it]

[INFO] User avellable already scraped. Skipping.


 45%|████▌     | 11679/25924 [2:03:10<7:56:00,  2.00s/it] 

User cloudflare has more than 300 repos, skipping.


 45%|████▌     | 11681/25924 [2:03:16<9:04:24,  2.29s/it] 

User Conan-Kudo has more than 300 repos, skipping.


 45%|████▌     | 11684/25924 [2:03:19<5:45:28,  1.46s/it]

User Cloudxtreme has more than 300 repos, skipping.


 45%|████▌     | 11689/25924 [2:03:24<3:46:14,  1.05it/s]

User alexellis has more than 300 repos, skipping.


 45%|████▌     | 11704/25924 [2:04:12<14:01:26,  3.55s/it]

User hugmatj has more than 300 repos, skipping.


 45%|████▌     | 11705/25924 [2:04:16<14:36:10,  3.70s/it]

[INFO] User azmras already scraped. Skipping.


 45%|████▌     | 11735/25924 [2:05:26<6:47:44,  1.72s/it] 

[INFO] User mahmoudimus already scraped. Skipping.


 45%|████▌     | 11748/25924 [2:05:56<6:32:24,  1.66s/it] 

User jgarte has more than 300 repos, skipping.


 45%|████▌     | 11773/25924 [2:07:01<11:55:51,  3.04s/it]

User paran0ids0ul has more than 300 repos, skipping.


 46%|████▌     | 11911/25924 [2:13:17<7:06:02,  1.82s/it] 

User langelee has more than 300 repos, skipping.


 46%|████▌     | 11912/25924 [2:13:17<5:29:10,  1.41s/it]

User ghishadow has more than 300 repos, skipping.


 46%|████▌     | 11934/25924 [2:14:10<5:56:11,  1.53s/it] 

User sysbot has more than 300 repos, skipping.


 46%|████▌     | 11942/25924 [2:14:30<9:57:02,  2.56s/it] 

User LinusU has more than 300 repos, skipping.


 46%|████▌     | 11966/25924 [2:15:30<9:57:42,  2.57s/it] 

User backwardn has more than 300 repos, skipping.


 46%|████▋     | 11996/25924 [2:17:01<8:06:27,  2.10s/it] 

User lingling2012 has more than 300 repos, skipping.


 46%|████▋     | 12000/25924 [2:17:11<7:14:24,  1.87s/it] 

User adam-stokes has more than 300 repos, skipping.


 46%|████▋     | 12012/25924 [2:17:45<8:57:05,  2.32s/it] 

User PierceLBrooks has more than 300 repos, skipping.


 46%|████▋     | 12015/25924 [2:17:50<7:03:31,  1.83s/it]

User flathub has more than 300 repos, skipping.


 46%|████▋     | 12037/25924 [2:18:39<9:41:29,  2.51s/it] 

User detailyang has more than 300 repos, skipping.


 46%|████▋     | 12048/25924 [2:19:02<6:09:31,  1.60s/it] 

User wezm has more than 300 repos, skipping.


 47%|████▋     | 12064/25924 [2:19:39<4:18:32,  1.12s/it] 

User markstos has more than 300 repos, skipping.


 47%|████▋     | 12076/25924 [2:20:10<7:51:26,  2.04s/it] 

User gauravssnl has more than 300 repos, skipping.


 47%|████▋     | 12088/25924 [2:20:45<12:31:55,  3.26s/it]

User jpx40 has more than 300 repos, skipping.


 47%|████▋     | 12117/25924 [2:21:54<5:15:29,  1.37s/it] 

User JuryA has more than 300 repos, skipping.


 47%|████▋     | 12130/25924 [2:22:23<5:23:34,  1.41s/it] 

User ksmaheshkumar has more than 300 repos, skipping.


 47%|████▋     | 12143/25924 [2:22:52<11:08:44,  2.91s/it]

[INFO] User valpackett already scraped. Skipping.


 47%|████▋     | 12152/25924 [2:23:15<6:47:14,  1.77s/it] 

User mariszo has more than 300 repos, skipping.


 47%|████▋     | 12157/25924 [2:23:28<8:39:21,  2.26s/it] 

User prodigeni has more than 300 repos, skipping.


 47%|████▋     | 12179/25924 [2:24:19<6:57:51,  1.82s/it] 

User bryant1410 has more than 300 repos, skipping.


 47%|████▋     | 12196/25924 [2:25:03<9:12:46,  2.42s/it] 

User lifa123 has more than 300 repos, skipping.


 47%|████▋     | 12201/25924 [2:25:10<5:29:10,  1.44s/it]

[INFO] User mrluanma already scraped. Skipping.


 47%|████▋     | 12205/25924 [2:25:16<5:10:05,  1.36s/it]

User agilemobiledev has more than 300 repos, skipping.


 47%|████▋     | 12215/25924 [2:25:35<7:28:11,  1.96s/it] 

User LaurentFough has more than 300 repos, skipping.


 47%|████▋     | 12230/25924 [2:26:25<8:51:03,  2.33s/it] 

User leecade has more than 300 repos, skipping.


 47%|████▋     | 12235/25924 [2:26:44<9:36:30,  2.53s/it] 

User stl3 has more than 300 repos, skipping.


 47%|████▋     | 12237/25924 [2:28:50<146:08:34, 38.44s/it]

User match nickknissen logged.
[INFO] 389 users scraped so far.


 47%|████▋     | 12271/25924 [2:30:36<12:24:08,  3.27s/it] 

[INFO] User abecciu already scraped. Skipping.


 48%|████▊     | 12318/25924 [2:32:56<9:15:07,  2.45s/it] 

User godeep has more than 300 repos, skipping.


 48%|████▊     | 12324/25924 [2:33:06<5:40:52,  1.50s/it]

User awesome-security has more than 300 repos, skipping.
[INFO] User filipeamoreira already scraped. Skipping.


 48%|████▊     | 12342/25924 [2:33:53<6:38:24,  1.76s/it] 

User luismayta has more than 300 repos, skipping.


 48%|████▊     | 12368/25924 [2:35:39<16:03:24,  4.26s/it]

User artisdom has more than 300 repos, skipping.


 48%|████▊     | 12371/25924 [2:35:42<7:50:03,  2.08s/it] 

[INFO] User paulj already scraped. Skipping.


 48%|████▊     | 12376/25924 [2:35:51<6:07:44,  1.63s/it]

User jmptrader has more than 300 repos, skipping.


 48%|████▊     | 12378/25924 [2:35:54<5:24:27,  1.44s/it]

User rustyforks has more than 300 repos, skipping.


 48%|████▊     | 12383/25924 [2:36:01<3:56:35,  1.05s/it]

User cawa0505 has more than 300 repos, skipping.


 48%|████▊     | 12395/25924 [2:36:41<18:47:25,  5.00s/it]

[INFO] User mofirouz already scraped. Skipping.


 48%|████▊     | 12397/25924 [2:36:41<10:19:05,  2.75s/it]

User Passw has more than 300 repos, skipping.


 48%|████▊     | 12409/25924 [2:37:05<5:33:46,  1.48s/it] 

User AkihiroSuda has more than 300 repos, skipping.


 48%|████▊     | 12414/25924 [2:37:20<9:15:33,  2.47s/it] 

User bluemutedwisdom has more than 300 repos, skipping.


 48%|████▊     | 12427/25924 [2:38:03<12:19:39,  3.29s/it]

User PratikDhanave has more than 300 repos, skipping.


 48%|████▊     | 12433/25924 [2:38:16<8:19:15,  2.22s/it] 

User udhos has more than 300 repos, skipping.


 48%|████▊     | 12441/25924 [2:38:27<4:03:30,  1.08s/it]

User scantist-ossops-m2 has more than 300 repos, skipping.


 48%|████▊     | 12446/25924 [2:38:40<6:57:30,  1.86s/it]

User skyformat99 has more than 300 repos, skipping.


 48%|████▊     | 12515/25924 [2:41:27<5:16:08,  1.41s/it] 

User standardgalactic has more than 300 repos, skipping.


 48%|████▊     | 12539/25924 [2:42:28<2:53:26,  1.29it/s] 


KeyboardInterrupt: 