# 0. Importing packages

In [1]:
# Load the autoreload extension to automatically reload modules before executing code (to avoid restarting the kernel)
%load_ext autoreload 

# Enable autoreload for all modules
%autoreload 2

In [2]:
import pandas as pd
from pathlib import Path
from itertools import chain
from tqdm import tqdm

# Custom functions
from resources.github_functions import GithubScraper

## 0.1 File Paths

In [3]:
fp_main = Path('/Volumes/SAM-SODAS-DISTRACT/Coding Distraction/github_as_a_market_device')
fp_main_output = Path(fp_main / 'output')

# 1 Filtering users and making (named-user,company)-list

## 1.1 Loading in the data on first tier users
> For scrape and sorting of first tier see the notebooks:
* 1. Collecting first-tier users.ipynb
* 2. Sorting first-tier users.ipynb


In [4]:
first_tier_info = pd.read_parquet(fp_main_output / 'first_tier_ties_sorted.parquet.gzip')

### 1.2 Creating a dataframe where a row is a company with a list of potential second tier users

In [5]:
# Aggregate potential second users for each company in the second tier
second_tier_users_and_company = (
    first_tier_info.groupby('search_with_company', as_index=False)['unique_ties']
    .agg(lambda x: list(chain.from_iterable(x)))
)

# Calculate total number of potential second-tier users
numb_of_second_tier_users = second_tier_users_and_company['unique_ties'].str.len().sum()

print(numb_of_second_tier_users)

27752


# 2.2 Instantiating the GithubScraper and scrape

In [6]:
import json
from pathlib import Path

# Initialize sets for tracking
users_already_scraped = set()
companies_already_scraped = set()
users_attempted_scraped = set()

# Paths
first_tier_user_log_file = 'first_tier_userinfo_user_log.jsonl'
second_tier_user_log_file = 'second_tier_userinfo_user_log.jsonl'
users_attempted_scrape_file = 'users_attempted_scrape.jsonl'

first_tier_user_log_path = fp_main_output / first_tier_user_log_file
second_tier_user_log_path = fp_main_output / second_tier_user_log_file
users_attempted_scrape_path = fp_main_output / users_attempted_scrape_file

def ensure_file_exists(path: Path):
    if not path.exists():
        print(f"[INFO] File does not exist. Creating: {path.name}")
        path.touch(exist_ok=True)
    else:
        print(f"[INFO] File exists: {path.name}")

def load_users_from_log(path: Path):
    users = set()
    if path.exists():
        with open(path, "r", encoding="utf-8") as f:
            for line in f:
                try:
                    user_info = json.loads(line)
                    users.add(user_info["user_login"])
                except (json.JSONDecodeError, KeyError) as err:
                    print(f"[WARNING] Skipping malformed user line in {path.name}: {err}")
    return users

# Ensure all files exist
for path in [first_tier_user_log_path, second_tier_user_log_path, users_attempted_scrape_path]:
    ensure_file_exists(path)

# Populate sets
users_already_scraped |= load_users_from_log(first_tier_user_log_path)
users_already_scraped |= load_users_from_log(second_tier_user_log_path)
users_already_attempted = load_users_from_log(users_attempted_scrape_path)

[INFO] File exists: first_tier_userinfo_user_log.jsonl
[INFO] File exists: second_tier_userinfo_user_log.jsonl
[INFO] File exists: users_attempted_scrape.jsonl


In [7]:
# 1. Create instance of GithubScraper
gs = GithubScraper(
    users_already_scraped=users_already_scraped,
    companies_already_scraped=companies_already_scraped,
    users_already_attempted=users_already_attempted,
    repo_limit=300
)

second_tier_users_to_scrape = {
    user: row['search_with_company']
    for _, row in second_tier_users_and_company.iterrows()
    for user in row['unique_ties']
}

print(f'GitHub REST API ratelimit reset time for token {gs.current_token_key} is {gs.reset_time_point}. '
      f'That will be in a little less than {gs.reset_time_in_minutes} minutes.')

# 2. Define output file name
file_name = 'second_tier_userinfo'

# 3. Loop through company queries
for user, search_with_company in tqdm(second_tier_users_to_scrape.items()):

    # 3.3 Check if user is already scraped
    if user in gs.users_already_attempted:
        print(f'[INFO] User {user} already scraped. Skipping.')
        continue

    # Log user to the set of already attempted users
    gs.log_user_scrape_attempt(user, users_attempted_scrape_path)
    gs.users_already_attempted.add(user)

    # 3.1 Get user from the flattened dictionary
    named_user = gs.get_user(user)

    # 3.2 Check if user is None (e.g., if user is not found)
    if named_user is None:
        continue

    # 3.5 Check if user is a relevant user (DK + company)
    user_row = gs.get_user_info(named_user, search_with_company)
    if user_row is None:
        continue  # Skip user if they don't meet scraping criteria

    # 3.3.3 Extract match data
    location_match = user_row.location
    inferred_company = user_row.inferred_company
    matched_company_strings = user_row.matched_company_strings

    # 3.3.4 Save user info and log result
    gs.save_file(user_row, file_name, remove_existing_file=True)
    gs.log_user_w_match(named_user.login, inferred_company, matched_company_strings, location_match, second_tier_user_log_path)
    
    print(f'[INFO] {gs.USERS_SCRAPED} users scraped so far.')

GithubScraper initialized with 3 tokens.
First token in cycle. Initiating ACCESS_TOKEN_1.
GithubScraper initialized with 0 companies and 303 users already scraped.
GitHub REST API ratelimit reset time for token ACCESS_TOKEN_1 is 2025-06-02 12:55:14. That will be in a little less than 17 minutes.


  0%|          | 0/25924 [00:00<?, ?it/s]

[INFO] User martinvintherp already scraped. Skipping.
[INFO] User Tejs-Abtion already scraped. Skipping.
[INFO] User hrithikt already scraped. Skipping.
[INFO] User martincarlsen already scraped. Skipping.
[INFO] User morgenhaar already scraped. Skipping.
[INFO] User cosmic-banana already scraped. Skipping.
[INFO] User parseb already scraped. Skipping.
[INFO] User iox already scraped. Skipping.
[INFO] User aboedker already scraped. Skipping.
[INFO] User mulky-sulaiman already scraped. Skipping.
[INFO] User simonask already scraped. Skipping.
[INFO] User finnpedersenkazes already scraped. Skipping.
[INFO] User pedryvo already scraped. Skipping.
[INFO] User substancelab already scraped. Skipping.
[INFO] User MartJohannsen already scraped. Skipping.
[INFO] User runephilosof already scraped. Skipping.
[INFO] User Kosai106 already scraped. Skipping.
[INFO] User ozf already scraped. Skipping.
[INFO] User JijoBose already scraped. Skipping.
[INFO] User AlejandraValdivia already scraped. Skipp

 24%|██▍       | 6183/25924 [00:21<01:29, 221.54it/s]

[INFO] User romanIlyushin-bc already scraped. Skipping.
[INFO] User sapegin already scraped. Skipping.


 24%|██▍       | 6192/25924 [00:41<04:23, 74.82it/s] 

User Uvacoder has more than 300 repos, skipping.
[INFO] User TobiasRoland123 already scraped. Skipping.
User paralin has more than 300 repos, skipping.


 24%|██▍       | 6202/25924 [01:01<10:48, 30.42it/s]

[INFO] User estherfinsen already scraped. Skipping.
User match ndamkjaer logged.
[INFO] 304 users scraped so far.


 24%|██▍       | 6223/25924 [01:44<51:45,  6.34it/s]

[INFO] User ccheney already scraped. Skipping.


 24%|██▍       | 6229/25924 [01:54<1:12:27,  4.53it/s]

[INFO] User iamcsk already scraped. Skipping.


 24%|██▍       | 6238/25924 [02:07<2:03:19,  2.66it/s]

[INFO] User thebuilder already scraped. Skipping.


 24%|██▍       | 6241/25924 [02:17<3:11:35,  1.71it/s]

[INFO] User f3d0t already scraped. Skipping.


 24%|██▍       | 6256/25924 [03:01<12:37:40,  2.31s/it]

User mathiasbynens has more than 300 repos, skipping.


 24%|██▍       | 6258/25924 [03:03<8:24:14,  1.54s/it] 

User addyosmani has more than 300 repos, skipping.


 24%|██▍       | 6281/25924 [03:56<7:29:02,  1.37s/it] 

[INFO] User TrellixVulnTeam already scraped. Skipping.


 24%|██▍       | 6283/25924 [03:56<4:38:59,  1.17it/s]

User makinacorpus has more than 300 repos, skipping.


 24%|██▍       | 6293/25924 [04:20<9:40:20,  1.77s/it] 

[INFO] User apache already scraped. Skipping.


 24%|██▍       | 6296/25924 [04:32<17:00:56,  3.12s/it]

User match UniSoftDeveloper logged.
[INFO] 305 users scraped so far.


 24%|██▍       | 6321/25924 [05:11<5:15:13,  1.04it/s] 

User mohsinonxrm has more than 300 repos, skipping.


 24%|██▍       | 6329/25924 [05:23<5:33:53,  1.02s/it] 

User asears has more than 300 repos, skipping.


 24%|██▍       | 6341/25924 [05:44<5:47:51,  1.07s/it] 

User lulzzz has more than 300 repos, skipping.


 25%|██▍       | 6362/25924 [06:13<5:42:38,  1.05s/it] 

User 0xced has more than 300 repos, skipping.


 25%|██▍       | 6394/25924 [07:06<7:28:48,  1.38s/it] 

[INFO] User oscarftoro already scraped. Skipping.


 25%|██▍       | 6414/25924 [07:31<7:28:53,  1.38s/it]

User gitter-badger has more than 300 repos, skipping.


 25%|██▍       | 6416/25924 [07:33<5:27:19,  1.01s/it]

User julrichkieffer has more than 300 repos, skipping.


 25%|██▍       | 6427/25924 [07:59<10:27:49,  1.93s/it]

User loupdemon has more than 300 repos, skipping.


 25%|██▍       | 6450/25924 [08:34<6:36:54,  1.22s/it] 

User joelvaneenwyk has more than 300 repos, skipping.


 25%|██▍       | 6464/25924 [08:50<5:26:16,  1.01s/it]

User tchigher has more than 300 repos, skipping.


 25%|██▌       | 6500/25924 [10:18<57:10:46, 10.60s/it]

User match Leiyiliro logged.
[INFO] 306 users scraped so far.


 25%|██▌       | 6509/25924 [11:03<63:27:14, 11.77s/it]

User match mkholt logged.
[INFO] 307 users scraped so far.


 25%|██▌       | 6513/25924 [11:07<18:46:25,  3.48s/it]

User forki has more than 300 repos, skipping.


 25%|██▌       | 6528/25924 [12:34<114:35:39, 21.27s/it]

User match JonasGLund99 logged.
[INFO] 308 users scraped so far.


 25%|██▌       | 6600/25924 [14:48<4:56:25,  1.09it/s]  

User haf has more than 300 repos, skipping.


 26%|██▌       | 6612/25924 [15:08<8:58:37,  1.67s/it] Request GET /users/phantompepper failed with 403: Forbidden
Setting next backoff to 1279.404218s
 26%|██▌       | 6613/25924 [1:43:40<8555:04:54, 1594.86s/it]

User match ErikEJ logged.
[INFO] 309 users scraped so far.


 26%|██▌       | 6738/25924 [1:48:23<73:21:08, 13.76s/it]    

User match Kristian-Knudsen logged.
[INFO] 310 users scraped so far.


 26%|██▌       | 6748/25924 [1:48:37<8:24:32,  1.58s/it] 

Token cycled to ACCESS_TOKEN_2.
Cycle


 26%|██▌       | 6749/25924 [1:56:40<776:17:44, 145.75s/it]

User match delegateas logged.
[INFO] 311 users scraped so far.


 26%|██▌       | 6801/25924 [2:00:07<195:28:20, 36.80s/it] 

User match madsthom logged.
[INFO] 312 users scraped so far.


 26%|██▌       | 6804/25924 [2:00:13<74:06:10, 13.95s/it] 

User match Uthde1 logged.
[INFO] 313 users scraped so far.


 26%|██▋       | 6835/25924 [2:00:52<18:25:26,  3.47s/it]

User match JesperNoerregaard logged.
[INFO] 314 users scraped so far.


 26%|██▋       | 6850/25924 [2:01:09<8:59:10,  1.70s/it] 

User match anshul-eg logged.
[INFO] 315 users scraped so far.


 26%|██▋       | 6863/25924 [2:01:28<11:32:11,  2.18s/it]

User match kshet98 logged.
[INFO] 316 users scraped so far.


 26%|██▋       | 6866/25924 [2:01:33<10:51:22,  2.05s/it]

User match dedka-eg-dk logged.
[INFO] 317 users scraped so far.


 27%|██▋       | 6874/25924 [2:01:44<10:33:29,  2.00s/it]

User match tibereg logged.
[INFO] 318 users scraped so far.


 27%|██▋       | 6880/25924 [2:02:08<34:03:28,  6.44s/it]

User match Enegia logged.
[INFO] 319 users scraped so far.


 27%|██▋       | 6902/25924 [2:02:37<18:26:39,  3.49s/it]

User match egdw-maufr logged.
[INFO] 320 users scraped so far.


 27%|██▋       | 6911/25924 [2:02:48<9:33:34,  1.81s/it] 

User match sandeepakn logged.
[INFO] 321 users scraped so far.


 27%|██▋       | 6925/25924 [2:03:05<9:48:01,  1.86s/it]

User match mohvi-dev logged.
[INFO] 322 users scraped so far.


 27%|██▋       | 6926/25924 [2:03:54<84:10:24, 15.95s/it]

User match Mohammediliyas766 logged.
[INFO] 323 users scraped so far.


 27%|██▋       | 6936/25924 [2:04:07<13:17:11,  2.52s/it]

User match raval-eg logged.
[INFO] 324 users scraped so far.


 27%|██▋       | 6959/25924 [2:04:34<11:21:18,  2.16s/it]

User match sujan238 logged.
[INFO] 325 users scraped so far.


 27%|██▋       | 6981/25924 [2:05:00<12:11:55,  2.32s/it]

User match neerup logged.
[INFO] 326 users scraped so far.


 27%|██▋       | 6983/25924 [2:05:08<18:26:27,  3.50s/it]

User match jayden-eg-dk logged.
[INFO] 327 users scraped so far.


 27%|██▋       | 6991/25924 [2:06:22<110:47:56, 21.07s/it]

User match egdw-olesh logged.
[INFO] 328 users scraped so far.


 27%|██▋       | 6993/25924 [2:06:26<60:33:00, 11.51s/it] 

User match xxntc-eg logged.
[INFO] 329 users scraped so far.


 27%|██▋       | 7026/25924 [2:07:03<9:13:53,  1.76s/it] 

User match chiko-eg-dk logged.
[INFO] 330 users scraped so far.


 27%|██▋       | 7032/25924 [2:07:11<9:18:37,  1.77s/it]

User match gjrao-droid logged.
[INFO] 331 users scraped so far.


 27%|██▋       | 7036/25924 [2:07:18<10:14:03,  1.95s/it]

User match mazko-eg logged.
[INFO] 332 users scraped so far.


 27%|██▋       | 7044/25924 [2:07:29<9:57:13,  1.90s/it] 

User match VIKKUEG logged.
[INFO] 333 users scraped so far.


 27%|██▋       | 7061/25924 [2:07:49<12:17:06,  2.34s/it]

User match gkath-eg logged.
[INFO] 334 users scraped so far.


 27%|██▋       | 7066/25924 [2:07:56<9:40:50,  1.85s/it] 

User match xxqeq-eg logged.
[INFO] 335 users scraped so far.


 27%|██▋       | 7067/25924 [2:07:59<11:49:50,  2.26s/it]

User match vijloegdk logged.
[INFO] 336 users scraped so far.


 27%|██▋       | 7096/25924 [2:08:39<12:34:53,  2.41s/it]

User match JPFoxtrot logged.
[INFO] 337 users scraped so far.


 27%|██▋       | 7102/25924 [2:08:47<9:26:23,  1.81s/it] 

User match vjopi logged.
[INFO] 338 users scraped so far.


 27%|██▋       | 7104/25924 [2:08:51<10:49:29,  2.07s/it]

User match raaku-eg logged.
[INFO] 339 users scraped so far.


 27%|██▋       | 7107/25924 [2:08:56<10:31:11,  2.01s/it]

User match egdw-tajel logged.
[INFO] 340 users scraped so far.


 27%|██▋       | 7124/25924 [2:09:16<10:12:08,  1.95s/it]

User match RamakanthShenoyM logged.
[INFO] 341 users scraped so far.


 28%|██▊       | 7138/25924 [2:09:31<8:53:00,  1.70s/it] 

User match laphiegd logged.
[INFO] 342 users scraped so far.


 28%|██▊       | 7153/25924 [2:10:04<10:57:11,  2.10s/it]

User match egdw-elnai logged.
[INFO] 343 users scraped so far.


 28%|██▊       | 7188/25924 [2:10:42<11:35:09,  2.23s/it]

User match AJayasabeen logged.
[INFO] 344 users scraped so far.


 28%|██▊       | 7190/25924 [2:10:48<13:37:12,  2.62s/it]

User match RanjithKB-EG logged.
[INFO] 345 users scraped so far.


 28%|██▊       | 7206/25924 [2:11:16<7:07:39,  1.37s/it] 

User github has more than 300 repos, skipping.


 28%|██▊       | 7218/25924 [2:11:49<18:14:23,  3.51s/it]

User MicrosoftDocs has more than 300 repos, skipping.


 28%|██▊       | 7228/25924 [2:12:01<5:35:45,  1.08s/it] 

User Azure has more than 300 repos, skipping.


 28%|██▊       | 7232/25924 [2:12:06<5:27:24,  1.05s/it]

User Azure-Samples has more than 300 repos, skipping.


 28%|██▊       | 7234/25924 [2:12:13<14:03:58,  2.71s/it]

User match maskjaer logged.
[INFO] 346 users scraped so far.


 28%|██▊       | 7235/25924 [2:12:18<16:29:31,  3.18s/it]

User match kasper-krogh-ffw logged.
[INFO] 347 users scraped so far.


 28%|██▊       | 7282/25924 [2:13:33<6:41:27,  1.29s/it] 

User donjordano has more than 300 repos, skipping.


 28%|██▊       | 7284/25924 [2:13:41<12:05:20,  2.33s/it]

User vuchkov has more than 300 repos, skipping.


 28%|██▊       | 7303/25924 [2:14:28<28:39:41,  5.54s/it]

User match nielslbeck logged.
[INFO] 348 users scraped so far.


 28%|██▊       | 7304/25924 [2:14:29<20:43:21,  4.01s/it]

User gaoypChina has more than 300 repos, skipping.


 28%|██▊       | 7306/25924 [2:14:54<47:44:17,  9.23s/it]

User match Skovvart logged.
[INFO] 349 users scraped so far.


 28%|██▊       | 7314/25924 [2:18:36<336:35:46, 65.11s/it]

User match mbudde logged.
[INFO] 350 users scraped so far.


 28%|██▊       | 7317/25924 [2:19:49<226:50:38, 43.89s/it]

User match degeberg logged.
[INFO] 351 users scraped so far.


 28%|██▊       | 7319/25924 [2:20:43<194:29:43, 37.63s/it]

User match anielsen logged.
[INFO] 352 users scraped so far.


 28%|██▊       | 7326/25924 [2:26:54<584:11:55, 113.08s/it]

User match Eckankar logged.
[INFO] 353 users scraped so far.


 28%|██▊       | 7337/25924 [2:27:19<21:51:44,  4.23s/it]  

User googlearchive has more than 300 repos, skipping.


 28%|██▊       | 7346/25924 [2:27:38<9:25:27,  1.83s/it] 

User simonrw has more than 300 repos, skipping.


 28%|██▊       | 7360/25924 [2:28:11<16:12:00,  3.14s/it]

User hermetique has more than 300 repos, skipping.


 28%|██▊       | 7369/25924 [2:28:35<12:42:06,  2.46s/it]

User dcn01 has more than 300 repos, skipping.


 29%|██▊       | 7398/25924 [2:30:31<62:49:53, 12.21s/it]

User match jobindex logged.
[INFO] 354 users scraped so far.


 29%|██▊       | 7434/25924 [2:31:54<21:56:53,  4.27s/it]

[INFO] User zeon-neon already scraped. Skipping.


 29%|██▊       | 7436/25924 [2:33:02<92:35:28, 18.03s/it]

User match ulrikrasmussen logged.
[INFO] 355 users scraped so far.


 29%|██▊       | 7450/25924 [2:33:49<14:56:14,  2.91s/it]

[INFO] User jlouis already scraped. Skipping.


 29%|██▉       | 7459/25924 [2:34:02<7:01:37,  1.37s/it] 

User sjl421 has more than 300 repos, skipping.


 29%|██▉       | 7523/25924 [2:37:10<27:33:51,  5.39s/it]

User match jladefoged logged.
[INFO] 356 users scraped so far.


 29%|██▉       | 7569/25924 [2:38:46<10:53:23,  2.14s/it]

User 418sec has more than 300 repos, skipping.


 29%|██▉       | 7581/25924 [2:39:12<8:51:51,  1.74s/it] 

User sjoerdapp has more than 300 repos, skipping.


 29%|██▉       | 7598/25924 [2:40:35<79:26:00, 15.60s/it]

User match bjornbugge logged.
[INFO] 357 users scraped so far.


 29%|██▉       | 7606/25924 [2:40:48<11:13:53,  2.21s/it]

User AaronLaw has more than 300 repos, skipping.


 29%|██▉       | 7612/25924 [2:41:00<9:34:05,  1.88s/it] 

User muesli has more than 300 repos, skipping.
[INFO] User ghcraig already scraped. Skipping.


 29%|██▉       | 7623/25924 [2:41:21<10:28:18,  2.06s/it]

Token cycled to ACCESS_TOKEN_3.
Cycle


 29%|██▉       | 7631/25924 [2:41:34<9:29:07,  1.87s/it] 

User mozilla has more than 300 repos, skipping.


 29%|██▉       | 7632/25924 [2:41:34<7:24:00,  1.46s/it]

User d3v3l0 has more than 300 repos, skipping.


 30%|██▉       | 7688/25924 [2:43:54<8:21:42,  1.65s/it] 

User sahwar has more than 300 repos, skipping.


 30%|██▉       | 7694/25924 [2:48:01<355:51:11, 70.27s/it]

User match nwillems logged.
[INFO] 358 users scraped so far.


 30%|██▉       | 7703/25924 [2:48:14<19:53:15,  3.93s/it] 

User doy has more than 300 repos, skipping.


 30%|██▉       | 7726/25924 [2:49:15<9:12:15,  1.82s/it] 

User tazjel has more than 300 repos, skipping.


 30%|██▉       | 7736/25924 [2:49:40<9:55:04,  1.96s/it] 

User kba has more than 300 repos, skipping.


 30%|██▉       | 7737/25924 [2:49:42<8:37:37,  1.71s/it]

[INFO] User OfficialCodeVoyage already scraped. Skipping.


 30%|██▉       | 7747/25924 [2:49:58<7:56:40,  1.57s/it] 

User shekkbuilder has more than 300 repos, skipping.


 30%|██▉       | 7761/25924 [2:50:31<10:13:35,  2.03s/it]

User saltstack-formulas has more than 300 repos, skipping.


 30%|██▉       | 7765/25924 [2:50:42<14:53:02,  2.95s/it]

User match khzavaleo logged.
[INFO] 359 users scraped so far.


 30%|██▉       | 7771/25924 [2:50:52<12:25:40,  2.46s/it]

User match MichalKarmanski logged.
[INFO] 360 users scraped so far.


 30%|██▉       | 7774/25924 [2:51:35<67:09:41, 13.32s/it]

User match r-d-kmd logged.
[INFO] 361 users scraped so far.


 30%|███       | 7784/25924 [2:51:48<10:16:11,  2.04s/it]

User match jansommer logged.
[INFO] 362 users scraped so far.


 30%|███       | 7788/25924 [2:52:08<28:58:12,  5.75s/it]

User match MKringelhede logged.
[INFO] 363 users scraped so far.


 30%|███       | 7791/25924 [2:52:13<15:56:09,  3.16s/it]

User match wnuczekkrzysztof logged.
[INFO] 364 users scraped so far.


 30%|███       | 7813/25924 [2:52:38<10:25:03,  2.07s/it]

User match twl-kmd logged.
[INFO] 365 users scraped so far.


 30%|███       | 7829/25924 [2:53:02<9:29:15,  1.89s/it] 

User match mjzAvaleo logged.
[INFO] 366 users scraped so far.


 30%|███       | 7833/25924 [2:53:08<8:53:36,  1.77s/it]

User match bearous logged.
[INFO] 367 users scraped so far.


 30%|███       | 7843/25924 [2:53:34<21:53:41,  4.36s/it]

User match Barium logged.
[INFO] 368 users scraped so far.


 30%|███       | 7862/25924 [2:55:11<109:22:17, 21.80s/it]

User match avaleo logged.
[INFO] 369 users scraped so far.


 31%|███       | 7964/25924 [3:00:12<9:14:38,  1.85s/it]  

User GetStream has more than 300 repos, skipping.


 31%|███       | 7975/25924 [3:00:39<7:55:27,  1.59s/it] 

User jondot has more than 300 repos, skipping.


 31%|███       | 7987/25924 [3:01:15<12:54:50,  2.59s/it]

User objcio has more than 300 repos, skipping.


 31%|███       | 8004/25924 [3:01:56<7:24:42,  1.49s/it] 

User jamesmontemagno has more than 300 repos, skipping.


 31%|███       | 8008/25924 [3:02:01<7:52:12,  1.58s/it]

[INFO] User kstenerud already scraped. Skipping.


 31%|███       | 8023/25924 [3:02:29<5:26:14,  1.09s/it] 

User jsuarezruiz has more than 300 repos, skipping.


 31%|███       | 8032/25924 [3:02:49<14:11:30,  2.86s/it]

[INFO] User geerlingguy already scraped. Skipping.


 31%|███       | 8038/25924 [3:03:03<14:10:01,  2.85s/it]

User ericlewis has more than 300 repos, skipping.


 31%|███       | 8062/25924 [3:03:53<8:44:57,  1.76s/it] 

User apple has more than 300 repos, skipping.


 31%|███       | 8090/25924 [3:05:04<6:42:41,  1.35s/it] 

User mapbox has more than 300 repos, skipping.


 31%|███       | 8091/25924 [3:05:04<5:23:54,  1.09s/it]

User kevinsimper has more than 300 repos, skipping.


 31%|███       | 8099/25924 [3:05:18<6:37:08,  1.34s/it] 

User facebookarchive has more than 300 repos, skipping.


 31%|███▏      | 8112/25924 [3:05:45<8:50:24,  1.79s/it] 

User anaisbetts has more than 300 repos, skipping.


 31%|███▏      | 8150/25924 [3:07:57<68:55:12, 13.96s/it]

User match stagis logged.
[INFO] 370 users scraped so far.


 32%|███▏      | 8178/25924 [3:09:27<14:26:10,  2.93s/it]

User mattleibow has more than 300 repos, skipping.


 32%|███▏      | 8208/25924 [3:10:54<9:16:16,  1.88s/it] 

User kodecocodes has more than 300 repos, skipping.


 32%|███▏      | 8209/25924 [3:10:54<7:09:48,  1.46s/it]

User ghuntley has more than 300 repos, skipping.


 32%|███▏      | 8272/25924 [3:13:43<12:50:59,  2.62s/it]

User discourse has more than 300 repos, skipping.


 32%|███▏      | 8294/25924 [3:19:11<413:42:59, 84.48s/it]

User match miracle-as logged.
[INFO] 371 users scraped so far.


 32%|███▏      | 8314/25924 [3:20:04<11:15:57,  2.30s/it] 

User appleboy has more than 300 repos, skipping.


 32%|███▏      | 8351/25924 [3:21:26<7:26:46,  1.53s/it] 

User adafruit has more than 300 repos, skipping.


 32%|███▏      | 8383/25924 [3:22:59<12:02:26,  2.47s/it]

User ingydotnet has more than 300 repos, skipping.


 33%|███▎      | 8458/25924 [3:25:50<11:02:03,  2.27s/it]

User jdx has more than 300 repos, skipping.


 33%|███▎      | 8490/25924 [3:27:34<23:13:51,  4.80s/it]

User match peter-gram logged.
[INFO] 372 users scraped so far.


 33%|███▎      | 8499/25924 [3:28:27<67:54:24, 14.03s/it]

User match martinbaehrenzbjerregaard logged.
[INFO] 373 users scraped so far.


 33%|███▎      | 8502/25924 [3:28:30<26:21:59,  5.45s/it]

User bryanasdev000 has more than 300 repos, skipping.


 33%|███▎      | 8503/25924 [3:30:04<154:20:03, 31.89s/it]

User match gahms logged.
[INFO] 374 users scraped so far.


 33%|███▎      | 8512/25924 [3:30:22<18:04:12,  3.74s/it] 

User match MiracleJFM logged.
[INFO] 375 users scraped so far.


 33%|███▎      | 8513/25924 [3:30:25<16:37:38,  3.44s/it]

User match mstMiracle logged.
[INFO] 376 users scraped so far.


 33%|███▎      | 8517/25924 [3:30:32<10:33:51,  2.18s/it]

User openstack has more than 300 repos, skipping.


 33%|███▎      | 8525/25924 [3:30:49<12:31:48,  2.59s/it]

User match aha71 logged.
[INFO] 377 users scraped so far.


 33%|███▎      | 8529/25924 [3:31:02<16:44:26,  3.46s/it]

User match skvat logged.
[INFO] 378 users scraped so far.


 33%|███▎      | 8534/25924 [3:31:11<12:05:42,  2.50s/it]

User match smb2142 logged.
[INFO] 379 users scraped so far.


 33%|███▎      | 8539/25924 [3:31:27<13:56:01,  2.89s/it]

User match tsmmiracle logged.
[INFO] 380 users scraped so far.


 33%|███▎      | 8540/25924 [3:31:33<17:59:52,  3.73s/it]

User match brian-bek-jensen logged.
[INFO] 381 users scraped so far.


 33%|███▎      | 8543/25924 [3:31:38<12:22:57,  2.56s/it]

User match Rasmus-Rosendal logged.
[INFO] 382 users scraped so far.


 33%|███▎      | 8552/25924 [3:31:53<9:00:08,  1.87s/it] 

User match pja-miracle logged.
[INFO] 383 users scraped so far.


 33%|███▎      | 8569/25924 [3:32:33<20:18:06,  4.21s/it]

User match kfn logged.
[INFO] 384 users scraped so far.


 33%|███▎      | 8575/25924 [3:32:40<6:55:31,  1.44s/it] 

Token cycled to ACCESS_TOKEN_1.
Cycle


 33%|███▎      | 8576/25924 [3:33:06<41:36:00,  8.63s/it]

User match memadk logged.
[INFO] 385 users scraped so far.


 33%|███▎      | 8577/25924 [3:33:08<32:56:21,  6.84s/it]

User match jlemiracle logged.
[INFO] 386 users scraped so far.


 33%|███▎      | 8580/25924 [3:33:13<17:35:27,  3.65s/it]

User match MartinHNielsen logged.
[INFO] 387 users scraped so far.


 33%|███▎      | 8583/25924 [3:33:40<39:02:04,  8.10s/it]

User match loevdahl logged.
[INFO] 388 users scraped so far.


 33%|███▎      | 8584/25924 [3:34:17<7:12:53,  1.50s/it] 


OSError: [Errno 57] Socket is not connected: '/Volumes/SAM-SODAS-DISTRACT/Coding Distraction/github_as_a_market_device/output/second_tier_userinfo'