# 0. Importing packages

In [1]:
# Load the autoreload extension to automatically reload modules before executing code (to avoid restarting the kernel)
%load_ext autoreload 
# NB. `%load_ext autoreload` (the line above) only needs to run once per kernel session; it can be commented out when re-running this cell
%autoreload 2

In [16]:
from pathlib import Path
import pandas as pd
from typing import Dict
from tqdm import tqdm

from resources.filter_functions import look_company_up_in_edgelist
from resources.github_functions import GithubScraper


# Instantiate GithubScraper (project-local client imported from resources.github_functions).
# On construction it reports its token setup and how many companies/users were
# already scraped — see the cell output below.
gs = GithubScraper()

GithubScraper initialized with 3 tokens.
First token in cycle. Initiating ACCESS_TOKEN_1.
GithubScraper initialized with 0 companies and 0 users already scraped.


## 0.1 File Paths

In [17]:
# Project root on the mounted volume; generated artefacts live in its 'output' subfolder.
# NOTE(review): this is a machine-specific absolute path — adjust when running elsewhere.
fp_main = Path('/Volumes/SAM-SODAS-DISTRACT/Coding Distraction/github_as_a_market_device')
fp_main_output = fp_main / 'output'

# 1. Load in the data

In [26]:
def _load_edges(filename: str) -> pd.DataFrame:
    """Read one parquet edge list from the output folder (`fp_main_output`)."""
    return pd.read_parquet(fp_main_output / filename)


# Load the three user-level edge lists: combined, attention-only and collaboration-only
all_edges_user_level = _load_edges('all_edges_user_level.gzip.parquet')
attention_edges_user_level = _load_edges('attention_edges_user_level.gzip.parquet')
collaboration_edges_user_level = _load_edges('collaboration_edges_user_level.gzip.parquet')

In [27]:
# Select every collaboration edge where trifork is the company on either end
src_is_trifork = collaboration_edges_user_level['src_company'].eq('trifork')
target_is_trifork = collaboration_edges_user_level['target_company'].eq('trifork')
mask = src_is_trifork | target_is_trifork
collaboration_edges_user_level.loc[mask]

Unnamed: 0,src,target,src_company,target_company,src_company_category,src_company_label,target_company_category,target_company_label,d_intra_level,d_inter_level,edge_repo,action,created_at
35,frjtrifork,trifork,trifork,trifork,1,1 Digital and marketing consultancies,1,1 Digital and marketing consultancies,1,0,dgws/frjtrifork,forks,2012-05-10
36,bjarkehs,trifork,trifork,trifork,1,1 Digital and marketing consultancies,1,1 Digital and marketing consultancies,1,0,fastlane-plugin-prepare_build_resources/bjarkehs,forks,2017-01-26
37,frjtrifork,trifork,trifork,trifork,1,1 Digital and marketing consultancies,1,1 Digital and marketing consultancies,1,0,sdm/frjtrifork,forks,2012-03-02
38,MadsPoder,trifork,trifork,trifork,1,1 Digital and marketing consultancies,1,1 Digital and marketing consultancies,1,0,the-vault/MadsPoder,forks,2019-09-16
39,SebastianRask,trifork,trifork,trifork,1,1 Digital and marketing consultancies,1,1 Digital and marketing consultancies,1,0,TIM-Android/SebastianRask,forks,2023-03-15
40,LVH-trifork,trifork,trifork,trifork,1,1 Digital and marketing consultancies,1,1 Digital and marketing consultancies,1,0,TIM-Android/LVH-trifork,forks,2022-11-22
41,bjarkehs,trifork,trifork,trifork,1,1 Digital and marketing consultancies,1,1 Digital and marketing consultancies,1,0,TPA-fastlane-plugin-tpa/bjarkehs,forks,2018-08-06
42,jhntrifork,trifork,trifork,trifork,1,1 Digital and marketing consultancies,1,1 Digital and marketing consultancies,1,0,TriforkSwiftExtensions/jhntrifork,forks,2018-10-04
437,madsthom,madsthom,trifork,trifork,1,1 Digital and marketing consultancies,1,1 Digital and marketing consultancies,1,0,zapr/madsthom,forks,2017-12-01
438,madsthom,madsthom,trifork,trifork,1,1 Digital and marketing consultancies,1,1 Digital and marketing consultancies,1,0,ngx-cron-jobs/madsthom,forks,2020-12-04


# 2. Examining the networks

## 2.1 Collaboration

In [29]:
# Look trifork up in the collaboration edge list: direction 'out', self-loops excluded.
# NOTE(review): the exact meaning of `direction` / `exclude_self_loops` is defined in
# resources.filter_functions — confirm there.
# NOTE(review): the displayed result lists the same edge twice (rows 967 and 1217);
# check whether duplicates are introduced upstream.
result_trifork = look_company_up_in_edgelist(
    edgelist=collaboration_edges_user_level,
    company='trifork',
    direction='out',
    exclude_self_loops=True,
)
result_trifork

Unnamed: 0,src,target,src_company,target_company,src_company_category,src_company_label,target_company_category,target_company_label,d_intra_level,d_inter_level,edge_repo,action,created_at
967,madsthom,mkholt,trifork,delegateas,1,1 Digital and marketing consultancies,1,1 Digital and marketing consultancies,0,1,pcf-reloader-transformer/madsthom,forks,2022-07-06
1217,madsthom,mkholt,trifork,delegateas,1,1 Digital and marketing consultancies,1,1 Digital and marketing consultancies,0,1,pcf-reloader-transformer/madsthom,forks,2022-07-06


## 2.2 Attention

In [35]:
# Look trifork up in the attention edge list: direction 'all', self-loops kept.
# NOTE(review): this rebinds `result_trifork`, replacing the collaboration lookup
# stored under the same name in section 2.1.
result_trifork = look_company_up_in_edgelist(
    edgelist=attention_edges_user_level,
    company='trifork',
    direction='all',
    exclude_self_loops=False,
)
result_trifork

Unnamed: 0,src,target,src_company,target_company,src_company_category,src_company_label,target_company_category,target_company_label,d_intra_level,d_inter_level,edge_repo,action,created_at
43,mfrtrifork,trifork,trifork,trifork,1,1 Digital and marketing consultancies,1,1 Digital and marketing consultancies,1,0,AndroidBLE/mfrtrifork,stars,2014-02-23
44,frjtrifork,trifork,trifork,trifork,1,1 Digital and marketing consultancies,1,1 Digital and marketing consultancies,1,0,dgws/frjtrifork,stars,2011-05-25
45,jkiddo,trifork,trifork,trifork,1,1 Digital and marketing consultancies,1,1 Digital and marketing consultancies,1,0,dgws/jkiddo,stars,2010-02-26
46,jkiddo,trifork,trifork,trifork,1,1 Digital and marketing consultancies,1,1 Digital and marketing consultancies,1,0,erjang/jkiddo,stars,2010-02-26
47,bjarkehs,trifork,trifork,trifork,1,1 Digital and marketing consultancies,1,1 Digital and marketing consultancies,1,0,fastlane-plugin-prepare_build_resources/bjarkehs,stars,2012-09-04
...,...,...,...,...,...,...,...,...,...,...,...,...,...
3196,hcnp,hcnp,trifork,trifork,1,1 Digital and marketing consultancies,1,1 Digital and marketing consultancies,1,0,trireg2-import-clockify/hcnp,watches,2024-05-13
3197,hcnp,trifork,trifork,trifork,1,1 Digital and marketing consultancies,1,1 Digital and marketing consultancies,1,0,ig-publisher/hcnp,watches,2024-05-16
3198,hcnp,hcnp,trifork,trifork,1,1 Digital and marketing consultancies,1,1 Digital and marketing consultancies,1,0,hello-world/hcnp,watches,2025-03-14
3199,twantrifork,trifork,trifork,trifork,1,1 Digital and marketing consultancies,1,1 Digital and marketing consultancies,1,0,,follows,2010-07-02


# 3. Get number of followers

In [7]:
# Collect every distinct GitHub username that appears as either endpoint of an edge
endpoint_columns = [all_edges_user_level[column] for column in ('src', 'target')]
unique_users = pd.concat(endpoint_columns).unique()

def get_followers(username: str) -> int | None:
    """
    Returns the number of followers for a given GitHub username.
    If the user does not exist or is private, returns None.
    """
    try:
        user_obj = gs.get_user(username)
        if user_obj is None:
            print(f"[!] User '{username}' not found.")
            return None
        return user_obj.followers
    except Exception as e:
        print(f"[!] Error retrieving user '{username}': {e}")
        return None

# Query follower counts for all unique users.
# The dict is filled one entry at a time (instead of via a dict comprehension) so that
# an interrupted or crashed run still leaves the counts fetched so far in `followers`,
# rather than leaving the name unbound or pointing at a stale earlier result.
# NOTE: after an interruption the dict is partial — re-run this cell to completion
# before relying on downstream results.
followers: Dict[str, int | None] = {}
for user in tqdm(unique_users):
    followers[user] = get_followers(user)

 18%|█▊        | 56/305 [00:25<01:59,  2.08it/s]

KeyboardInterrupt: 

In [None]:
# Keep only users for whom a follower count was actually retrieved
valid_followers: Dict[str, int] = {
    name: n_followers
    for name, n_followers in followers.items()
    if n_followers is not None
}

# Rank users from most to fewest followers (ties keep their original order)
ranked_pairs = sorted(valid_followers.items(), key=lambda pair: pair[1], reverse=True)
followers_sorted = dict(ranked_pairs)

# Show the 20 most-followed users
print("Top 20 users by follower count:\n")
for user, count in ranked_pairs[:20]:
    print(f"{user}: {count}")

Top 20 users by follower count:

EG-A-S: 375
kennylevinsen: 172
thebuilder: 160
trifork: 66
jkiddo: 50
jeme: 40
shapehq: 40
kawaiipantsu: 38
AndersSpringborg: 26
madsthom: 25
bjarkehs: 21
gugi9000: 20
HusseinElZein: 20
fredelundbeck: 18
hcarreras: 17
signifly: 17
jacobe: 16
TusharRoy23: 13
mathiasandresen: 12
anderslime: 12
