In [1]:
import json
import os

import pandas as pd
from google.cloud import bigquery

In [2]:
# Export the path of the local GCP credentials file through the environment,
# then create the BigQuery client used by every query cell below.
credentials_path = '../../gcp_credentials.json'
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = credentials_path
client = bigquery.Client()

# Get projects code metrics

In [3]:
# Download the whole per-project code-metrics table into a DataFrame.
metrics_table = 'opensource-observer.oso.code_metrics_by_project'
query = f"""
SELECT *
FROM `{metrics_table}`
"""
result = client.query(query)
metrics = result.to_dataframe()

In [4]:
# Drop the identifier columns that are not used downstream; `project_slug`
# stays because it is the join key for the merge further down.
# Reassigning (rather than inplace=True) with errors='ignore' keeps this cell
# idempotent: re-running it once the columns are gone no longer raises KeyError.
metrics = metrics.drop(
    columns=['project_id', 'project_name', 'repository_source'],
    errors='ignore',
)
metrics.tail(3)

Unnamed: 0,project_slug,first_commit_date,last_commit_date,repositories,stars,forks,contributors,new_contributors_6_months,contributors_6_months,avg_fulltime_devs_6_months,avg_active_devs_6_months,commits_6_months,issues_opened_6_months,issues_closed_6_months,pull_requests_opened_6_months,pull_requests_merged_6_months
1729,uniswap,2018-03-07 00:00:00+00:00,2024-04-29 00:00:00+00:00,59,27432,25468,5863,495,5863,1.0,18.666667,708.0,492.0,152.0,1054.0,699.0
1730,safe-global,2017-07-06 00:00:00+00:00,2024-04-30 00:00:00+00:00,118,4925,3621,1453,249,1453,6.166667,20.333333,5166.0,1096.0,1093.0,2952.0,2567.0
1731,offchainlabs,2019-06-04 00:00:00+00:00,2024-04-29 00:00:00+00:00,94,4871,3333,821,249,821,2.5,25.166667,4069.0,360.0,323.0,1504.0,1187.0


# Get all repos on OSO

In [5]:
# Download the repository -> project table into a DataFrame.
query = """
SELECT *
FROM `opensource-observer.oso.repos_by_project`
"""
result = client.query(query)
repos = result.to_dataframe()

# The owner is everything before the first '/' of "owner/repo". The vectorised
# .str accessor propagates missing names as NaN instead of raising
# AttributeError the way a Python-level x.split('/') would on a null value.
repos['repo_owner'] = repos['repo_name_with_owner'].str.split('/', n=1).str[0]
repos.tail(3)

Unnamed: 0,project_id,project_slug,project_name,repository_source,artifact_id,repo_is_fork,repo_fork_count,repo_star_count,first_commit_date,last_commit_date,repo_name_with_owner,repo_owner
37711,wXDIn1ZhRNdQ682uvEAtHNxdN_fzvxGWSbcWWIA96s0=,freedom-of-the-press-foundation,Freedom of the Press Foundation,GITHUB,z93BPXSRK2_85cW3JMH6_wi-0gB8px17w7PkyXRi8BE=,True,1,1,NaT,NaT,freedomofpress/due_date_reminder,freedomofpress
37712,wXDIn1ZhRNdQ682uvEAtHNxdN_fzvxGWSbcWWIA96s0=,freedom-of-the-press-foundation,Freedom of the Press Foundation,GITHUB,0Exq3E25Iq02_ZssKk7_dlnMidJZT71e2Of9dQSWUIY=,True,0,1,NaT,NaT,freedomofpress/pdf-redact-tools,freedomofpress
37713,wXDIn1ZhRNdQ682uvEAtHNxdN_fzvxGWSbcWWIA96s0=,freedom-of-the-press-foundation,Freedom of the Press Foundation,GITHUB,mokXx9BoxOv7TGcCwgPpMS-ZQaopi6EURml8ALwaj8U=,True,0,0,NaT,NaT,freedomofpress/ansible-letsencrypt,freedomofpress


In [6]:
# Exact "owner/repo" name -> project slug.
repo_to_slug = dict(zip(repos['repo_name_with_owner'], repos['project_slug']))

# One row per distinct (owner, project) pairing, so the number of rows an owner
# has equals the number of different projects that owner's repos belong to.
repo_owners = repos[['repo_owner', 'project_slug']].drop_duplicates()
owner_to_slug_counts = repo_owners['repo_owner'].value_counts()

# Owners tied to exactly one project can be resolved by owner name alone;
# owners tied to several projects are ambiguous and are only reported later.
oso_unique_owners = owner_to_slug_counts.loc[owner_to_slug_counts == 1].index.tolist()
oso_dup_owners = owner_to_slug_counts.loc[owner_to_slug_counts > 1].index.tolist()

# Owner -> project slug, restricted to the unambiguous owners.
single_project_rows = repo_owners[repo_owners['repo_owner'].isin(oso_unique_owners)]
owner_to_slug = dict(
    zip(single_project_rows['repo_owner'], single_project_rows['project_slug'])
)

# Get GG projects from Metabase

In [7]:
projects = pd.read_csv('data/csv/gg20_projects_2024-05-01T12_20_52.794267Z.csv')

# Resolve each project's GitHub handle to an OSO slug: first as an exact
# "owner/repo" name, then as an owner that maps to a single project. Handles
# whose owner belongs to several projects are printed for manual review and,
# like non-string (missing) handles and unknown handles, get None.
# NOTE(review): handles are lower-cased before lookup, which presumes the keys
# of repo_to_slug / owner_to_slug are already lower case — confirm.
oso_slugs = []
for github, name in zip(projects['project_github'], projects['project_name']):
    matched_slug = None
    if isinstance(github, str):
        handle = github.lower().strip().strip('/')
        if handle in repo_to_slug:
            matched_slug = repo_to_slug[handle]
        elif handle in owner_to_slug:
            matched_slug = owner_to_slug[handle]
        elif handle in oso_dup_owners:
            print(handle, name)
    oso_slugs.append(matched_slug)

projects['oso_slug'] = oso_slugs
projects.tail()

chainsafe Lodestar
scopelift Umbra
scopelift Flexible Voting
synthetixio Synpress
ethereum web3.py
openzeppelin OpenZeppelin Contracts


Unnamed: 0,chain_id,round_id,round_name,project_name,project_github,payout_address,status,oso_slug
565,42161,23,Hackathon Alumni,$CODEX: Photosynthetic Finance,cascadiacarbon,0xF1Ec938a362138ff0CC5268EC32690d9a137e83A,APPROVED,cascadiacarbon
566,42161,23,Hackathon Alumni,Ecco,ecco-web3,0x57adDe4d31F44fa86FB553979FC52EDE13364129,APPROVED,
567,42161,23,Hackathon Alumni,AndiLending,AndiLend,0xBF457f26F1e8e7788654C228eda8B8660cC9eFd4,APPROVED,
568,42161,23,Hackathon Alumni,FundIt,ecoland-world,0xc8f0bae52D42f42d1Aed7b4af00CA3EF6C516c23,APPROVED,ecoland-world
569,42161,23,Hackathon Alumni,AdLand,adcommune,0x26bBec292e5080ecFD36F38FF1619FF35826b113,APPROVED,adcommune


# Create a master dataframe of all GG projects with their OSO metrics

In [11]:
# Left-join the metrics onto the project list so that every project row is
# kept; projects without a matching slug simply get empty metric columns.
# `project_slug` is dropped because it duplicates `oso_slug` after the join.
df = (
    projects
    .merge(metrics, how='left', left_on='oso_slug', right_on='project_slug')
    .drop(columns=['chain_id', 'round_id', 'project_slug'])
)
df

Unnamed: 0,round_name,project_name,project_github,payout_address,status,oso_slug,first_commit_date,last_commit_date,repositories,stars,...,contributors,new_contributors_6_months,contributors_6_months,avg_fulltime_devs_6_months,avg_active_devs_6_months,commits_6_months,issues_opened_6_months,issues_closed_6_months,pull_requests_opened_6_months,pull_requests_merged_6_months
0,dApps & Apps,Tape (formerly Lenstube),tapexyz,0x01d79BcEaEaaDfb8fD2F2f53005289CFcF483464,APPROVED,tapexyz,2022-08-27 00:00:00+00:00,2024-04-29 00:00:00+00:00,4,4395,...,38,8,38,1.0,1.000000,719.0,9.0,22.0,271.0,260.0
1,dApps & Apps,Hey.xyz (formerly Lenster),heyxyz,0x03Ba34f6Ea1496fa316873CF8350A3f7eaD317EF,APPROVED,heyxyz,2022-03-19 00:00:00+00:00,2024-04-29 00:00:00+00:00,2,22408,...,444,76,444,1.0,1.166667,3101.0,202.0,228.0,657.0,572.0
2,dApps & Apps,Toaster Finance,toaster-finance,0x30CAEb399186E087668e85F4424ea39Baf4981aD,APPROVED,toaster-finance,2023-09-28 00:00:00+00:00,2024-04-26 00:00:00+00:00,4,1,...,2,1,2,0.0,0.333333,3.0,,,1.0,
3,dApps & Apps,ENS Wayback Machine,BlossomLabs,0x6ea869B6870dd98552B0C7e47dA90702a436358b,APPROVED,ens-wayback-machine-blossomlabs,2023-04-02 00:00:00+00:00,2024-04-25 00:00:00+00:00,1,0,...,2,1,2,0.0,0.333333,6.0,,,1.0,1.0
4,dApps & Apps,Bloom Network,BloomNetwork,0x5219ffb88175588510e9752A1ecaA3cd217ca783,APPROVED,bloomnetwork,2023-04-19 00:00:00+00:00,2023-08-11 00:00:00+00:00,1,1,...,1,0,1,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
565,Hackathon Alumni,$CODEX: Photosynthetic Finance,cascadiacarbon,0xF1Ec938a362138ff0CC5268EC32690d9a137e83A,APPROVED,cascadiacarbon,2019-07-26 00:00:00+00:00,2024-04-08 00:00:00+00:00,13,4,...,4,2,4,,,5.0,,,3.0,
566,Hackathon Alumni,Ecco,ecco-web3,0x57adDe4d31F44fa86FB553979FC52EDE13364129,APPROVED,,NaT,NaT,,,...,,,,,,,,,,
567,Hackathon Alumni,AndiLending,AndiLend,0xBF457f26F1e8e7788654C228eda8B8660cC9eFd4,APPROVED,,NaT,NaT,,,...,,,,,,,,,,
568,Hackathon Alumni,FundIt,ecoland-world,0xc8f0bae52D42f42d1Aed7b4af00CA3EF6C516c23,APPROVED,ecoland-world,2022-08-27 00:00:00+00:00,2024-04-26 00:00:00+00:00,5,0,...,5,1,5,0.0,0.166667,2.0,,,,


In [16]:
# Export the merged project/metrics table. The destination is relative to the
# notebook (the same data/csv directory the other cells read from and write to)
# instead of an absolute path inside one user's home directory, so the cell
# also runs on other machines.
df.to_csv("data/csv/gg20-projects-export.csv")

In [14]:
# Per-round tallies (count() ignores missing values):
#   Total Projects      - rows with a payout address
#   Projects w GitHub   - rows with a GitHub handle
#   Projects on OSO     - rows that were matched to an OSO slug
#   New Projects on OSO - matched rows that have no first_commit_date
round_counts = [
    projects.groupby('round_name')['payout_address'].count(),
    projects.groupby('round_name')['project_github'].count(),
    df.groupby('round_name')['oso_slug'].count(),
    df.loc[df['first_commit_date'].isna()].groupby('round_name')['oso_slug'].count(),
]
summary_stats = pd.concat(
    round_counts,
    axis=1,
    keys=['Total Projects', 'Projects w GitHub', 'Projects on OSO', 'New Projects on OSO'],
)

# Collection size is 20% of the round's project count, rounded.
summary_stats['Collection Size'] = (summary_stats['Total Projects'] * .2).round()

# Rounds absent from one of the tallies come out of the concat as NaN.
summary_stats = summary_stats.fillna(0).astype(int)

summary_stats

Unnamed: 0_level_0,Total Projects,Projects w GitHub,Projects on OSO,New Projects on OSO,Collection Size
round_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Climate Round,127,41,22,4,25
Developer Tooling and Libraries,46,46,34,5,9
ENS Identity,38,27,20,3,8
Hackathon Alumni,53,53,31,13,11
Hypercerts Ecosystem Round,35,20,16,4,7
OpenCivics Consortium Round 02,16,7,5,0,3
Token Engineering QF Grants Round: Spring 2024,32,21,14,4,6
Web3 Infrastructure,75,75,57,12,15
dApps & Apps,148,148,115,22,30


# Rank the projects in each round by a round-specific metric

In [17]:
# Ranking metric per round: round name -> metrics column used to sort that
# round's projects. Keys are compared against `round_name` verbatim, and the
# insertion order is the order in which the rounds are printed.
criteria = {
    # NOTE(review): the trailing space in this key looks deliberate (the
    # recorded output prints the round name with it) — confirm against the CSV.
    'dApps & Apps ': 'avg_fulltime_devs_6_months',
    'Web3 Infrastructure': 'first_commit_date',
    'Developer Tooling and Libraries': 'forks',
    'Hackathon Alumni': 'commits_6_months',  # days active
    'ENS Identity': 'pull_requests_merged_6_months',
    'Token Engineering QF Grants Round: Spring 2024': 'issues_opened_6_months',
    'Hypercerts Ecosystem Round': 'stars',
    'Climate Round': 'avg_fulltime_devs_6_months',
    'OpenCivics Consortium Round 02': 'stars',
}

In [18]:
# Spot-check one cell of the summary table: the collection size computed for
# the Web3 Infrastructure round.
summary_stats.loc['Web3 Infrastructure', 'Collection Size']

15

In [19]:
# For every round, print the names of its top projects: sort the round's rows
# by the round's metric (largest first, missing values last) and keep as many
# names as the round's collection size.
for round_name, metric in criteria.items():
    round_rows = df.loc[df['round_name'] == round_name]
    collection_size = summary_stats.loc[round_name, 'Collection Size']
    ranked_names = round_rows.sort_values(by=metric, ascending=False)['project_name']
    top_projects = ranked_names.head(collection_size).tolist()
    print(round_name, top_projects, '', sep='\n')

dApps & Apps 
['ShapeShift DAO', 'rotki', 'Vocdoni - Secure digital voting', 'Giveth', 'Mississippi', 'JobStash', 'growthepie.xyz 🥧📏 - Usage Data and Analytics for Ethereum Scaling Solutions', 'Liker Land - NFT eBook - Empower Content Ownership', 'WTF Academy', 'Kiwi News', 'Impact Markets', 'DexKit', 'Hey.xyz (formerly Lenster)', 'Tape (formerly Lenstube)', 'Bankless Academy', 'Eppie: Web3 native email', 'Citizen Wallet', 'ZKP2P Fiat On Ramp', 'Revoke.cash', 'JediSwap', 'Tally Zero', 'IDriss - A more usable web3 for everyone', 'Mirror', 'Social Layer', 'viaPrize', 'Carmine Finance', 'Denota Protocol', 'MetaGame', 'Spark | Deresy & Hyperminter', 'Geo Web']

Web3 Infrastructure
['PORTERS', 'True Network', 'Ultimate Digits', 'nim-libp2p', 'quic-go', 'Zerokit', 'Ethscriptions', 'Lakat 📜', 'EIP-7265 Alliance - securing the future of ethereum', 'Rantom', 'Blockhead: portfolio tracker, block explorer and web3 browser', 'Bonding Curve Research Group (BCRG)', 'Opti.Domains | Scale ENS to OP', 

In [84]:
# List the metric columns that can be used as ranking criteria.
metrics.columns

Index(['project_slug', 'first_commit_date', 'last_commit_date', 'repositories',
       'stars', 'forks', 'contributors', 'new_contributors_6_months',
       'contributors_6_months', 'avg_fulltime_devs_6_months',
       'avg_active_devs_6_months', 'commits_6_months',
       'issues_opened_6_months', 'issues_closed_6_months',
       'pull_requests_opened_6_months', 'pull_requests_merged_6_months'],
      dtype='object')

In [28]:
# Write the GitHub check results to disk as pretty-printed JSON.
# NOTE(review): `not_found` is not defined in any cell visible in this
# notebook, so this cell fails on a fresh kernel unless the cell that builds
# it is restored — confirm where it comes from.
with open("data/gg20-github-checks.json", "w", encoding="utf-8") as f:
    json.dump(not_found, f, indent=2)

174

In [36]:
# Keep the checked entries whose outcome is flagged as approved, then collect
# the artifact of each one.
projects_to_add = [entry for entry in not_found if entry['outcome']['Approved']]
artifacts_to_add = [entry['artifact'] for entry in projects_to_add]

In [48]:
# Build a two-column (Project, GitHub) table of the projects whose lower-cased
# GitHub handle is one of the artifacts to add, with the handle expanded into
# a full github.com URL.
is_artifact_to_add = projects['project_github'].str.lower().isin(artifacts_to_add)
csv_version = (
    projects.loc[is_artifact_to_add, ['project_name', 'project_github']]
    .drop_duplicates()
    .rename(columns={'project_name': 'Project', 'project_github': 'GitHub'})
    .assign(
        GitHub=lambda frame: frame['GitHub'].map(
            lambda x: f"https://github.com/{x.lower()}"
        )
    )
)
csv_version

Unnamed: 0,Project,GitHub
10,Metrics Garden Labs,https://github.com/metrics-garden-labs
12,Armitage,https://github.com/armitage-labs
20,Index Wallets,https://github.com/optionhq
36,IPC Explorer,https://github.com/cronian-tech
37,Proof of passport,https://github.com/zk-passport
...,...,...
507,Agents With Benefits,https://github.com/agentswbenefits
513,Perpetual Organization Architect,https://github.com/perpetualorganizationarchitect
519,Coordination-Play,https://github.com/coordination-play
520,AI Swarm,https://github.com/aiswarm


In [52]:
# Save the Project/GitHub table built above.
# NOTE(review): the DataFrame index is written as an unnamed first column;
# pass index=False if the consumer of this file does not expect it — confirm.
csv_version.to_csv("data/csv/gg20_new_projects.csv")