In [1]:
# !pip install requests pandas bokeh python-dotenv

import requests
import pandas as pd
from bokeh.plotting import figure, output_notebook, show
from bokeh.models import HoverTool
import neo4j
from utils.neo4jdownloader import Neo4JDownloader
from pathlib import Path
import os
from dotenv import load_dotenv

# Load environment variables from .env file
load_dotenv()

# Configure Bokeh so that show() renders plots inline in the notebook
output_notebook()


In [6]:


def get_downloader():
    """Build a ``Neo4JDownloader`` from mounted secrets or environment variables.

    If the directory ``/secrets`` exists, the connection settings are read from
    the files ``neo4j_uri``, ``neo4j_user``, ``neo4j_password`` and
    ``neo4j_database`` inside it. Otherwise they are taken from the environment
    variables ``NEO4J_URI``, ``NEO4J_USER``, ``NEO4J_PASSWORD`` and
    ``NEO4J_DATABASE`` (which ``load_dotenv()`` may have populated from a
    ``.env`` file).

    Returns:
        Neo4JDownloader: instance constructed with (uri, username, password,
        database), in that order.

    Raises:
        OSError: if ``/secrets`` exists but one of the expected files cannot
            be read.
    """
    secrets_dir = Path("/secrets")

    def read_secret(name):
        # Secret files usually end with a newline; strip surrounding whitespace
        # so it does not leak into the URI or the credentials.
        return (secrets_dir / name).read_text().strip()

    if secrets_dir.is_dir():
        neo4j_uri = read_secret("neo4j_uri")
        neo4j_username = read_secret("neo4j_user")
        neo4j_password = read_secret("neo4j_password")
        neo4j_database = read_secret("neo4j_database")
    else:
        # `display` is the IPython builtin available in notebook kernels.
        # NOTE(review): the message names "Tentris" although this configures
        # Neo4j — looks like a copy/paste leftover; confirm before rewording.
        display("Using local env file to configure Tentris.")
        # os.getenv returns None for unset variables; they are passed through
        # unchanged to Neo4JDownloader.
        neo4j_uri = os.getenv("NEO4J_URI")
        neo4j_username = os.getenv("NEO4J_USER")
        neo4j_password = os.getenv("NEO4J_PASSWORD")
        neo4j_database = os.getenv("NEO4J_DATABASE")

    return Neo4JDownloader(neo4j_uri, neo4j_username, neo4j_password, neo4j_database)

def connect_neo4j():
    """Return the downloader object produced by ``get_downloader()``."""
    return get_downloader()

In [26]:
# Create the downloader that the following cells reuse for their queries.
downloader = connect_neo4j()

# Cypher query: the name of every `org` node, ordered by name.
orgs_query = """
MATCH (o:org)
RETURN o.name AS organization
ORDER BY organization;
"""
organizations = downloader.run_custom_query(orgs_query)

# Each result is indexed by the column alias used in the RETURN clause.
for org in organizations:
    print(org["organization"])


'Using local env file to configure Tentris.'

1lettre1sourire
1like-1app
451hackathon
AI-TranspWood
AI-View
AI4LAM
AJMGroup
ALA-herbarium
ATMoS-Waterloo
AdaptiveMotorControlLab
Adobe-Marketing-Cloud
AljabrIO
AmenityDev
ArbiterLib
ArcaniteSolutions
Assistive-Technology-Challenge-2023
Azure
BD2-TeaParty
BHoM
BRITorg
BelgianBiodiversityPlatform
BilkentCompGen
BioAcoustica
BioEcoOcean
BlenderBQ
BlindlyTeam
BlockResearchGroup
BoevaLab
CERNDocumentServer
CHESS-mission
CMU-SAFARI
CONABIO
CS-433
CS-552
CakeML
Cambridge-Community-Kitchen
Cambrium-bio
Canadensys
Carthage
CatalogueOfLife
CesiumLabs
ChainAgnostic
ChiangLab
Cliffe-Group
ClimateMatchAcademy
CoFiF
Code52
CodersCommunity
CoordML
CosmicPi
CrowdNotifier
CrystalCraftMC
DART-Lab-LLUI
DHBern
DINAcon
DOLMA-NLP
DP-3T
DPGAlliance
Datafable
DeepLabCut
DeepLabCutAIResidency
DeliteEPFL
DevMine
DigitalMediaSIG
DudLab
ENAC-CNPA
EPFL-AI-Team
EPFL-Center-for-Imaging
EPFL-EMPlus
EPFL-ENAC
EPFL-LAPD
EPFL-LASUR
EPFL-LCSB
EPFL-Open-Science
EPFLValais
EPFLXplore
EPFLiGHT
ETHZ-RobotX
EduWireApps
Elem

In [29]:
ORGANIZATION_OF_INTEREST = "EPFL-ENAC"

# Cypher query: names of the repositories owned by the organization bound to
# the $org_name parameter, ordered by name.
repos_of_org_query = """
MATCH (o:org)-[:owner_of]->(repo:repo)
WHERE o.name = $org_name
RETURN repo.name AS repository
ORDER BY repository;
"""
parameters = {"org_name": ORGANIZATION_OF_INTEREST}
repos_in_org = downloader.run_custom_query(repos_of_org_query, parameters)

print('repos for organization:', ORGANIZATION_OF_INTEREST)

# Keep only the last '/'-separated segment of each repository name and leave
# out the ".github" repository.
org_repos = []
for record in repos_in_org:
    short_name = record['repository'].split('/')[-1]
    if short_name != ".github":
        org_repos.append(short_name)

for repo in org_repos:
    print(repo)

repos for organization: EPFL-ENAC
ALICE-ETHZ-AREMA
AddLidar
CCFatiguePlatform
CIVIL-465-assignment2
CKAN_ext_localfolders
CKAN_ext_oaipmh
EIRA-data
EML-Water-Portal
EML-urban-hydro-model
ENACDataCatalog
ENACDataCatalogPOC
ENACdrives
ENACdrives-webservice
ENACrestic
EPFL-ENAC.github.io
EmailAttachmentExtractor
HOBEL-IAQ
ICE-EEML
INPLUGS-CO2
IT4R-dataeng-hmw
LASUR-R-SHINY
LASUR-swiss-proximity
LUTS-pneuma-simulator
MANSLAB-IBOIS-EESD-timberstone
MANSLAB-IBOIS-EESD-timberstone2
ORPER-summer-school
PotreeConverterMakefile
SBER-FlowCytometryClustering
SB_LinServerBasics
SOIL-monash-flood-portal
TOPO-DataGen
TempAqua_DB
TethysChlorisCore.jl
URBES-utc-test-data
UrbanTethysChloris.jl
WaterAgeNeutralHabitats
alice-perl-eerl-urbtrees
alicelandings
bluecity-viz
bluecity_sp04
bpe-sesvc
change-transcode-test-data
citytherm
dclab-webscrapping
eerl-icebreaker
eesd-mast
eesd-mast-cli
eesd-mmsdb
eira
enac-cd-app
enac-it4r-repo
enac-it4r-repo-python
enac-web-hosting-a10-2022
enac-web-hosting-dmz-2022
en

In [30]:
def get_repo_info(owner, repo, timeout=30):
    """Fetch the metadata record of one repository from the OSS Insight API.

    Args:
        owner: Account or organization segment of the repository path.
        repo: Repository name segment of the repository path.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` waits indefinitely, so a single stalled
            connection would hang the whole notebook.

    Returns:
        The value stored under the ``"data"`` key of the JSON response.

    Raises:
        requests.HTTPError: if the server answers with an error status.
        requests.Timeout: if the server does not answer within ``timeout``.
        KeyError: if the JSON response has no ``"data"`` key.
    """
    url = f"https://api.ossinsight.io/gh/repo/{owner}/{repo}"
    res = requests.get(url, timeout=timeout)
    res.raise_for_status()
    return res.json()["data"]

# Query the API once per repository of the organization. A failing repository
# is reported and skipped so the remaining ones are still collected.
repo_data = []
for repo in org_repos:
    try:
        repo_info = get_repo_info(ORGANIZATION_OF_INTEREST, repo)
    except Exception as e:
        print(f"Error fetching {repo}: {e}")
        continue
    repo_data.append(repo_info)
    print(f"Fetched data for: {repo}")

# One row per successfully fetched repository
df_repos = pd.DataFrame(repo_data)
print(f"\nFetched {len(df_repos)} repositories")
df_repos


Fetched data for: ALICE-ETHZ-AREMA
Fetched data for: AddLidar
Fetched data for: AddLidar
Fetched data for: CCFatiguePlatform
Fetched data for: CCFatiguePlatform
Fetched data for: CIVIL-465-assignment2
Fetched data for: CIVIL-465-assignment2
Fetched data for: CKAN_ext_localfolders
Fetched data for: CKAN_ext_localfolders
Fetched data for: CKAN_ext_oaipmh
Fetched data for: CKAN_ext_oaipmh
Fetched data for: EIRA-data
Fetched data for: EIRA-data
Fetched data for: EML-Water-Portal
Fetched data for: EML-Water-Portal
Fetched data for: EML-urban-hydro-model
Fetched data for: EML-urban-hydro-model
Fetched data for: ENACDataCatalog
Fetched data for: ENACDataCatalog
Fetched data for: ENACDataCatalogPOC
Fetched data for: ENACDataCatalogPOC
Fetched data for: ENACdrives
Fetched data for: ENACdrives
Fetched data for: ENACdrives-webservice
Fetched data for: ENACdrives-webservice
Fetched data for: ENACrestic
Fetched data for: ENACrestic
Fetched data for: EPFL-ENAC.github.io
Fetched data for: EPFL-ENAC.g

Unnamed: 0,id,node_id,name,full_name,private,owner,html_url,description,fork,url,...,open_issues,watchers,default_branch,permissions,temp_clone_token,custom_properties,organization,network_count,subscribers_count,template_repository
0,629408899,R_kgDOJYQEgw,ALICE-ETHZ-AREMA,EPFL-ENAC/ALICE-ETHZ-AREMA,False,"{'login': 'EPFL-ENAC', 'id': 61712940, 'node_i...",https://github.com/EPFL-ENAC/ALICE-ETHZ-AREMA,Arema - 0135_A4,False,https://api.github.com/repos/EPFL-ENAC/ALICE-E...,...,12,4,main,"{'admin': False, 'maintain': False, 'push': Fa...",,{},"{'login': 'EPFL-ENAC', 'id': 61712940, 'node_i...",0,5,
1,996178702,R_kgDOO2B7Dg,AddLidar,EPFL-ENAC/AddLidar,False,"{'login': 'EPFL-ENAC', 'id': 61712940, 'node_i...",https://github.com/EPFL-ENAC/AddLidar,0163_A1-AddLidar - Test version here : https:/...,False,https://api.github.com/repos/EPFL-ENAC/AddLidar,...,6,0,main,"{'admin': False, 'maintain': False, 'push': Fa...",,{},"{'login': 'EPFL-ENAC', 'id': 61712940, 'node_i...",0,0,"{'id': 507931522, 'node_id': 'R_kgDOHkZrgg', '..."
2,362422603,MDEwOlJlcG9zaXRvcnkzNjI0MjI2MDM=,CCFatiguePlatform,EPFL-ENAC/CCFatiguePlatform,False,"{'login': 'EPFL-ENAC', 'id': 61712940, 'node_i...",https://github.com/EPFL-ENAC/CCFatiguePlatform,ccfatigue https://ccfatigue.epfl.ch CCLAB - 00...,False,https://api.github.com/repos/EPFL-ENAC/CCFatig...,...,28,3,develop,"{'admin': False, 'maintain': False, 'push': Fa...",,{},"{'login': 'EPFL-ENAC', 'id': 61712940, 'node_i...",12,3,
3,626485763,R_kgDOJVdqAw,CIVIL-465-assignment2,EPFL-ENAC/CIVIL-465-assignment2,False,"{'login': 'EPFL-ENAC', 'id': 61712940, 'node_i...",https://github.com/EPFL-ENAC/CIVIL-465-assignm...,Templates for Assignment 2,False,https://api.github.com/repos/EPFL-ENAC/CIVIL-4...,...,0,0,main,"{'admin': False, 'maintain': False, 'push': Fa...",,{},"{'login': 'EPFL-ENAC', 'id': 61712940, 'node_i...",0,6,
4,366648619,MDEwOlJlcG9zaXRvcnkzNjY2NDg2MTk=,CKAN_ext_localfolders,EPFL-ENAC/CKAN_ext_localfolders,False,"{'login': 'EPFL-ENAC', 'id': 61712940, 'node_i...",https://github.com/EPFL-ENAC/CKAN_ext_localfol...,CKAN Data Catalog Plug-in for connecting to lo...,False,https://api.github.com/repos/EPFL-ENAC/CKAN_ex...,...,1,0,master,"{'admin': False, 'maintain': False, 'push': Fa...",,{},"{'login': 'EPFL-ENAC', 'id': 61712940, 'node_i...",0,5,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
109,877238985,R_kgDONEmayQ,urbes-viz,EPFL-ENAC/urbes-viz,False,"{'login': 'EPFL-ENAC', 'id': 61712940, 'node_i...",https://github.com/EPFL-ENAC/urbes-viz,,False,https://api.github.com/repos/EPFL-ENAC/urbes-viz,...,1,0,main,"{'admin': False, 'maintain': False, 'push': Fa...",,{},"{'login': 'EPFL-ENAC', 'id': 61712940, 'node_i...",0,3,
110,685907909,R_kgDOKOIfxQ,vector_tiles_generator,EPFL-ENAC/vector_tiles_generator,False,"{'login': 'EPFL-ENAC', 'id': 61712940, 'node_i...",https://github.com/EPFL-ENAC/vector_tiles_gene...,A repo to create vector tile from geospatial f...,False,https://api.github.com/repos/EPFL-ENAC/vector_...,...,0,0,main,"{'admin': False, 'maintain': False, 'push': Fa...",,{},"{'login': 'EPFL-ENAC', 'id': 61712940, 'node_i...",0,5,
111,569313688,R_kgDOIe8JmA,vita-app,EPFL-ENAC/vita-app,False,"{'login': 'EPFL-ENAC', 'id': 61712940, 'node_i...",https://github.com/EPFL-ENAC/vita-app,From picture of medical document to structured...,False,https://api.github.com/repos/EPFL-ENAC/vita-app,...,5,0,main,"{'admin': False, 'maintain': False, 'push': Fa...",,{},"{'login': 'EPFL-ENAC', 'id': 61712940, 'node_i...",0,5,
112,450493334,R_kgDOGtn7lg,vue-cli-preset,EPFL-ENAC/vue-cli-preset,False,"{'login': 'EPFL-ENAC', 'id': 61712940, 'node_i...",https://github.com/EPFL-ENAC/vue-cli-preset,,False,https://api.github.com/repos/EPFL-ENAC/vue-cli...,...,0,0,main,"{'admin': False, 'maintain': False, 'push': Fa...",,{},"{'login': 'EPFL-ENAC', 'id': 61712940, 'node_i...",0,6,


In [31]:
# Show the column labels of the fetched repository table
df_repos.columns

Index(['id', 'node_id', 'name', 'full_name', 'private', 'owner', 'html_url',
       'description', 'fork', 'url', 'forks_url', 'keys_url',
       'collaborators_url', 'teams_url', 'hooks_url', 'issue_events_url',
       'events_url', 'assignees_url', 'branches_url', 'tags_url', 'blobs_url',
       'git_tags_url', 'git_refs_url', 'trees_url', 'statuses_url',
       'languages_url', 'stargazers_url', 'contributors_url',
       'subscribers_url', 'subscription_url', 'commits_url', 'git_commits_url',
       'comments_url', 'issue_comment_url', 'contents_url', 'compare_url',
       'merges_url', 'archive_url', 'downloads_url', 'issues_url', 'pulls_url',
       'milestones_url', 'notifications_url', 'labels_url', 'releases_url',
       'deployments_url', 'created_at', 'updated_at', 'pushed_at', 'git_url',
       'ssh_url', 'clone_url', 'svn_url', 'homepage', 'size',
       'stargazers_count', 'watchers_count', 'language', 'has_issues',
       'has_projects', 'has_downloads', 'has_wiki', 'has

In [34]:
# Print every field of the first row as "key: value" to inspect one full record
for key, value in df_repos.iloc[0].items():
    print(f"{key}: {value}")


id: 629408899
node_id: R_kgDOJYQEgw
name: ALICE-ETHZ-AREMA
full_name: EPFL-ENAC/ALICE-ETHZ-AREMA
private: False
owner: {'login': 'EPFL-ENAC', 'id': 61712940, 'node_id': 'MDEyOk9yZ2FuaXphdGlvbjYxNzEyOTQw', 'avatar_url': 'https://avatars.githubusercontent.com/u/61712940?v=4', 'gravatar_id': '', 'url': 'https://api.github.com/users/EPFL-ENAC', 'html_url': 'https://github.com/EPFL-ENAC', 'followers_url': 'https://api.github.com/users/EPFL-ENAC/followers', 'following_url': 'https://api.github.com/users/EPFL-ENAC/following{/other_user}', 'gists_url': 'https://api.github.com/users/EPFL-ENAC/gists{/gist_id}', 'starred_url': 'https://api.github.com/users/EPFL-ENAC/starred{/owner}{/repo}', 'subscriptions_url': 'https://api.github.com/users/EPFL-ENAC/subscriptions', 'organizations_url': 'https://api.github.com/users/EPFL-ENAC/orgs', 'repos_url': 'https://api.github.com/users/EPFL-ENAC/repos', 'events_url': 'https://api.github.com/users/EPFL-ENAC/events{/privacy}', 'received_events_url': 'https://

In [None]:
from bokeh.models import LabelSet, ColumnDataSource
from bokeh.transform import dodge

# NOTE(review): in GitHub-style repository payloads `watchers_count` is reported
# to mirror `stargazers_count` (actual watchers being `subscribers_count`).
# If that holds for this data, both bars show the same number — confirm which
# field the "Watchers" series should use.

# Filter out repos with 0 watchers and 0 stars
df_filtered = df_repos[(df_repos['watchers_count'] > 0) | (df_repos['stargazers_count'] > 0)]

# Sort by total (watchers + stars) for better visualization
df_sorted = (
    df_filtered
    .assign(total_count=df_filtered['watchers_count'] + df_filtered['stargazers_count'])
    .sort_values('total_count', ascending=True)
)

# Ship only the plotted columns to the browser; the full frame carries many
# unrelated columns, several of them holding nested dicts.
source = ColumnDataSource(df_sorted[['name', 'watchers_count', 'stargazers_count']])

p = figure(
    title=f"Watchers and Stars Distribution - {ORGANIZATION_OF_INTEREST}",
    x_axis_label='Count',
    y_axis_label='Repository',
    width=900,
    height=max(600, len(df_sorted) * 40),  # More space for grouped bars
    y_range=df_sorted['name'].tolist(),
    tools="pan,wheel_zoom,box_zoom,reset"
)

# Offset for grouped bars, as a fraction of one category row
offset = 0.21

# The y axis is categorical, so bars and labels are positioned by repository
# name and shifted with `dodge`. Plain numeric positions (0, 1, 2, ...) do not
# line up with the category centers and draw each bar half a row off its label.

# Add watchers bars (blue)
p.hbar(y=dodge('name', offset, range=p.y_range),
       right='watchers_count',
       height=0.35,
       color="steelblue",
       legend_label="Watchers",
       source=source)

# Add stars bars (green)
p.hbar(y=dodge('name', -offset, range=p.y_range),
       right='stargazers_count',
       height=0.35,
       color="green",
       legend_label="Stars",
       source=source)

# Add text labels at the end of each bar, using the same dodge as the bar
labels_watchers = LabelSet(x='watchers_count', y=dodge('name', offset, range=p.y_range),
                           text='watchers_count',
                           x_offset=5, y_offset=-5, source=source,
                           text_font_size="9pt", text_color="navy")

labels_stars = LabelSet(x='stargazers_count', y=dodge('name', -offset, range=p.y_range),
                        text='stargazers_count',
                        x_offset=5, y_offset=-5, source=source,
                        text_font_size="9pt", text_color="darkgreen")

p.add_layout(labels_watchers)
p.add_layout(labels_stars)

p.legend.location = "bottom_right"
p.x_range.start = 0

show(p)
