In [1]:
# pip install github
# pip install tqdm
# pip install python-dotenv

In [22]:
import asyncio
import csv
import os
import time

import github
import pandas as pd
from dotenv import load_dotenv
from tqdm import tqdm
load_dotenv()
tqdm.pandas()

In [9]:
# GitHub credentials are read from environment variables; each one is None
# when the corresponding variable is not set.
GITHUB_USERNAME = os.environ.get("GITHUB_USERNAME")
GITHUB_TOKEN = os.environ.get("GITHUB_TOKEN")

## GitHub Client setup

In [10]:
# GitHub API client built from the credentials read above; get_info_from_gh()
# uses this module-level `client` for its repository lookups.
# NOTE(review): the top-level `await` relies on the notebook kernel supporting
# it (IPython autoawait) - this line is not valid in a plain Python script.
client = await github.GHClient(username=GITHUB_USERNAME, token=GITHUB_TOKEN)

## F-Droid apps list, GitHub-hosted apps only

In [11]:
# Load the F-Droid apps list and discard rows that have no source-code URL.
apps_list = (
    pd.read_csv("data/fdroid_apps_list_complete.csv")
    .dropna(subset=["source_code"])
)


In [12]:
# Boolean mask: True where the source-code URL contains the text "github".
gh_filter = apps_list["source_code"].str.contains(pat="github")
# Keep only the rows selected by the mask.
github_apps_list = apps_list.loc[gh_filter]

In [17]:
def extract_repo_owner(url: str) -> dict:
    """Extract the repository name and its owner from a repository URL.

    For URLs hosted on ``github.com`` the owner and repository are the first
    two path segments after the host, so extra path parts such as
    ``/tree/master`` are ignored. For any other URL the last two non-empty
    segments are used. Surrounding whitespace, any number of trailing slashes
    and a trailing ``.git`` suffix are ignored.

    Args:
        url: Repository URL, e.g. ``https://github.com/owner/repo``.

    Returns:
        A dict with the keys ``"repo"`` and ``"owner"``.

    Raises:
        IndexError: If the URL contains fewer than two non-empty segments.
    """
    # Dropping empty segments removes the artefacts of "//" after the scheme
    # and of one or more trailing slashes.
    segments = [part for part in url.strip().split("/") if part]

    # Locate the GitHub host so owner/repo can be read relative to it instead
    # of relative to the end of the URL.
    host_idx = next(
        (
            i
            for i, part in enumerate(segments)
            if part.lower() in ("github.com", "www.github.com")
        ),
        None,
    )

    if host_idx is not None and len(segments) - host_idx > 2:
        owner, repo = segments[host_idx + 1], segments[host_idx + 2]
    else:
        # Fallback: last two segments.
        owner, repo = segments[-2], segments[-1]

    # Clone URLs end in ".git", which is not part of the repository name.
    if repo.endswith(".git"):
        repo = repo[: -len(".git")]

    return {
        "repo": repo,
        "owner": owner,
    }

class DummyRepoInfo:
    """Placeholder repository record whose fields are all ``None``.

    get_info_from_gh() substitutes an instance of this class when the
    repository lookup raises ``github.RepositoryNotFound``, so the attribute
    reads that follow still work and yield empty values.
    """

    def __init__(self) -> None:
        # Same attribute names that get_info_from_gh() reads from a real
        # repository object.
        for field_name in (
            "stargazers_count",
            "language",
            "archived",
            "disabled",
            "updated_at",
            "clone_url",
            "forks",
            "is_fork",
        ):
            setattr(self, field_name, None)

In [14]:
# Second name bound to the same DataFrame object as github_apps_list (no copy
# is made). The name is rebound later when the enriched CSV is read back.
github_apps_list_complete = github_apps_list

In [23]:
async def get_info_from_gh(df: pd.DataFrame, start_after: str = None):
    """Fetch GitHub metadata for each row of ``df`` and append it to a CSV file.

    For every row the repository named by the ``source_code`` URL is looked up
    through the module-level ``client``. The row, extended with the columns
    ``stars``, ``primary_language``, ``is_archived``, ``is_disabled``,
    ``updated_at``, ``clone_url``, ``forks`` and ``is_fork``, is appended to
    ``data/github_apps_list_complete.csv`` immediately, so an interrupted run
    keeps everything fetched so far.

    Args:
        df: Apps table; must contain the ``package`` and ``source_code``
            columns.
        start_after: Optional package name used to resume an interrupted run.
            Rows up to and including the one whose ``package`` equals this
            value are skipped, and no header row is written because the
            output file is expected to already contain one.

    Returns:
        None. Results are written to the CSV file as a side effect.
    """
    output_path = "data/github_apps_list_complete.csv"
    # A fresh run writes the header once; a resumed run appends to a file that
    # already has it.
    write_header = not start_after
    should_skip = bool(start_after)

    for _, row in tqdm(df.iterrows(), total=df.shape[0]):
        if should_skip:
            # Compare the current row's package, not the whole column: testing
            # a Series for truth raises ValueError.
            print("skipping", row["package"])
            if row["package"] == start_after:
                should_skip = False
            continue

        row_dict = row.to_dict()
        # Pause between requests without blocking the event loop.
        await asyncio.sleep(0.8)
        try:
            repo_info = await client.get_repo(**extract_repo_owner(row["source_code"]))
        except github.RepositoryNotFound:
            # Keep the row, with empty GitHub columns.
            repo_info = DummyRepoInfo()

        row_dict["stars"] = repo_info.stargazers_count
        row_dict["primary_language"] = repo_info.language
        row_dict["is_archived"] = repo_info.archived
        row_dict["is_disabled"] = repo_info.disabled
        row_dict["updated_at"] = repo_info.updated_at
        row_dict["clone_url"] = repo_info.clone_url
        row_dict["forks"] = repo_info.forks
        row_dict["is_fork"] = repo_info.is_fork

        # Append after every row. newline="" is what the csv module expects
        # for files it writes; UTF-8 matches what pd.read_csv assumes by
        # default when the file is read back.
        with open(output_path, "a", newline="", encoding="utf-8") as csv_file:
            writer = csv.DictWriter(csv_file, fieldnames=list(row_dict.keys()))
            if write_header:
                write_header = False
                writer.writeheader()
            writer.writerow(row_dict)


In [25]:
# await get_info_from_gh(github_apps_list)  # uncomment to run the ingestion; the recorded run below took about 80 minutes

100%|██████████| 3961/3961 [1:19:45<00:00,  1.21s/it] 


In [26]:
# Read back the CSV that get_info_from_gh() appends to; this rebinds
# github_apps_list_complete to the table that includes the GitHub columns.
github_apps_list_complete = pd.read_csv("data/github_apps_list_complete.csv")

## Basic stats

#### Number of projects by language

In [28]:
# The five most frequent primary languages with their project counts.
(
    github_apps_list_complete["primary_language"]
    .value_counts()
    .head(5)
)

primary_language
Java      2329
Kotlin     900
Dart       211
C           92
C++         66
Name: count, dtype: int64

#### Average number of stars by language

In [31]:
# Mean star count per primary language, highest first.
(
    github_apps_list_complete
    .groupby("primary_language")["stars"]
    .mean()
    .sort_values(ascending=False)
)

primary_language
Lua            13392.000000
Rust            8795.285714
Haskell         4158.000000
C++             3976.727273
Clojure         3712.000000
Objective-C     3232.500000
TypeScript      2325.319149
Scala           1350.000000
C                949.173913
Nix              743.000000
Vue              714.000000
Kotlin           626.550000
Dart             513.687204
JavaScript       422.442623
CSS              289.250000
Java             265.948905
Python           258.666667
PHP              219.444444
GDScript         180.285714
Go               167.571429
HTML             105.920000
Xtend             96.000000
Arduino           39.000000
QML               21.000000
Shell             18.000000
Perl 6            17.000000
PLpgSQL            8.000000
C#                 7.000000
Logos              5.000000
Makefile           2.000000
Name: stars, dtype: float64

#### Total number of forks by language

In [36]:
# Total fork count per primary language, highest first.
(
    github_apps_list_complete
    .groupby("primary_language")["forks"]
    .sum()
    .sort_values(ascending=False)
)

primary_language
Java           153596.0
Kotlin         116423.0
C               61320.0
C++             58372.0
TypeScript      16798.0
Dart             8096.0
Rust             6056.0
JavaScript       4149.0
Clojure          2949.0
Lua              1121.0
Scala            1038.0
Objective-C       989.0
Python            976.0
PHP               480.0
Go                430.0
Haskell           374.0
HTML              354.0
CSS               213.0
GDScript          207.0
Xtend              63.0
Nix                46.0
Vue                40.0
Arduino            18.0
PLpgSQL             8.0
Shell               7.0
QML                 6.0
Perl 6              2.0
Logos               2.0
Makefile            0.0
C#                  0.0
Name: forks, dtype: float64

#### Java projects with most stars

In [40]:
# Mask for rows whose primary language is Java; missing comparison results
# are treated as False.
java_filter = github_apps_list_complete["primary_language"].eq("Java").fillna(False)
# Top 20 Java projects by stars, keeping the first row per package.
(
    github_apps_list_complete[java_filter]
    .sort_values("stars", ascending=False)
    .drop_duplicates(subset=["package"])
    .head(20)
)

Unnamed: 0,name,package,summary,category,link,author,author_contact,license,license_page,website,...,source_code,build_metadata,stars,primary_language,is_archived,is_disabled,updated_at,clone_url,forks,is_fork
936,Barcode Scanner,com.google.zxing.client.android,"Scan barcodes on products, or barcodes contain...",multimedia,https://f-droid.org/en/packages/com.google.zxi...,ZXing Team,mailto:srowen@gmail.com?subject=F-Droid - Free...,Apache License 2.0,https://www.apache.org/licenses/LICENSE-2.0,https://github.com/zxing/zxing/wiki/Frequently...,...,https://github.com/zxing/zxing,https://gitlab.com/fdroid/fdroiddata/tree/mast...,31421.0,Java,False,False,2023-08-28 10:46:54,https://github.com/zxing/zxing.git,9391.0,False
844,NewPipe,org.schabi.newpipe,Lightweight YouTube frontend,multimedia,https://f-droid.org/en/packages/org.schabi.new...,Team NewPipe,mailto:tnp@schabi.org?subject=F-Droid - Free a...,GNU General Public License v3.0 or later,https://www.gnu.org/licenses/gpl-3.0-standalon...,https://newpipe.net,...,https://github.com/TeamNewPipe/NewPipe,https://gitlab.com/fdroid/fdroiddata/tree/mast...,25065.0,Java,False,False,2023-08-28 21:07:16,https://github.com/TeamNewPipe/NewPipe.git,2749.0,False
3932,Termux,com.termux,Terminal emulator with packages,development,https://f-droid.org/en/packages/com.termux/,,,GNU General Public License v3.0 only,https://www.gnu.org/licenses/gpl-3.0-standalon...,https://termux.com,...,https://github.com/termux/termux-app,https://gitlab.com/fdroid/fdroiddata/tree/mast...,24082.0,Java,False,False,2023-08-28 21:09:57,https://github.com/termux/termux-app.git,2761.0,False
3704,Mindustry,io.anuke.mindustry,A factory-based sandbox tower defense game,games,https://f-droid.org/en/packages/io.anuke.mindu...,,,GNU General Public License v3.0 or later,https://www.gnu.org/licenses/gpl-3.0-standalon...,https://anuke.itch.io/mindustry,...,https://github.com/Anuken/Mindustry,https://gitlab.com/fdroid/fdroiddata/tree/mast...,18827.0,Java,False,False,2023-08-28 20:29:45,https://github.com/Anuken/Mindustry.git,2618.0,False
3051,VirtualXposed,io.va.exposed,"Use Xposed without root, unlock the bootloader...",system,https://f-droid.org/en/packages/io.va.exposed/,,,GNU General Public License v3.0 only,https://www.gnu.org/licenses/gpl-3.0-standalon...,https://vxp.app/,...,https://github.com/android-hacker/VirtualXposed,https://gitlab.com/fdroid/fdroiddata/tree/mast...,14598.0,Java,False,False,2023-08-28 12:18:09,https://github.com/android-hacker/VirtualXpose...,2459.0,False
3942,GitHub,com.github.mobile,Official Github client,development,https://f-droid.org/en/packages/com.github.mob...,Pockethub,mailto:pockethubers@gmail.com?subject=F-Droid ...,Apache License 2.0,https://www.apache.org/licenses/LICENSE-2.0,,...,https://github.com/github/android,https://gitlab.com/fdroid/fdroiddata/tree/mast...,9401.0,Java,False,False,2023-08-26 14:57:08,https://github.com/pockethub/PocketHub.git,3528.0,False
2906,Aegis Authenticator,com.beemdevelopment.aegis,"Free, secure and open source 2FA app to manage...",security,https://f-droid.org/en/packages/com.beemdevelo...,Beem Development,mailto:beemdevelopment@gmail.com?subject=F-Dro...,GNU General Public License v3.0 only,https://www.gnu.org/licenses/gpl-3.0-standalon...,https://getaegis.app,...,https://github.com/beemdevelopment/Aegis,https://gitlab.com/fdroid/fdroiddata/tree/mast...,6042.0,Java,False,False,2023-08-28 21:33:37,https://github.com/beemdevelopment/Aegis.git,291.0,False
1069,AntennaPod,de.danoeh.antennapod,"Easy-to-use, flexible and open-source podcast ...",multimedia,https://f-droid.org/en/packages/de.danoeh.ante...,info@antennapod.org,mailto:info@antennapod.org?subject=F-Droid - F...,GNU General Public License v3.0 only,https://www.gnu.org/licenses/gpl-3.0-standalon...,https://antennapod.org,...,https://github.com/AntennaPod/AntennaPod,https://gitlab.com/fdroid/fdroiddata/tree/mast...,5090.0,Java,False,False,2023-08-28 17:24:20,https://github.com/AntennaPod/AntennaPod.git,1237.0,False
3002,Amaze,com.amaze.filemanager,An open-source file manager following the Mate...,system,https://f-droid.org/en/packages/com.amaze.file...,Team Amaze,mailto:vishalmeham2@gmail.com?subject=F-Droid ...,GNU General Public License v3.0 or later,https://www.gnu.org/licenses/gpl-3.0-standalon...,https://teamamaze.xyz,...,https://github.com/TeamAmaze/AmazeFileManager,https://gitlab.com/fdroid/fdroiddata/tree/mast...,4678.0,Java,False,False,2023-08-28 10:33:00,https://github.com/TeamAmaze/AmazeFileManager.git,1477.0,False
591,Nekogram X,nekox.messenger,The third-party Telegram android app.,internet,https://f-droid.org/en/packages/nekox.messenger/,,,GNU General Public License v3.0 or later,https://www.gnu.org/licenses/gpl-3.0-standalon...,https://github.com/NekoX-Dev/NekoX,...,https://github.com/NekoX-Dev/NekoX,https://gitlab.com/fdroid/fdroiddata/tree/mast...,4186.0,Java,False,False,2023-08-28 13:48:57,https://github.com/NekoX-Dev/NekoX.git,326.0,False
