In [39]:
!pip install pathos

Collecting pathos
  Obtaining dependency information for pathos from https://files.pythonhosted.org/packages/d8/08/ac94fa6f9eefe32963b8a54e573dab0dbc0d3df24fd34924bd9ce7eab7c4/pathos-0.3.1-py3-none-any.whl.metadata
  Downloading pathos-0.3.1-py3-none-any.whl.metadata (11 kB)
Collecting ppft>=1.7.6.7 (from pathos)
  Obtaining dependency information for ppft>=1.7.6.7 from https://files.pythonhosted.org/packages/f0/f8/0a493dfdf73edbfe58cae1323aec72d0152f463c7a351bd285e9d500985c/ppft-1.7.6.7-py3-none-any.whl.metadata
  Downloading ppft-1.7.6.7-py3-none-any.whl.metadata (12 kB)
Collecting pox>=0.3.3 (from pathos)
  Obtaining dependency information for pox>=0.3.3 from https://files.pythonhosted.org/packages/17/c7/ef7e37e5a895f5de068b408a52bee0710b1092574b6b4ab247a767e9fbd5/pox-0.3.3-py3-none-any.whl.metadata
  Downloading pox-0.3.3-py3-none-any.whl.metadata (8.0 kB)
Collecting multiprocess>=0.70.15 (from pathos)
  Obtaining dependency information for multiprocess>=0.70.15 from https://files.

In [2]:
import os
import time
from datetime import datetime

import pandas as pd
import requests
from pathos.multiprocessing import ProcessingPool as Pool  # Import Pool from pathos

In [3]:
# Read the GitHub Personal Access Token from the GITHUB_TOKEN environment
# variable so the secret never lives in the notebook. Any token that was
# previously pasted into this cell must be treated as leaked and revoked.
TOKEN = os.environ.get("GITHUB_TOKEN", "")
HEADERS = {
    "Accept": "application/vnd.github.v3+json",
}
if TOKEN:
    HEADERS["Authorization"] = f"token {TOKEN}"
else:
    # Requests still work without a token, but only within the much lower
    # unauthenticated rate limit.
    print("GITHUB_TOKEN is not set; GitHub API requests will be unauthenticated.")
# Number of worker processes used for parallel execution (passed to the pool
# as `processes=`; the historical name MAX_THREADS is kept for other cells).
MAX_THREADS = 10  # You can adjust this value

In [4]:
# Calculate days ago from a given timestamp string
def days_ago(timestamp_str):
    """Return the number of whole days between ``timestamp_str`` and now (UTC).

    Args:
        timestamp_str: Timestamp formatted as ``%Y-%m-%dT%H:%M:%SZ``
            (for example ``"2023-03-10T09:44:13Z"``).

    Returns:
        The elapsed days as an ``int``, or ``float("nan")`` when the value is
        not a parseable timestamp string (``None``, NaN, or a placeholder such
        as ``"N/A"``). NaN keeps a pandas column built from these results
        numeric, so comparisons like ``col < 60`` keep working.
    """
    try:
        datetime_obj = datetime.strptime(timestamp_str, "%Y-%m-%dT%H:%M:%SZ")
    except (TypeError, ValueError):
        # TypeError: not a string (None / NaN); ValueError: wrong format.
        return float("nan")
    return (datetime.utcnow() - datetime_obj).days


def fetch_details_and_update_df_v3(data):
    """Enrich one plugin row with details about its GitHub repository.

    Args:
        data: An ``(index, row)`` pair as yielded by ``DataFrame.iterrows()``.
            ``row`` must contain a ``"repo_url"`` entry.

    Returns:
        A one-row ``pandas.DataFrame`` holding a copy of ``row`` plus the
        columns ``created_at``, ``updated_at``, ``repo_name``,
        ``repo_description``, ``readme_text`` and ``stars``. When the lookup
        fails (an exception is raised or no details are returned) those
        columns are filled with NaN, so every returned frame has the same
        columns and numeric/date columns are not polluted by a text
        placeholder.
    """
    _, row = data
    new_row = row.copy()  # never mutate the caller's row
    detail_keys = (
        "created_at",
        "updated_at",
        "repo_name",
        "repo_description",
        "readme_text",
        "stars",
    )

    details = None
    try:
        details = get_repo_details(row["repo_url"])
    except Exception as e:
        # Best effort: one failing repository must not abort the whole batch.
        print(f"Error fetching details for {row['repo_url']}: {e}")

    if details:
        for key, value in details.items():
            new_row[key] = value
    else:
        # Same schema on every failure path, marked as missing.
        for key in detail_keys:
            new_row[key] = float("nan")
    return pd.DataFrame([new_row])


def get_repo_details(url, timeout=30):
    """Fetch repository metadata and README text from the GitHub REST API.

    Args:
        url: Repository URL whose last two path segments are the owner and
            the repository name, e.g. ``https://github.com/owner/repo``.
            A trailing slash or a ``.git`` suffix is tolerated.
        timeout: Seconds to wait on each HTTP request before giving up.

    Returns:
        A dict with the keys ``created_at``, ``updated_at``, ``repo_name``,
        ``repo_description``, ``readme_text`` and ``stars``, or ``None`` when
        the repository request does not answer with status 200.
        ``readme_text`` is ``""`` when no README could be downloaded.

    Raises:
        IndexError: If ``url`` has fewer than two ``/``-separated segments.
        requests.RequestException: On connection errors or timeouts.
    """
    # Extract username and repo name from the URL
    parts = url.strip().rstrip("/").split("/")
    user, repo_name = parts[-2], parts[-1]
    if repo_name.endswith(".git"):
        repo_name = repo_name[: -len(".git")]

    # Fetch repository details
    repo_api_url = f"https://api.github.com/repos/{user}/{repo_name}"
    response = requests.get(repo_api_url, headers=HEADERS, timeout=timeout)
    if response.status_code != 200:
        print(f"Failed to fetch details for {url}. Status code: {response.status_code}")
        return None

    repo_data = response.json()

    # Fetch README details
    readme_api_url = f"https://api.github.com/repos/{user}/{repo_name}/readme"
    readme_response = requests.get(readme_api_url, headers=HEADERS, timeout=timeout)
    readme_text = ""
    if readme_response.status_code == 200:
        download_url = readme_response.json().get("download_url")
        if download_url:
            # The raw file is fetched without the API headers, as before.
            raw_response = requests.get(download_url, timeout=timeout)
            # Only keep the body of a successful download; an error page must
            # not end up stored as README text.
            if raw_response.status_code == 200:
                readme_text = raw_response.text

    # Extract required details
    details = {
        "created_at": repo_data.get("created_at", ""),
        "updated_at": repo_data.get("updated_at", ""),
        "repo_name": repo_data.get("name", ""),
        "repo_description": repo_data.get("description", ""),
        "readme_text": readme_text,
        "stars": repo_data.get("stargazers_count", 0),
    }
    return details

In [68]:
# NOTE(review): `final_df` is only assigned in cells that appear further down
# in this notebook, so this line relies on kernel state left over from an
# out-of-order run and raises NameError on a fresh "Restart & Run All".
df = final_df

In [59]:
# Load the data
# The input is read from a user-specific path under ~/Downloads. Later cells
# expect at least the columns plugin_name, plugin_author, num_downloads,
# description and repo_url.
df = pd.read_csv("~/Downloads/obsidian (1).csv", thousands=",", quotechar='"')

In [44]:
# Remember the starting row count so the next step can report how many rows
# it removed
initial_rows = len(df)
initial_rows

1141

In [45]:
# Keep only rows in which every required field is present (mutates df in place)
df.dropna(
    inplace=True,
    subset=["plugin_name", "plugin_author", "num_downloads", "description", "repo_url"],
)

# How many rows the filter removed
dropped_rows = initial_rows - len(df)
dropped_rows

0

In [46]:
# Remove the "By" + non-breaking-space (U+00A0) marker from plugin_author.
# regex=False makes this a literal replacement on every pandas version.
df["plugin_author"] = df["plugin_author"].str.replace("By\xa0", "", regex=False)

# Strip the " downloads" suffix and thousands separators, then convert to int.
# astype(str) first so the cell can be re-run after the column is already
# numeric (the .str accessor is not available on an integer column).
df["num_downloads"] = (
    df["num_downloads"]
    .astype(str)
    .str.replace(" downloads", "", regex=False)
    .str.replace(",", "", regex=False)
    .astype(int)
)

# Literal substring match: with the regex default, "." would match any
# character (e.g. "githubXcom").
github_df = df[df["repo_url"].str.contains("github.com", regex=False)]

print(len(github_df))
print(len(df))

1141
1141


In [47]:
# Fetch details for each repo and append to the DataFrame in parallel
# Every (index, row) pair from df.iterrows() is passed to
# fetch_details_and_update_df_v3 by a pool created with
# processes=MAX_THREADS; `results` holds what pool.map returns
# (presumably one one-row DataFrame per input row — verify against pathos).
# NOTE(review): this maps over `df`, not the GitHub-only `github_df` built in
# the previous cell — confirm that non-GitHub URLs are meant to be queried.
with Pool(processes=MAX_THREADS) as pool:
    results = pool.map(fetch_details_and_update_df_v3, df.iterrows())

In [67]:
# Add the new columns
df["created_days_ago"] = df["created_at"].apply(days_ago)
df["updated_days_ago"] = df["updated_at"].apply(days_ago)
df["short_description"] = (
    df["plugin_name"]
    + " "
    + df["repo_description"].fillna("")
    + " "
    + df["description"]
)
# readme_text can be missing for rows whose repository lookup failed; treat it
# as empty text so long_description never collapses to NaN.
df["long_description"] = df["short_description"] + " " + df["readme_text"].fillna("")

# Keep the desired columns. Take an explicit copy: other cells rebind `df` to
# `final_df` and then modify it in place, which must not operate on a slice
# of the original frame.
final_df = df[
    [
        "plugin_name",
        "num_downloads",
        "description",
        "stars",
        "short_description",
        "long_description",
        "created_at",
        "updated_at",
        "updated_days_ago",
        "created_days_ago",
    ]
].copy()

final_df.head()

Unnamed: 0,plugin_name,num_downloads,description,stars,short_description,long_description,created_at,updated_at,updated_days_ago,created_days_ago
0,Source Code Note,416,Helps you organize source code note easily.,11,Source Code Note The obsidian plugin can help ...,Source Code Note The obsidian plugin can help ...,2023-03-10T09:44:13Z,2023-06-21T10:23:48Z,67,170
1,At People,313,Use the familiar @ notation to cross link to p...,7,At People Obsidian plugin to use the familiar ...,At People Obsidian plugin to use the familiar ...,2023-08-17T02:04:56Z,2023-08-27T20:40:51Z,0,11
2,Image Captions,4951,Adds captions to images when there is alt-text...,53,Image Captions Adds captions to images when th...,Image Captions Adds captions to images when th...,2023-01-04T16:41:19Z,2023-08-12T04:56:58Z,16,235
3,Scroll Speed,1430,Change the scroll speed in Obsidian notes.,16,Scroll Speed Obsidian Plugin to Change Scroll ...,Scroll Speed Obsidian Plugin to Change Scroll ...,2021-10-20T10:51:00Z,2023-07-28T19:19:48Z,30,676
4,Latex Environments,14980,Quickly insert and change LaTeX environments w...,50,Latex Environments Quickly insert and change l...,Latex Environments Quickly insert and change l...,2020-11-04T23:57:39Z,2023-07-19T08:56:11Z,39,1026


In [48]:
# Stack the per-row frames collected in `results` into one frame with a fresh
# 0..n-1 index, and make it the working DataFrame
final_df = pd.concat(results, ignore_index=True)
df = final_df

In [6]:
# Preview the first rows of the working DataFrame
df.head()

Unnamed: 0.1,Unnamed: 0,plugin_name,num_downloads,description,stars,short_description,long_description,created_at,updated_at,updated_days_ago,created_days_ago
0,0,Source Code Note,416,Helps you organize source code note easily.,11,Source Code Note The obsidian plugin can help ...,Source Code Note The obsidian plugin can help ...,2023-03-10T09:44:13Z,2023-06-21T10:23:48Z,67,170
1,1,At People,313,Use the familiar @ notation to cross link to p...,7,At People Obsidian plugin to use the familiar ...,At People Obsidian plugin to use the familiar ...,2023-08-17T02:04:56Z,2023-08-27T20:40:51Z,0,11
2,2,Image Captions,4951,Adds captions to images when there is alt-text...,53,Image Captions Adds captions to images when th...,Image Captions Adds captions to images when th...,2023-01-04T16:41:19Z,2023-08-12T04:56:58Z,16,235
3,3,Scroll Speed,1430,Change the scroll speed in Obsidian notes.,16,Scroll Speed Obsidian Plugin to Change Scroll ...,Scroll Speed Obsidian Plugin to Change Scroll ...,2021-10-20T10:51:00Z,2023-07-28T19:19:48Z,30,676
4,4,Latex Environments,14980,Quickly insert and change LaTeX environments w...,50,Latex Environments Quickly insert and change l...,Latex Environments Quickly insert and change l...,2020-11-04T23:57:39Z,2023-07-19T08:56:11Z,39,1026


In [50]:
# Save the updated DataFrame
# index=False keeps the row index out of the written file.
df.to_csv("obsidian_with_github_details.csv", index=False)
print("Data fetching completed. Results saved to obsidian_with_github_details.csv.")

Data fetching completed. Results saved to obsidian_with_github_details.csv.


In [52]:
# Inspect the first row, one field per line
df.iloc[0]

plugin_name                                          Source Code Note
plugin_author                                                 Waiting
num_downloads                                                     416
description               Helps you organize source code note easily.
repo_url            https://github.com/waiting0324/obsidian-code-note
created_at                                       2023-03-10T09:44:13Z
updated_at                                       2023-06-21T10:23:48Z
repo_name                                          obsidian-code-note
repo_description    The obsidian plugin can help you organize sour...
readme_text         # Obsidian Source Code Note Plugin\n\n[中文文檔](....
stars                                                              11
Name: 0, dtype: object

In [55]:
# Summarise column dtypes and non-null counts
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1141 entries, 0 to 1140
Data columns (total 11 columns):
 #   Column            Non-Null Count  Dtype 
---  ------            --------------  ----- 
 0   plugin_name       1141 non-null   object
 1   plugin_author     1141 non-null   object
 2   num_downloads     1141 non-null   int64 
 3   description       1141 non-null   object
 4   repo_url          1141 non-null   object
 5   created_at        1141 non-null   object
 6   updated_at        1141 non-null   object
 7   repo_name         1141 non-null   object
 8   repo_description  936 non-null    object
 9   readme_text       1141 non-null   object
 10  stars             1141 non-null   int64 
dtypes: int64(2), object(9)
memory usage: 98.2+ KB


In [57]:
# Inspect the first row again (same check as the earlier df.iloc[0] cell)
df.iloc[0]

plugin_name                                          Source Code Note
plugin_author                                                 Waiting
num_downloads                                                     416
description               Helps you organize source code note easily.
repo_url            https://github.com/waiting0324/obsidian-code-note
created_at                                       2023-03-10T09:44:13Z
updated_at                                       2023-06-21T10:23:48Z
repo_name                                          obsidian-code-note
repo_description    The obsidian plugin can help you organize sour...
readme_text         # Obsidian Source Code Note Plugin\n\n[中文文檔](....
stars                                                              11
Name: 0, dtype: object

451

In [32]:
# Display settings: allow long cell text and show up to 100 rows per table
pd.set_option("display.max_colwidth", 1000)
pd.set_option("display.max_rows", 100)

# Keep plugins that were updated less than 60 days ago, have more than 1000
# downloads, and were created less than 250 days ago
fdf = df[
    (df["updated_days_ago"] < 60)
    & (df["num_downloads"] > 1000)
    & (df["created_days_ago"] < 250)
]
# fdf = fdf[fdf["stars"] > 0]
# fdf = fdf[fdf["short_description"].str.contains(r"\b(block|link|embedding|transclude|transclusion)\b", case=False, regex=True)]
print(len(fdf))

# Newest plugins first (smallest created_days_ago), limited to the columns
# worth reading in the table
# sorted_df = fdf.sort_values(by="num_downloads", ascending=False)
sorted_df = fdf.sort_values(by="created_days_ago", ascending=True)[
    [
        "plugin_name",
        "short_description",
        "num_downloads",
        # "updated_days_ago",
        "created_days_ago",
    ]
]

sorted_df.head(100)

97


Unnamed: 0,plugin_name,short_description,num_downloads,created_days_ago
507,Editor Width Slider,Editor Width Slider With this plugin you can set the line width of the editor in obsidian. Customize Obsidian's editor width with a slider for a tailored editing experience.,1026,44
611,Importer,"Importer Obsidian Importer lets you import notes from other apps and file formats into your Obsidian vault. Import data from Notion, Evernote, Google Keep, Bear, and HTML files.",11500,47
1107,Colored Tags,Colored Tags Colorizes tags in different colors. Colorizes tags in different colors. The color depends on the tag content. Colors of nested tags are mixed with parent tags. Text color contrast is automatically matched to comply with AA level WCAG 2.1.,2032,50
939,Code Styler,Code Styler A plugin for Obsidian.md for styling codeblocks and inline code Style codeblocks and inline code in reading view and editing view.,1302,52
65,Time Ruler,"Time Ruler A drag-and-drop time ruler combining the best of a task list and a calendar view (integrates with Tasks, Full Calendar, and Dataview). A drag-and-drop time ruler combining the best of a task list and a calendar view (integrates with Tasks, Full Calendar, and Dataview).",2125,84
801,Pieces for Developers,"Pieces for Developers Pieces' powerful extension for Obsidian-MD that allows users to access their code snippets directly within the Obsidian workspace Streamline your coding workflow in Obsidian with the Pieces For Developers plugin, offering powerful features for capturing, managing, translating, and enhancing code snippets. (Closed Source)",1696,87
332,Link with alias,Link with alias Obsidian plugin for handy creation of links and alias in front matter of target document Creates links and aliases in front matter of target document.,1009,103
1038,Attachment Management,Attachment Management Attachment Management of Obsidian Attachment management plugin to custom the attachment path and auto rename etc.,1599,113
817,Last Modified Timestamp in Status Bar,Last Modified Timestamp in Status Bar Dynamic display of file modification timestamp in the status bar.,1155,122
1124,Confluence Integration,Confluence Integration Sync / Publish Obsidian Notes to Confluence Publish markdown content from Obsidian to Atlassian Confluence. It supports some Obsidian markdown extensions for richer content.,1654,123


In [14]:
# Display up to the first 50 rows of the sorted selection
sorted_df.head(50)
# show more of the description

Unnamed: 0,plugin_name,description,num_downloads,updated_days_ago
184,Advanced Slides,Create Markdown-based presentations in Obsidian.,629499,0
958,Remotely Save,"Yet another unofficial plugin allowing users to sync notes between local device and the cloud service (S3, Dropbox, webdav, OneDrive).",230592,0
58,Omnisearch,"Intelligent search for your notes, PDFs, and OCR for images.",183074,0
426,Text Generator,Text generator is a handy plugin that helps you generate text content using GPT-3 (OpenAI).,129142,0
90,Hover Editor,Transform the Page Preview hover popover into a fully working editor instance.,127499,0
1003,Zotero Integration,"Insert and import citations, bibliographies, notes, and PDF annotations from Zotero.",122480,0
954,Projects,Project management for Obsidian.,119997,1
577,MAKE.md,Make.md brings you features that supercharges Obsidian. Sort your files in custom order and add file icons using Spaces. Edit inline embeds with Flow Editor. And style your text and add new Markdown blocks without writing Markdown using Maker Mode.,118258,0
121,DataLoom,Weave together data from diverse sources into a cohesive table view. Inspired by Excel spreadsheets and Notion.so.,116041,0
236,Full Calendar,Keep events and manage your calendar alongside all your other notes in your Obsidian vault.,109914,0


In [89]:
# Write without the row index, consistent with the earlier save of this file.
# A written index comes back as an extra "Unnamed: 0" column when the CSV is
# re-read, and accumulates on every save/load round trip.
df.to_csv("obsidian_with_github_details.csv", index=False)

In [90]:
# List the current working directory (bare shell-style command run by IPython)
# NOTE(review): the recorded listing shows shell-configuration files, including
# secrets.sh, next to the saved CSV — confirm this is the intended output
# directory and clear the output before sharing the notebook.
ls

aliases.zsh                       [1m[36mplugins[m[m/
antigen.log                       secrets.sh
fzf.zsh                           [35msecrets.zsh[m[m@
obsidian_with_github_details.csv  zsh_omz.code-workspace
old_zshrc_configs.txt             zshrc
package_plugin_setup.zsh          zshrc.zwc
personal_zsh_configs.zsh


In [5]:
# NOTE(review): other cells write "obsidian_with_github_details.csv", while
# this cell reads a differently named file — confirm which file is intended.
df = pd.read_csv("obsidian_plugins_with_repo_details.csv")