<img width="10%" alt="Naas" src="https://landen.imgix.net/jtci2pxwjczr/assets/5ice39g4.png?w=160"/>

# Awesome Notebooks - Update notebook dates
<a href="https://app.naas.ai/user-redirect/naas/downloader?url=https://raw.githubusercontent.com/jupyter-naas/awesome-notebooks/master/template.ipynb" target="_parent">
<img src="https://naasai-public.s3.eu-west-3.amazonaws.com/open_in_naas.svg"/>
</a>

**Tags:** #jupyter #awesome-notebooks

**Author:** [Florent Ravenel](https://www.linkedin.com/in/florent-ravenel/)

**Description:** This notebook updates the dates on all awesome-notebooks and sends the data to a Google Sheets spreadsheet.

## Input

### Import libraries

In [None]:
import os
import requests
import pandas as pd
import urllib.parse
try:
    from git import Repo
except:
    !pip install GitPython
    from git import Repo
from naas_drivers import markdown
from pprint import pprint
import json
from papermill.iorw import (
    load_notebook_node,
    write_ipynb,
)
import copy
import hashlib
import uuid
from datetime import datetime
import naas
from naas_drivers import gsheet

### Setup Variables

In [None]:
# Inputs
# URL of the awesome-notebooks GitHub repository.
# NOTE(review): not referenced by the cells visible in this section — confirm it is still needed.
repo_url = "https://github.com/jupyter-naas/awesome-notebooks"
# GitHub token read from the Naas secret store under the "GITHUB_TOKEN" key;
# it is sent in the Authorization header of the GitHub API requests.
token = naas.secret.get("GITHUB_TOKEN")

## Model

### Get notebooks from master branch

In [None]:
def get_all_notebooks(branch):
    """List every notebook found in the repository tree of a branch.

    Queries the GitHub "git trees" endpoint (recursive) for
    jupyter-naas/awesome-notebooks, authenticating with the module-level
    ``token``, and keeps the ``.ipynb`` files that live inside a folder.

    Args:
        branch: Name of the branch (or any tree-ish) whose tree is listed.

    Returns:
        A pandas DataFrame with one row per notebook and the columns
        ``tool`` (first path segment), ``notebook_name`` (second path
        segment), ``notebook_path``, ``html_url`` and ``raw_url``. The
        DataFrame is empty (no columns) when the request fails or no
        notebook is found.
    """
    # Init
    # NOTE(review): both URL bases are pinned to master, so the generated URLs
    # point to master even when another branch is listed — confirm this is
    # intended.
    html_url_base = "https://github.com/jupyter-naas/awesome-notebooks/blob/master"
    raw_url_base = "https://raw.githubusercontent.com/jupyter-naas/awesome-notebooks/master"
    notebooks = []
    tree = []

    # Get tree from json
    url = f"https://api.github.com/repos/jupyter-naas/awesome-notebooks/git/trees/{branch}?recursive=1"
    headers = {"Authorization": f"token {token}"}
    res = requests.get(url, headers=headers)
    if res.status_code == 200:
        # A payload without "tree" would otherwise make the loop below fail
        # on None, so fall back to an empty list.
        tree = res.json().get("tree") or []
    else:
        print(res.json())

    # Loop on files
    for r in tree:
        file_path = r.get("path")

        # Skip repository plumbing and files sitting at the repository root.
        if ".github" in file_path or ".gitignore" in file_path or "/" not in file_path:
            continue
        if not file_path.endswith(".ipynb"):
            continue

        parts = file_path.split("/")
        # Percent-encode the path so names with spaces or "&" give valid URLs.
        notebook_path = urllib.parse.quote(file_path)
        notebooks.append(
            {
                "tool": parts[0],
                "notebook_name": parts[1],
                "notebook_path": file_path,
                "html_url": f"{html_url_base}/{notebook_path}",
                "raw_url": f"{raw_url_base}/{notebook_path}",
            }
        )
    return pd.DataFrame(notebooks)

# Reference listing: notebooks present on the master branch.
df_master = get_all_notebooks("master")
print("✅ Notebooks fetched from master branch:", len(df_master))
# Display the first row as the cell output.
df_master.head(1)

### Get notebooks from current branch

In [None]:
# Connect to GitHub and get branch
# Repo('.') opens the git repository located in the current working directory.
repo = Repo('.')
branch = repo.active_branch

# Listing of the notebooks on the branch currently checked out locally.
df_notebooks = get_all_notebooks(branch.name)
print(f"✅ Notebooks fetched from {branch.name} branch:", len(df_notebooks))
# Display the first row as the cell output.
df_notebooks.head(1)

## Output

### Get creation and last update dates from notebook file path

In [None]:
def get_dates(token, file_path):
    """Return the creation and last-update dates of a repository file.

    The dates are derived from the commit history returned by the GitHub
    "list commits" endpoint for jupyter-naas/awesome-notebooks, filtered on
    ``file_path``.

    Args:
        token: GitHub token sent in the Authorization header.
        file_path: Path of the file inside the repository (unquoted).

    Returns:
        A ``(created_at, updated_at)`` tuple of ``YYYY-MM-DD`` strings.
        ``created_at`` is the date of the last commit in the response (the
        oldest one); ``updated_at`` is the date of the most recent commit
        whose message is not "generateReadme: Refresh", falling back to the
        first commit in the response. Both are empty strings when the
        request fails or the file has no commit history.
    """
    # Init
    created_at = ""
    updated_at = ""
    file_path = urllib.parse.quote(file_path)

    # Requests
    # NOTE(review): only the first page of results is read; for a file with a
    # longer history the oldest commit may be on a later page — confirm
    # against the GitHub pagination defaults.
    url = f"https://api.github.com/repos/jupyter-naas/awesome-notebooks/commits?path={file_path}"
    headers = {"Authorization": f"token {token}"}
    res = requests.get(url, headers=headers)

    # Response
    if res.status_code == 200:
        commits = res.json()
        # An empty history previously raised IndexError on commits[-1].
        if commits:
            created_at = commits[-1]['commit']['committer']['date']
            # The response lists newest commits first, so the first commit
            # that is not an automated README refresh is the latest real
            # update.
            updated_at = next(
                (
                    c['commit']['committer']['date']
                    for c in commits
                    if c['commit'].get("message") != "generateReadme: Refresh"
                ),
                commits[0]['commit']['committer']['date'],
            )
    # Keep only the date part (YYYY-MM-DD) of the ISO timestamp.
    return created_at[:10], updated_at[:10]
        
# Example run of get_dates on a single notebook; the "&" in the file name
# exercises the URL quoting done inside get_dates.
file_path = 'OpenAI/OpenAI_Generate_Q&A.ipynb'
created_at, updated_at = get_dates(token, file_path)
print("Notebook:", file_path)
print("- Created:", created_at)
print("- Last update:", updated_at)

### Create dates on notebooks

In [None]:
# Stamp every notebook of the current branch with a "Last update" markdown
# cell and record the same dates in df_notebooks.

# Build the master lookup once instead of converting the column to a list on
# every iteration. It is skipped when there is nothing to process so that an
# empty run stays a no-op.
master_paths = set() if df_notebooks.empty else set(df_master["notebook_path"])

# Loop on notebooks
for row in df_notebooks.itertuples():
    # Get notebook path
    notebook_path = row.notebook_path

    # Get dates: notebooks already on master use their commit history,
    # notebooks that only exist on the current branch are dated today.
    if notebook_path in master_paths:
        created_at, updated_at = get_dates(token, notebook_path)
    else:
        created_at = updated_at = datetime.now().strftime("%Y-%m-%d")

    df_notebooks.loc[row.Index, "created_at"] = created_at
    df_notebooks.loc[row.Index, "updated_at"] = updated_at

    # Load notebook (path is resolved relative to the working directory)
    nb = load_notebook_node(notebook_path)

    # Text of the date cell, shared by the insert and the update branches
    date_source = f'**Last update:** {updated_at} (Created: {created_at})'

    # Init new cells
    new_cells = []

    # Loop on cells
    # NOTE(review): index 4 is presumably where the notebook template places
    # the date cell — confirm against the template.
    for i, cell in enumerate(nb.cells):
        if i == 4:
            if cell.source.startswith("**Last update:**"):
                # Date cell already present: refresh its content in place.
                cell["source"] = date_source
            else:
                # No date cell yet: insert one just before the current cell.
                new_cells.append(
                    {
                        'cell_type': 'markdown',
                        'id': str(uuid.uuid4()),
                        'metadata': {'papermill': {}, 'tags': []},
                        'source': date_source,
                    }
                )
        new_cells.append(cell)

    # Update cells
    nb.cells = new_cells
    with open(notebook_path, "w") as f:
        json.dump(nb, f)
    print(f"✅ {notebook_path} saved in Naas.")