### SAP Machine Learning Embedding in OpenAI - step 03
##### Author: Sergiu Iatco. May, 2023
https://people.sap.com/iatco.sergiu <br>
https://www.linkedin.com/in/sergiuiatco/ <br>

In [1]:
import urllib.request
import os
import chardet
from nbconvert import HTMLExporter
import nbformat
import nbconvert
import pathlib

class collect_ipynb():
    """Convert Jupyter notebooks (.ipynb) to HTML files using nbconvert.

    ``ipynb_to_html`` converts a single notebook; ``ipynb_path_to_html``
    converts every notebook found recursively below a folder.
    """

    def __init__(self):
        pass

    def ipynb_to_html(self, ipynb_file, path_save = None, encoding = None, content = False, verbose = 0):
        """Convert one notebook to HTML and write it to disk.

        Parameters
        ----------
        ipynb_file : str or os.PathLike
            Path of the notebook to convert.
        path_save : str, optional
            Root output folder. When given, the HTML file is written to
            ``path_save/<directory part of ipynb_file>/<name>.html`` and missing
            folders are created. When omitted, the HTML file is written next to
            the source notebook.
        encoding : str, optional
            Encoding used to read the notebook and to write the HTML file.
            ``None`` means ``'utf-8'``; ``'detect'`` guesses the encoding from
            the notebook bytes with chardet; any other value is used as given.
        content : bool
            When true, return the path of the written HTML file (the path, not
            the HTML text). Otherwise nothing is returned.
        verbose : int
            0 - print only the completion message,
            1 - additionally print the source and destination paths.

        Returns
        -------
        str or None
            The HTML file path when ``content`` is true, else ``None``.
        """
        if encoding is None:
            encoding = 'utf-8'
        elif encoding == 'detect':
            # Fix: the original tested the misspelled name `ecnoding`, which raised
            # NameError for every non-None encoding and made this branch unreachable.
            with open(ipynb_file, 'rb') as f:
                raw_bytes = f.read()
            # chardet reports None when it cannot guess (e.g. empty input);
            # fall back to UTF-8 instead of passing None on as "platform default".
            encoding = chardet.detect(raw_bytes)['encoding'] or 'utf-8'

        # Parse the notebook, upgrading it to nbformat version 4 if necessary.
        with open(ipynb_file, encoding=encoding) as f:
            nb_content = nbformat.read(f, as_version=4)

        html_exporter = HTMLExporter()
        #     html_exporter = HTMLExporter(template_name='classic')
        body, _ = html_exporter.from_notebook_node(nb_content)

        # Split the source into its directory part and file name, then swap the extension.
        source_dir, source_name = os.path.split(ipynb_file)
        html_name = os.path.splitext(source_name)[0] + '.html'

        if path_save is not None:
            # Mirror the source directory below path_save.
            # NOTE: if the notebook path is absolute, os.path.join discards path_save
            # and the file ends up next to the source notebook.
            html_file = os.path.join(path_save, source_dir, html_name)
            target_dir = os.path.dirname(html_file)
            # An empty directory part means "current folder"; os.makedirs('') would raise.
            if target_dir:
                os.makedirs(target_dir, exist_ok=True)
        else:
            html_file = os.path.join(source_dir, html_name)

        with open(html_file, 'w', encoding=encoding) as f:
            f.write(body)

        if verbose == 1:
            print(f"Source     : {ipynb_file}")
            print(f"Destination: {html_file}")

        if os.path.exists(html_file):
            print("File conversion complete and saved!")
        else:
            print("File was not saved!")

        if content:
            return html_file

    def ipynb_path_to_html(self, repo_path = None, path_save = None, encoding = None, verbose = 0):
        """Convert every ``*.ipynb`` file found recursively below ``repo_path``.

        Parameters
        ----------
        repo_path : str, optional
            Folder to search; ``None`` means the current working directory.
        path_save, encoding, verbose
            Passed unchanged to ``ipynb_to_html`` for each notebook
            (verbose: 0 - completion messages only, 1 - also source and
            destination paths).

        Prints the number of notebooks processed when the loop has finished.
        """
        if repo_path is None:
            repo_path = ''

        # NOTE: the recursive pattern also matches notebooks inside hidden folders
        # such as `.ipynb_checkpoints`, if any exist below repo_path.
        # Sorted so the processing order does not depend on directory listing order.
        document_files = sorted(pathlib.Path(repo_path).glob("**/*.ipynb"))

        for ipynb_file in document_files:
            self.ipynb_to_html(ipynb_file, path_save = path_save, encoding = encoding, content = False, verbose = verbose)

        print(f"List conversion complete! Files: {len(document_files)}")
    

In [2]:
# # Example 1 read current folder | save current folder
# ci = collect_ipynb()
# ipynb_file ="example notebook v1.ipynb"
# ci.ipynb_to_html(ipynb_file)

In [3]:
# # Example 2 read current folder | save path_save
# ci = collect_ipynb()
# ipynb_file ="example notebook v1.ipynb"
# path_save = 'html_files2'
# ci.ipynb_to_html(ipynb_file, path_save = path_save)

In [4]:
# # Example 3 read file with folder | save same folder
# ci = collect_ipynb()
# ipynb_file ="example notebook v1.ipynb"
# ipynb_source = "ipynb_source_test/"
# ipynb_file = ipynb_source + ipynb_file
# # path_save = 'html_files'
# # display(ipynb_file)
# # display(path_save)
# ci.ipynb_to_html(ipynb_file, verbose = 1)
# # ci.ipynb_to_html(ipynb_file, path_save = path_save)

In [5]:
# # Example 4 read file with folder | save path_save subdir
# ci = collect_ipynb()
# ipynb_file ="example notebook v1.ipynb"
# ipynb_source = "ipynb_source_test/"
# ipynb_file = ipynb_source + ipynb_file
# display(ipynb_file)
# path_save = 'html_files_test'
# display(path_save)
# display(os.path.splitext(ipynb_file))
# display(os.path.split(ipynb_file))
# ci.ipynb_to_html(ipynb_file, path_save = path_save, verbose = 1)

In [6]:
# # Example 5 read from repo_path and save to path_save
# repo_path = "ipynb_hana_ml_samples/Python-API/usecase-examples/sapcommunity-hanaml-challenge"
# path_save = 'ipynb_to_html_test/'
# ci.ipynb_path_to_html(repo_path, path_save, encoding = None, verbose = 1)

In [7]:
# Folder that is searched recursively for .ipynb files to convert.
repo_path1 = "llama_challenge/ipynb_hana_ml_samples/Python-API/usecase-examples/sapcommunity-hanaml-challenge"
# Alternative output folders (disabled): pass one as path_save to write the HTML elsewhere.
# path_save = 'llama_challenge/ipynb_hana_ml_samples_to_html/'
# path_save = 'llama_challenge/ipynb_samples_html/'

ci = collect_ipynb()
# ci.ipynb_path_to_html(repo_path, path_save, encoding = None, verbose = 1)
# No path_save is passed, so each HTML file is written next to its source notebook;
# verbose = 1 prints the source and destination path of every converted file.
ci.ipynb_path_to_html(repo_path1, encoding = None, verbose = 1)

Source     : llama_challenge\ipynb_hana_ml_samples\Python-API\usecase-examples\sapcommunity-hanaml-challenge\10 Connectivity Check.ipynb
Destination: llama_challenge\ipynb_hana_ml_samples\Python-API\usecase-examples\sapcommunity-hanaml-challenge\10 Connectivity Check.html
File conversion complete and saved!
Source     : llama_challenge\ipynb_hana_ml_samples\Python-API\usecase-examples\sapcommunity-hanaml-challenge\20 Data upload.ipynb
Destination: llama_challenge\ipynb_hana_ml_samples\Python-API\usecase-examples\sapcommunity-hanaml-challenge\20 Data upload.html
File conversion complete and saved!
Source     : llama_challenge\ipynb_hana_ml_samples\Python-API\usecase-examples\sapcommunity-hanaml-challenge\PAL Tutorial - Unified Classification Hybrid Gradient Boosting - PredictiveQuality Example.ipynb
Destination: llama_challenge\ipynb_hana_ml_samples\Python-API\usecase-examples\sapcommunity-hanaml-challenge\PAL Tutorial - Unified Classification Hybrid Gradient Boosting - PredictiveQualit

In [8]:
# Second folder that is searched recursively for .ipynb files to convert.
repo_path2 = "llama_challenge/ipynb_blog/"
# Alternative output folder (disabled): pass it as path_save to write the HTML elsewhere.
# path_save2 = 'llama_challenge/ipynb_blog_to_html/'
ci = collect_ipynb()
# ci.ipynb_path_to_html(repo_path2, path_save2, encoding = None, verbose = 1)
# No path_save is passed, so each HTML file is written next to its source notebook.
ci.ipynb_path_to_html(repo_path2, encoding = None, verbose = 1)

Source     : llama_challenge\ipynb_blog\SAP HANA ML challendge - CHURN  v2.3 max.ipynb
Destination: llama_challenge\ipynb_blog\SAP HANA ML challendge - CHURN  v2.3 max.html
File conversion complete and saved!
List conversion complete! Files: 1


In [9]:
import pathlib

def list_ipynb(repo_path, extension):
    """Print the path of every file below ``repo_path`` with the given extension.

    The folder is searched recursively. ``extension`` is given without the
    leading dot (e.g. ``"html"``). One path is printed per line; nothing is
    returned.
    """
    pattern = "**/*.{}".format(extension)
    root = pathlib.Path(repo_path)
    # Collect all matches first, then print them one per line.
    matches = [entry for entry in root.glob(pattern)]
    for entry in matches:
        print(entry)

In [10]:
# Print every .html file found recursively under repo_path1 to check the conversion output.
list_ipynb(repo_path1, "html")

llama_challenge\ipynb_hana_ml_samples\Python-API\usecase-examples\sapcommunity-hanaml-challenge\10 Connectivity Check.html
llama_challenge\ipynb_hana_ml_samples\Python-API\usecase-examples\sapcommunity-hanaml-challenge\20 Data upload.html
llama_challenge\ipynb_hana_ml_samples\Python-API\usecase-examples\sapcommunity-hanaml-challenge\PAL Tutorial - Unified Classification Hybrid Gradient Boosting - PredictiveQuality Example.html
llama_challenge\ipynb_hana_ml_samples\Python-API\usecase-examples\sapcommunity-hanaml-challenge\Upload and explore Employee Churn data.html


In [13]:
# Print every .html file found recursively under repo_path2 to check the conversion output.
# repo_path = 'llama_challenge/ipynb_blog_to_html/'
list_ipynb(repo_path2, "html")

llama_challenge\ipynb_blog\SAP HANA ML challendge - CHURN  v2.3 max.html
