In [1]:

# Set up notebook
%pprint
%matplotlib inline
import sys
import os.path as osp, os as os

executable_path = sys.executable
scripts_folder = osp.join(osp.dirname(executable_path), 'Scripts'); assert osp.exists(scripts_folder)
py_folder = osp.abspath(osp.join(os.pardir, 'py')); assert osp.exists(py_folder), "Create the py folder"
ffmpeg_folder = r'C:\ffmpeg\bin'; assert osp.exists(ffmpeg_folder)
shared_folder = osp.abspath(osp.join(os.pardir, 'share')); assert osp.exists(shared_folder)

if (scripts_folder not in sys.path): sys.path.insert(1, scripts_folder)
if (py_folder not in sys.path): sys.path.insert(1, py_folder)
if (ffmpeg_folder not in sys.path): sys.path.insert(1, ffmpeg_folder)
if shared_folder not in sys.path: sys.path.insert(1, shared_folder)

from notebook_utils import NotebookUtilities
nu = NotebookUtilities(
    data_folder_path=osp.abspath(osp.join(os.pardir, 'data')),
    saves_folder_path=osp.abspath(osp.join(os.pardir, 'saves'))
)

# Import needed libraries
import re
import pandas as pd
import pyperclip
import ipywidgets as widgets
from IPython.display import display
import inspect

Pretty printing has been turned OFF


In [4]:

[f'nu.{fn}' for fn in dir(nu) if 'encod' in fn]

['nu.encode', 'nu.encoding_error', 'nu.encoding_errors_list', 'nu.encoding_type', 'nu.encoding_types_list', 'nu.one_hot_encode']

In [21]:

import pkgutil

# Get built-in module names
built_in_modules = set(sys.builtin_module_names)

# Get pure Python modules from the standard library
std_lib_path = os.path.dirname(os.__file__)
std_lib_modules = set([
    module_info.name
    for module_info in pkgutil.iter_modules([std_lib_path])
])

# Combine both lists
standard_library_modules = built_in_modules | std_lib_modules

# Sort the list for easier reading
standard_library_modules = sorted(standard_library_modules)

print(standard_library_modules)



In [19]:

import ast
import re

IMPORTS_REGEX = re.compile(r"^\s*(?:import\s+[\w.]+(?:\s+as\s+\w+)?(?:\s*,\s*[\w.]+(?:\s+as\s+\w+)?)*|from\s+[\w.]+\s+import\s+(?:[\w.]+(?:\s+as\s+\w+)?(?:\s*,\s*[\w.]+(?:\s+as\s+\w+)?)*|\(\s*[\w.]+(?:\s+as\s+\w+)?(?:\s*,\s*[\w.]+(?:\s+as\s+\w+)?)*,?\s*\)))", re.MULTILINE)
def find_imports_in_file(filepath):
    """Parses a Python file with the AST module and regex to find all import statements."""
    with open(filepath, "r", encoding=nu.encoding_type) as f:
        file_content = f.read()

    # Get imports from regex
    regex_imports = []
    for regex_str in IMPORTS_REGEX.findall(file_content):
        parens_list = re.split("[)(]", regex_str.strip(), 0)
        if len(parens_list) > 1:
            prefix = parens_list[0].strip()
            commas_list = [s.strip() for s in re.split(r"\s*,\s*", parens_list[1].strip(), 0) if s.strip()]
            for comma_str in commas_list:
                regex_imports.append(prefix + ' ' + comma_str)
        elif len(parens_list) == 1:
            imports_list = [s.strip() for s in re.split(r"(from\s+[\w.]+\s+import)\s+", parens_list[0].strip(), 0) if s.strip()]
            if len(imports_list) > 1:
                prefix = imports_list[0]
                commas_list = [s.strip() for s in re.split(r"\s*,\s*", imports_list[1], 0) if s.strip()]
                for comma_str in commas_list:
                    regex_imports.append(prefix + ' ' + comma_str)
            elif len(imports_list) == 1:
                regex_imports.append(imports_list[0].strip())

    # Get imports from ast
    ast_imports = []
    try:
        root = ast.parse(file_content, filename=filepath)
    
    # If there's a syntax error in the file, return ast_imports as empty
    except SyntaxError:
        return ast_imports, regex_imports

    for node in ast.walk(root):
        
        # Collect statements like "import math, sys"
        if isinstance(node, ast.Import):
            for alias in node.names:
                statement = f"import {alias.name}"
                if alias.asname:
                    statement += f" as {alias.asname}"
                ast_imports.append(statement)
        
        # Collect statements like "from x import y as z"
        elif isinstance(node, ast.ImportFrom):
            for alias in node.names:
                module = node.module if node.module else ""
                statement = f"from {module} import {alias.name}"
                if alias.asname:
                    statement += f" as {alias.asname}"
                ast_imports.append(statement)

    return ast_imports, regex_imports

def scan_all_imports_in_folder(folder):
    """Recursively scans all .py files in a folder for import statements."""
    ast_all_imports = {}
    regex_all_imports = {}
    for root, _, files in os.walk(folder):
        for filename in files:
            if filename.endswith(".py"):
                filepath = osp.join(root, filename)
                ast_file_imports, regex_file_imports = find_imports_in_file(filepath)
                ast_all_imports[filepath] = ast_file_imports
                regex_all_imports[filepath] = regex_file_imports
    return ast_all_imports, regex_all_imports

ast_results, regex_results = scan_all_imports_in_folder(shared_folder)
filepaths = sorted(set(ast_results.keys()).union(set(regex_results.keys())))
for filepath in filepaths:
    print()
    print(f"File: {filepath}")
    ast_set = set(ast_results[filepath])
    regex_set = set(regex_results[filepath])
    print(f'Imports only in ast: {ast_set.difference(regex_set)}')
    print(f'Imports only in regex: {regex_set.difference(ast_set)}')


File: C:\Users\daveb\OneDrive\Documents\GitHub\notebooks\share\base_config.py
Imports only in ast: set()
Imports only in regex: set()

File: C:\Users\daveb\OneDrive\Documents\GitHub\notebooks\share\data_analysis.py
Imports only in ast: set()
Imports only in regex: set()

File: C:\Users\daveb\OneDrive\Documents\GitHub\notebooks\share\data_preparation.py
Imports only in ast: set()
Imports only in regex: {'from notebook_utils import NotebookUtilities'}

File: C:\Users\daveb\OneDrive\Documents\GitHub\notebooks\share\data_validation.py
Imports only in ast: set()
Imports only in regex: set()

File: C:\Users\daveb\OneDrive\Documents\GitHub\notebooks\share\file_operations.py
Imports only in ast: set()
Imports only in regex: set()

File: C:\Users\daveb\OneDrive\Documents\GitHub\notebooks\share\notebook_utils.py
Imports only in ast: set()
Imports only in regex: {'from notebook_utils import NotebookUtilities', 'import sys', 'import os.path as osp, os as os'}

File: C:\Users\daveb\OneDrive\Docume

In [4]:

dir(set)

['__and__', '__class__', '__class_getitem__', '__contains__', '__delattr__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__getstate__', '__gt__', '__hash__', '__iand__', '__init__', '__init_subclass__', '__ior__', '__isub__', '__iter__', '__ixor__', '__le__', '__len__', '__lt__', '__ne__', '__new__', '__or__', '__rand__', '__reduce__', '__reduce_ex__', '__repr__', '__ror__', '__rsub__', '__rxor__', '__setattr__', '__sizeof__', '__str__', '__sub__', '__subclasshook__', '__xor__', 'add', 'clear', 'copy', 'difference', 'difference_update', 'discard', 'intersection', 'intersection_update', 'isdisjoint', 'issubset', 'issuperset', 'pop', 'remove', 'symmetric_difference', 'symmetric_difference_update', 'union', 'update']

In [6]:

set([f'nu.{fn}' for fn in dir(nu.uncategorized) if any(map(lambda x: x in fn, ['folder', 'dir']))]).difference(
    set([f'nu.{fn}' for fn in dir(nu) if any(map(lambda x: x in fn, ['folder', 'dir']))])
)

set()

In [7]:

set([f'nu.{fn}' for fn in dir(nu) if any(map(lambda x: x in fn, ['folder', 'dir']))]).difference(
    set([f'nu.{fn}' for fn in dir(nu.uncategorized) if any(map(lambda x: x in fn, ['folder', 'dir']))])
)

{'nu.list_dfs_in_folder', 'nu.delete_ipynb_checkpoint_folders', 'nu.get_dir_tree'}

In [8]:

nu.__dir__()

['__add__', '__class__', '__contains__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattr__', '__getattribute__', '__getitem__', '__getnewargs__', '__getstate__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__iter__', '__le__', '__len__', '__lt__', '__mod__', '__module__', '__mul__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__rmod__', '__rmul__', '__setattr__', '__sizeof__', '__str__', '__subclasshook__', '__weakref__', 'add_staticmethod_decorations', 'anaconda_folder', 'apply_multilevel_numbering', 'attempt_to_pickle', 'bin_folder', 'cache_folder', 'capitalize', 'casefold', 'center', 'check_4_doubles', 'check_for_typos', 'color_distance_from', 'comment_regex', 'compute_similarity', 'conjunctify_nouns', 'convert_strings_to_integers', 'convert_to_data_frame', 'convert_to_df', 'count', 'count_ngrams', 'count_swaps_to_perfect_order', 'csv_exists', 'data_analysis', 'data_csv_folder', 'data_folder', 'data_model

In [20]:

# Show the requirements.txt path
osp.abspath(osp.join(nu.github_folder, os.pardir, 'share', 'requirements.txt'))

'C:\\Users\\daveb\\OneDrive\\Documents\\GitHub\\share\\requirements.txt'

In [None]:

import glob

# Use the glob function with a recursive pattern to gather all the notebook_utils.py files
# in all the "share" folders in all the repo directories in the grandparent directory
file_paths = []
for file_path in [osp.abspath(f) for f in glob.glob('../../*/share/notebook_utils.py', recursive=True)]:
    print(file_path)
    # os.remove(file_path)

In [33]:

print([f'os.{fn}' for fn in dir(os) if 'list' in fn])

['os._get_exports_list', 'os.listdir']


In [35]:

# Find all the OS Path Navigation notebooks in the ipynb folders of my repos
pushables_list = nu.load_object('pushables_list')
ipynb_paths = [osp.join(file_path, 'ipynb') for file_path in pushables_list]
file_name = 'OS Path Navigation.ipynb'
file_paths_list = []
for parent_directory in ipynb_paths:
    if osp.isdir(parent_directory):
        for old_file_name in os.listdir(parent_directory):
            if old_file_name == file_name:
                old_file_path = os.path.join(parent_directory, old_file_name)
                file_paths_list.append(old_file_path)
for file_path in file_paths_list:
    print(file_path)

C:\Users\daveb\OneDrive\Documents\GitHub\covid19\ipynb\OS Path Navigation.ipynb
C:\Users\daveb\OneDrive\Documents\GitHub\itm-analysis-reporting-1\ipynb\OS Path Navigation.ipynb
C:\Users\daveb\OneDrive\Documents\GitHub\job-hunting\ipynb\OS Path Navigation.ipynb
C:\Users\daveb\OneDrive\Documents\GitHub\mimetic_tribes\ipynb\OS Path Navigation.ipynb
C:\Users\daveb\OneDrive\Documents\GitHub\notebooks\ipynb\OS Path Navigation.ipynb
C:\Users\daveb\OneDrive\Documents\GitHub\rpc\ipynb\OS Path Navigation.ipynb
C:\Users\daveb\OneDrive\Documents\GitHub\StatsByCountry\ipynb\OS Path Navigation.ipynb
C:\Users\daveb\OneDrive\Documents\GitHub\StatsByUSState\ipynb\OS Path Navigation.ipynb
C:\Users\daveb\OneDrive\Documents\GitHub\Strauss-Howe\ipynb\OS Path Navigation.ipynb
C:\Users\daveb\OneDrive\Documents\GitHub\transcriptions-notebook\ipynb\OS Path Navigation.ipynb


In [27]:

# Get the repositories directory by various means, with increasing conciseness
print([f'os.{fn}' for fn in dir(os) if ('path' in fn) and (len(f'os.{fn}') < len('osp.abspath'))])
print([f'osp.{fn}' for fn in dir(osp) if ('path' in fn) and (len(f'osp.{fn}') < len('osp.abspath'))])
print(osp.abspath(osp.join(os.curdir, os.pardir, os.pardir)))
print(osp.abspath(osp.join(nu.github_folder, os.pardir)))
print(osp.dirname(osp.dirname(osp.abspath(os.curdir))))
print(osp.dirname(nu.github_folder))

['os._fspath', 'os.defpath', 'os.fspath', 'os.path', 'os.pathsep']
[]
C:\Users\daveb\OneDrive\Documents\GitHub
C:\Users\daveb\OneDrive\Documents\GitHub
C:\Users\daveb\OneDrive\Documents\GitHub
C:\Users\daveb\OneDrive\Documents\GitHub


In [15]:

# Find all the notebooks with the word "color" in their file name
black_list = ['.ipynb_checkpoints', '$Recycle.Bin']
folder_path = '../../color/ipynb'
ascii_regex = re.compile(r'[^A-Za-z0-9]+')
file_ending = '.ipynb'
file_paths_list = []
for root_dir in ['C:\\', 'D:\\']:
    for parent_directory, child_directories, files_list in os.walk(root_dir):
        if all(map(lambda x: x not in parent_directory, black_list)):
            for old_file_name in files_list:
                if old_file_name.endswith(file_ending) and ('color' in old_file_name.lower()):
                    old_file_path = os.path.join(parent_directory, old_file_name)
                    file_paths_list.append(old_file_path)

In [24]:

# Copy all the notebook files with the word "color" in their name to the color repo
import shutil

for old_file_path in file_paths_list:
    new_file_name = ascii_regex.sub('_', osp.relpath(old_file_path, osp.abspath(folder_path)).replace(file_ending, '').lstrip('.\\')) + file_ending
    new_file_path = osp.abspath(osp.join(folder_path, new_file_name))
    # shutil.copy(old_file_path, new_file_path)

In [None]:

# Open a file with a relative path in Notepad++
file_path = '../data/txt/resume.txt'
nu.open_path_in_notepad(file_path)

In [None]:

# Show the location of all the files with "hair" in the name
black_list = ['.ipynb_checkpoints', '$Recycle.Bin']
# walk_folder = osp.dirname(nu.github_folder)
for parent_directory, child_directories, files_list in os.walk('C:\\'):
    if all(map(lambda x: x not in parent_directory, black_list)):
        for file_name in files_list:
            if ('hair' in file_name):
                file_path = osp.join(parent_directory, file_name)
                print(file_path)

In [4]:

# Show the location of all the resume in Microsoft Word form
for parent_directory, child_directories, files_list in os.walk('C:\\'):
    for file_name in files_list:
        if file_name.endswith('.docx') and ('resume' in file_name.lower()):
            file_path = osp.join(parent_directory, file_name)
            if osp.isfile(file_path):
                print(f'{file_path}')

C:\Users\daveb\OneDrive\Documents\Babbitt Stuff\Jay_s Junk\Jay-Babbitt_Resume.docx
C:\Users\daveb\OneDrive\Documents\Google Drive\Other\Job Hunting\Dave_Babbitt_Resume (1).docx
C:\Users\daveb\OneDrive\Documents\Google Drive\Other\Job Hunting\Dave_Babbitt_Resume.docx
C:\Users\daveb\OneDrive\Documents\Google Drive\Other\Job Hunting\Dave_Babbitt_Resume_Cold_Fusion_Developer.docx
C:\Users\daveb\OneDrive\Documents\Google Drive\Other\Job Hunting\Dave_Babbitt_Resume_Delivery_Driver.docx
C:\Users\daveb\OneDrive\Documents\Google Drive\Other\Job Hunting\Dave_Babbitt_Resume_Jr._Java_Developer.docx
C:\Users\daveb\OneDrive\Documents\Google Drive\Other\Job Hunting\Dave_Babbitt_Resume_Predictive_Analyst.docx
C:\Users\daveb\OneDrive\Documents\Google Drive\Other\Job Hunting\Dave_Babbitt_Resume_QA_Tester.docx
C:\Users\daveb\OneDrive\Documents\Google Drive\Other\Job Hunting\Dave_Babbitt_Resume_Reactor_Operator.docx
C:\Users\daveb\OneDrive\Documents\Google Drive\Other\Job Hunting\Dave_Babbitt_Resume_Resea