# utils

> Tools to help understand what code is doing. 

In [None]:
#| default_exp utils

In [None]:
#| hide
from nbdev.showdoc import *

In [None]:
#| export
import datetime
import multiprocessing
import time
import tracemalloc
from typing import Literal

import numpy as np
import pandas as pd
import psutil
import rich
from rich.console import Console

In [None]:
#| export
def pmu(
    description: str = None,  # Optional, use to give descriptive log for the memory usage at a particular point in the code.
    mode: Literal["both", "cpu", "gpu"] = "both",  # Specify which memory to print. Options are "cpu", "gpu", or "both" (the default).
):
    """Print memory usage.

    CPU usage is the resident set size (RSS) of the current process plus the
    RSS of all of its child processes (found recursively). GPU usage is read
    through NVML and covers device index 0 only.

    Raises `ImportError` when GPU memory is requested but `pynvml` cannot be
    imported. With `mode="both"` the CPU figure is printed before that error
    is raised.
    """
    if description:
        print(description)

    if mode in ("cpu", "both"):
        main_process = psutil.Process()
        total_memory_usage = main_process.memory_info().rss
        for child in main_process.children(recursive=True):
            try:
                total_memory_usage += child.memory_info().rss
            except psutil.NoSuchProcess:
                # The child exited between being listed and being queried;
                # it no longer holds memory, so leave it out of the total.
                continue
        print(f"CPU Memory usage: {total_memory_usage / 1024 ** 2:.2f} MB")

    if mode in ("gpu", "both"):
        try:
            from pynvml import (
                nvmlDeviceGetHandleByIndex,
                nvmlDeviceGetMemoryInfo,
                nvmlInit,
                nvmlShutdown,
            )
        except ImportError as e:
            # Chain a new ImportError so the hint is what the user sees;
            # mutating `e.args` would not change the displayed message.
            raise ImportError(
                "Checking gpu memory is only supported for Nvidia devices and requires the pynvml library to be installed",
                name=e.name,
            ) from e

        nvmlInit()
        try:
            handle = nvmlDeviceGetHandleByIndex(0)
            info = nvmlDeviceGetMemoryInfo(handle)
        finally:
            # Pair every nvmlInit() with a shutdown so NVML is released even
            # when the device query fails.
            nvmlShutdown()
        print("GPU Memory Usage:")
        print(f"Total: {info.total / 1024**2:.2f} MiB")
        print(f"Used: {info.used / 1024**2:.2f} MiB")
        print(f"Free: {info.free / 1024**2:.2f} MiB")

In [None]:
#| export
class NS:
    """Class to store state of variables in the global namespace

    Useful for tracking changes to the global namespace at various points in the code.
    Particularly useful for use in a Jupyter notebook.

    Creating an instance takes a shallow snapshot of the tracked namespace;
    calling the instance prints which names were added, removed, or rebound
    to a different value since that snapshot. Because the snapshot is shallow,
    an object mutated in place (same object, new contents) is not reported
    as modified.
    """
    def __init__(
        self,
        namespace: dict = None,  # Namespace to track. Defaults to the global namespace of the code that creates the `NS`.
    ):
        if namespace is None:
            # Use the *caller's* globals: plain `globals()` would always be
            # the namespace of the module defining this class, not the
            # notebook/script that is using it.
            import inspect
            frame = inspect.currentframe()
            caller = frame.f_back if frame is not None else None
            namespace = caller.f_globals if caller is not None else globals()
        self.namespace = namespace
        self.initial_variables = dict(namespace)

    @staticmethod
    def _changed(old, new) -> bool:
        """Return True when `new` should be reported as different from `old`."""
        if old is new:
            return False
        try:
            return bool(old != new)
        except (TypeError, ValueError):
            # Array-like values (e.g. numpy arrays, DataFrames) compare
            # element-wise and refuse truth-testing; a different object that
            # cannot be compared is conservatively treated as modified.
            return True

    def __call__(self):
        current_variables = dict(self.namespace)

        added = set(current_variables) - set(self.initial_variables)
        removed = set(self.initial_variables) - set(current_variables)
        modified = {
            key for key in (set(current_variables) & set(self.initial_variables))
            if self._changed(self.initial_variables[key], current_variables[key])
        }

        print("Added variables:")
        print(added)
        print("\nRemoved variables:")
        print(removed)
        print("\nModified variables:")
        print(modified)

In [None]:
#| export
def vprint(message):
    """Print `message` only when the calling function is in verbose mode.

    Give a function a `verbose: bool` parameter (or local variable) and call
    `vprint` inside it: the message is printed when the caller's local
    `verbose` is truthy and silently dropped when it is falsy or absent.
    """
    import inspect

    # f_back is the frame of whoever called vprint; its locals carry `verbose`.
    caller_frame = inspect.currentframe().f_back
    wants_output = caller_frame.f_locals.get("verbose", False)
    if not wants_output:
        return
    print(message)

In [None]:
from rich.console import Console

In [None]:
#| export
# Console object that `sprint` writes through.
console = Console()
# Time of the previous `sprint` call; `sprint` reports the delta from this
# value and then overwrites it. Initialised when this cell/module is executed.
print_ts = time.time()

def sprint(*args, **kwargs) -> None:
    """Print through `console`, prefixed with the process name and elapsed time.

    The prefix contains the current `multiprocessing` process name (bold and
    left-aligned for "MainProcess", right-aligned otherwise, padded to 16
    characters) followed by the seconds elapsed since the previous `sprint`
    call. All positional and keyword arguments are forwarded to
    `console.print` after the prefix. Updates the module-level `print_ts`.
    """
    global print_ts
    timestamp = time.time()
    name = multiprocessing.current_process().name
    label = f"[bold]{name:<16}[/bold]" if name == "MainProcess" else f"{name:>16}"
    elapsed = timestamp - print_ts
    console.print(f"{label} [[green bold]{elapsed:>5.2f}s[/]]", *args, **kwargs)
    # Only advance the reference time once the message has actually been printed.
    print_ts = timestamp
    
def mem_usage() -> str:
    """Return a one-line summary of `tracemalloc`'s current and peak traced memory.

    Both figures are reported as whole units of 1024 * 1024 bytes (rounded
    down). When `tracemalloc` is not tracing, both values are 0.
    """
    bytes_per_unit = 1024 * 1024
    current_mb, peak_mb = (
        size // bytes_per_unit for size in tracemalloc.get_traced_memory()
    )
    return f"Memory usage: {current_mb} MB; peak: {peak_mb} MB"

In [None]:
#| hide
# Run nbdev's export step from inside the notebook.
import nbdev; nbdev.nbdev_export()

In [None]:
def make_data(length: int) -> np.recarray:
    """Build a synthetic (timestamp, val) record array, logging memory at each stage.

    Rows are generated in groups of five with the values 0, 0, 1, 0, NaN, each
    stamped with `datetime.datetime.now()`. Only `length // 5` complete groups
    are produced, so the result has `length` rows rounded down to a multiple
    of 5. The data goes through a Python list, then a `pandas.DataFrame`, then
    `DataFrame.to_records()`; `sprint`/`mem_usage` report memory before each step.
    """
    pattern = (0, 0, 1, 0, np.nan)  # one group of `val` entries
    data = []
    sprint("Putting data into a Python list.", mem_usage())
    for _ in range(length // 5):
        data.extend((datetime.datetime.now(), val) for val in pattern)
    sprint("Converting into a data frame.", mem_usage())
    df = pd.DataFrame(data, columns=["timestamp", "val"])
    sprint("Converting into numpy array", mem_usage())
    np_array = df.to_records()
    sprint("Returning", mem_usage())
    return np_array

# Trace allocations for one make_data run so mem_usage() has data to report.
tracemalloc.start()
print_ts = time.time()  # restart sprint's elapsed-time reference for this run
try:
    make_data(500000)
finally:
    # Stop tracing even if make_data fails, so tracing overhead does not leak
    # into whatever runs next in the session.
    tracemalloc.stop()


In [None]:
from multiprocessing.managers import SharedMemoryManager

SharedMemoryManager??

[0;31mInit signature:[0m [0mSharedMemoryManager[0m[0;34m([0m[0;34m*[0m[0margs[0m[0;34m,[0m [0;34m**[0m[0mkwargs[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0;31mSource:[0m        
    [0;32mclass[0m [0mSharedMemoryManager[0m[0;34m([0m[0mBaseManager[0m[0;34m)[0m[0;34m:[0m[0;34m[0m
[0;34m[0m        [0;34m"""Like SyncManager but uses SharedMemoryServer instead of Server.[0m
[0;34m[0m
[0;34m        It provides methods for creating and returning SharedMemory instances[0m
[0;34m        and for creating a list-like object (ShareableList) backed by shared[0m
[0;34m        memory.  It also provides methods that create and return Proxy Objects[0m
[0;34m        that support synchronization across processes (i.e. multi-process-safe[0m
[0;34m        locks and semaphores).[0m
[0;34m        """[0m[0;34m[0m
[0;34m[0m[0;34m[0m
[0;34m[0m        [0m_Server[0m [0;34m=[0m [0mSharedMemoryServer[0m[0;34m[0m
[0;34m[0m[0;34m[0m
[0;34m[0m    