In [0]:
def dbfs_file_exists(path):
    """
    Report whether `path` can be listed through `dbutils.fs.ls`.

    Returns True when the listing call succeeds, and False when it fails with
    an error whose text mentions 'java.io.FileNotFoundException'. Any other
    failure is re-raised unchanged.
    """
    try:
        dbutils.fs.ls(path)
    except Exception as err:
        # Only a "file not found" failure means "does not exist"; everything
        # else is propagated to the caller.
        if 'java.io.FileNotFoundException' not in str(err):
            raise
        return False
    return True
  
def unzip_and_rename(zip_path, output_dir):
    """
    Extract the first file stored in a zip archive and save it under the archive's name.

    Inputs:
    * zip_path: complete zip file path
    * output_dir: output directory (the file is opened for writing directly
      inside it, so it must already exist)

    Output:
    * Saves the extracted file in output_dir, named after the zip file's base
      name (without its extension) plus the extension of the archived file
    * Returns the name (not the full path) of the extracted file

    Raises:
    * IndexError: if the archive contains no file entries
    """
    # Function-scope import so this block does not depend on the file's import list.
    import shutil

    base_name = os.path.basename(zip_path)
    base_name_nozip = os.path.splitext(base_name)[0]  # original zip file name

    with zipfile.ZipFile(zip_path, 'r') as zip_ref:
        # Archives built from a folder list the folder itself (e.g. "data/")
        # before its contents; skip such directory entries so we extract the
        # actual file rather than the folder entry.
        file_entries = [info for info in zip_ref.infolist() if not info.is_dir()]
        if not file_entries:
            # IndexError is what indexing an empty name list raised before;
            # keep the type, add a useful message.
            raise IndexError(f"no file entries found in zip archive: {zip_path}")

        # Archives are expected to hold a single file; the first one is used.
        inner_file = file_entries[0]

        ext = os.path.splitext(inner_file.filename)[1]  # zipped file extension
        output_name = base_name_nozip + ext
        output_file_path = os.path.join(output_dir, output_name)

        # Stream in chunks instead of loading the whole decompressed file
        # into memory at once.
        with zip_ref.open(inner_file) as source, open(output_file_path, 'wb') as target:
            shutil.copyfileobj(source, target)

    return output_name
    
def detect_encoding(path: str, sample_size: int = 4096) -> str:
    """
    Guess a file's text encoding from its leading bytes using chardet.

    Reads at most `sample_size` bytes from the start of `path` in binary mode
    and passes them to `chardet.detect`. Returns the reported encoding, or the
    string "unknown" when chardet reports no encoding.
    """
    with open(path, "rb") as handle:
        head = handle.read(sample_size)

    guess = chardet.detect(head).get("encoding")
    return guess if guess else "unknown"

def detect_format(path):
    """
    Identify an archive format from the first bytes of a file.

    Reads the first 6 bytes of `path` and compares them against known
    signatures. Returns "zip", "7z", or "unknown" when neither matches.
    """
    # (leading signature bytes, label) pairs, checked in order.
    signatures = (
        (b"PK\x03\x04", "zip"),
        (b"7z\xBC\xAF\x27\x1C", "7z"),
    )

    with open(path, "rb") as handle:
        header = handle.read(6)

    return next(
        (label for magic, label in signatures if header.startswith(magic)),
        "unknown",
    )