In [None]:
from typing import Dict, List, Union

def get_leaves(item: Union[Dict, List], key=None) -> Dict:
    """Flatten a nested dict/list structure into a single-level dict of leaves.

    Dictionaries are descended into using each of their own keys. Lists are
    descended into element by element, every element inheriting the key the
    list itself was stored under. Any other value is treated as a leaf.

    Parameters
    ----------
    item : dict, list or any other value
        The (possibly nested) structure to flatten.

    key : hashable, optional
        The key under which ``item`` was found in its parent. It is only used
        when ``item`` is a list or a leaf; it is None for a top-level call.

    Returns
    -------
    dict
        Mapping of leaf key to leaf value. Only the innermost key is kept, so
        when the same key is met more than once (at different nesting levels,
        or for several scalars inside one list) the value visited last wins.

    Notes
    -----
    Exceptions (e.g. ``RecursionError`` on extremely deep nesting) propagate
    to the caller rather than being printed and turned into a ``None`` result,
    which would only surface later as an unrelated failure.
    """
    if isinstance(item, dict):
        leaves = {}
        for child_key, child in item.items():
            leaves.update(get_leaves(child, child_key))
        return leaves

    if isinstance(item, list):
        leaves = {}
        # List elements have no key of their own: reuse the list's key.
        for element in item:
            leaves.update(get_leaves(element, key))
        return leaves

    # Anything that is neither a dict nor a list is a leaf.
    return {key: item}

## Paths

In [None]:
# Standard Libraries
import os
import glob
import json
import getpass
import pathlib

# Third-party Libraries
import pandas as pd

In [None]:
# Home drive
HOME_DRIVE = str(pathlib.Path.home())

# Paths
OTHER_DRIVE = "C:\\"
ext = "csv"

# Build the relative path from its components so the separator is correct on
# every OS (on Windows this is still "path\\to\\file").
some_path = os.path.join("path", "to", "file")
path = os.path.join(HOME_DRIVE, some_path)

# wp = getpass.getpass("Enter: ")

# Every file directly inside `path` that carries the chosen extension
full_path = glob.glob(os.path.join(path, "*.{}".format(ext)))
# os.path.isfile(full_path)

# pd.concat raises ValueError when given an empty list, so fall back to an
# empty frame when no file matched the pattern.
_csv_frames = [pd.read_csv(csv_file) for csv_file in full_path]
pd.concat(_csv_frames) if _csv_frames else pd.DataFrame()

### Read / Write Files

In [1]:
# Standard Libraries
import csv

# Third-party Libraries
# import ijson

# Read in a file.
# NOTE: `file` (the path of the file to read) must already be defined when
# this cell runs. newline="" is what the csv module expects of file objects
# handed to its readers; it is harmless for JSON.
with open(file, "r", newline="") as f:

    # Read json file
    if file.endswith("json"):

        # Parse the file's JSON content into Python objects
        data = json.load(f)

        # Only a file holding a JSON-encoded *string* (double-encoded JSON)
        # needs a second decode; anything else is already parsed, and calling
        # json.loads on it would raise TypeError.
        new_data = json.loads(data) if isinstance(data, str) else data

    # Read csv file
    elif file.endswith("csv"):

        # reader = csv.reader(f, delimiter=",")
        # DictReader reads lazily: materialise the rows while the file is
        # still open, otherwise iterating after the with-block fails on a
        # closed file.
        reader = list(csv.DictReader(f))

# # Write to a json file
# with open(file, "w") as outfile:
#     json.dump(data, outfile)

# # Write to a csv file
# f = csv.writer(open("text.csv", "w"))
# f.writerow(["Column_Name"])
# f.writerow(["rows"])

In [None]:
# pd.DataFrame(data)
# pd.read_json(file, orient="columns")

### Json

In [None]:
# json.dumps([dict1, dict2])
# json.loads(json.dumps([dict1, dict2]))

### Batch large json files

In [None]:
# # Function for generator
# def batch_large_file(filename):
#     for i in open(filename, "r"):
#         yield i

In [None]:
# # Generator
# file = batch_large_file(big_file)

# # Count to append on different files
# counter = 1

# for f in file:
    
#     # Add number to file
#     new_file = "batch/batch_{}.json".format(str(counter))
    
#     # Write to a json file
#     with open(new_file, "w") as output:
#         json.dump(f, output)
        
#     counter += 1

## API

In [None]:
# Standard Libraries
import re
import base64
import urllib
import requests
import traceback

# Third-party Libraries
import pprint
import xmltodict
import elasticsearch
from io import StringIO
from bs4 import BeautifulSoup, SoupStrainer

In [None]:
# Imported explicitly: a bare `import urllib` does not guarantee that the
# `urllib.parse` submodule is loaded.
import urllib.parse

keywords = "avengers"

# URL-encode the keywords so they are safe to use as a query-string value.
query = urllib.parse.quote_plus(keywords)

# Placeholder: put the endpoint here, including whatever separator must
# precede the "q=" parameter.
base = ""

url = base + "q=" + query

# JSON payload sent with the POST request
request_body = {
    "key": "value"
}

In [None]:
session = requests.Session()

# Reset first so that, if the call below fails, later cells cannot silently
# reuse a response left over from an earlier run of this cell.
request = None

try:

    # Disable urllib warning (otherwise emitted because of verify=False below)
    requests.packages.urllib3.disable_warnings(
        requests.packages.urllib3.exceptions.SecurityWarning
    )

    # request = session.get(url, headers={"content-type": "application/json"})
    # NOTE: despite its name, `request` holds the response returned by the call.
    request = session.post(
        url,
        # cert=(cer,key),
        json=request_body,
        headers={"content-type": "application/json"},
        # WARNING: verify=False switches off TLS certificate verification.
        verify=False,
        allow_redirects=False,
        # stream=True defers downloading the body until it is accessed.
        stream=True,
    )
except Exception as e:
    # Best-effort cell: report the failure and keep the notebook running.
    traceback.print_exc()
    print(type(e))
    print(e)

In [None]:
# Body of the response as text. `request` is the response object returned by
# session.post; the `requests` module itself has no `text` attribute.
request.text

In [None]:
# Source: https://github.com/fhightower/html-to-json/blob/main/html_to_json/convert_html.py
# NOTE: `convert_html` is not imported anywhere in this notebook; it has to be
# made available (see the source above) before the HTML branch can run.

# The body is read from the response object `request`, not from the
# `requests` module (which has no `text` attribute).

# HTML
if "HTML" in request.text:
    json_data = convert_html.convert(request.text)

# XML
elif "xml" in request.text:
    # Round-trip through JSON so the parsed XML ends up as plain dicts/lists
    json_string = json.dumps(xmltodict.parse(request.text))
    json_data = json.loads(json_string)

else:
    json_data = request.json()
    # json_data = json.loads(request.text)

json_data

In [None]:
# BeautifulSoup
# Parse the raw bytes of the response object `request` (the `requests` module
# itself has no `content` attribute).
soup = BeautifulSoup(request.content, "xml")  # html.parser

In [None]:
# Text of the "title" element located by find() in the parsed document.
# NOTE(review): presumably find() returns None when nothing matches, in which
# case .get_text() would raise AttributeError — confirm.
soup.find("title").get_text()

In [None]:
# Text of the "body" element with every newline character removed
soup.find("body").get_text().replace("\n", "")

In [None]:
# # Remove white spaces
# [i for i in soup.stripped_strings]

In [None]:
# One normalised frame per record found in any list-valued entry of json_data
_df_list = []

for value in json_data.values():

    # Only list-valued entries hold records; every other entry is skipped.
    if isinstance(value, list):
        # _df_list.extend(get_leaves(record) for record in value)
        _df_list.extend(pd.json_normalize(record) for record in value)

# pd.concat raises ValueError when given an empty list, so fall back to an
# empty frame when json_data contained no list-valued entry.
pd.concat(_df_list, ignore_index=True, sort=False) if _df_list else pd.DataFrame()

In [None]:
# dict([(k, Series(v)) for k, v in json_data.items()])
# pd.from_dict(json_data, orient="index")

## Regex

In [None]:
# Every single ASCII letter or digit in the sample string, in order of appearance
re.compile("[0-9a-zA-Z]").findall("(1,2,3,4,5)")

## Pandas

In [None]:
# e_df["eml"].str.extract("\<(.*?)>\\", expand=True)
# e_df.loc[(e_df["eml"].str.contains("@")) & ~(e_df["eml"].str.contains("<")), "to"] = e_df.loc[(e_df["eml"].str.contains("@")) & ~(e_df["eml"].str.contains("<")), "from"]

In [None]:
# # Split columns
# df["a"], df["b"] = df["ab"].str.split(",", 1).str

In [None]:
# # Indexing
# pd.at[index, column]

In [None]:
# # Formatting
# pd.map("{:,.2f}".format)
# pd.to_string(formatters={"cost": "{:,.2f}".format})
# pd.options.display.float_format = "{:,.2f}".format

In [None]:
# dates.apply(lambda x:x.strftime("%Y-%m-%d"))

In [None]:
# pd.groupby([...])[].sum.unstack(fill_value=0).reset_index().rename_axis(None, axis=1)