* Exercise: Convert NYSE data from CSV in `data/nyse_all/nyse_data` to JSON format using gzip compression.
  * Source folder: `data/nyse_all/nyse_data`
  * Target folder: `data/nyse_all/nyse_json`
  * File Format: `gzip` compressed json format.
  * Column Names: `['ticker', 'trade_date', 'open_price', 'low_price', 'high_price', 'close_price', 'volume']`
  * Make sure file names are generated using the `part-<uuid>.json.gz` format (e.g. `part-some-unique-id.json.gz`).
  * Validate by comparing the number of records (using `shape`) in the source and target locations.


In [None]:
import glob
import os
import uuid
import pandas as pd

In [None]:
# Report how many records each gzip-compressed source CSV file holds.
nyse_columns = [
    'ticker', 'trade_date', 'open_price', 'low_price',
    'high_price', 'close_price', 'volume',
]
source_pattern = 'data/nyse_all/nyse_data/*.txt.gz'
for path in glob.glob(source_pattern):
    # Because `names` is given without `header`, pandas treats every line
    # as data; gzip decompression is inferred from the `.gz` suffix.
    df = pd.read_csv(path, names=nyse_columns)
    record_count = df.shape[0]
    print(f'Number of records in {path} is {record_count}')

In [None]:
%%time

# Convert each gzip-compressed source CSV file into a gzip-compressed,
# line-delimited JSON part file in the target folder.
target_dir = 'data/nyse_all/nyse_json'
nyse_columns = [
    'ticker', 'trade_date', 'open_price', 'low_price',
    'high_price', 'close_price', 'volume',
]

# exist_ok=True lets the cell be re-run without failing on the folder.
os.makedirs(target_dir, exist_ok=True)
for path in glob.glob('data/nyse_all/nyse_data/*.txt.gz'):
    df = pd.read_csv(path, names=nyse_columns)

    # A new UUID per source file gives every part file a distinct name.
    part_id = str(uuid.uuid1())
    target_path = f'{target_dir}/part-{part_id}.json.gz'

    # orient='records' with lines=True writes one JSON object per line.
    df.to_json(
        target_path,
        orient='records',
        lines=True,
        compression='gzip',
    )
    record_count = df.shape[0]
    print(f'Number of records processed in {path} is {record_count}')

In [None]:
%%time
# Report how many records each converted JSON part file holds, for
# comparison with the counts printed for the source files.
json_pattern = 'data/nyse_all/nyse_json/*.json.gz'
for path in glob.glob(json_pattern):
    # lines=True parses the file as one JSON object per line; gzip
    # decompression is inferred from the `.gz` suffix.
    df = pd.read_json(path, lines=True)
    record_count = df.shape[0]
    print(f'Number of records in {path} is {record_count}')