# Yelp

## 1. Introduction

The purpose of this notebook is to implement an initial load for the BRONZE stage using JSON data.

## 2. Environment setup

In [0]:
%sql
-- Make training_catalog the current catalog so later unqualified names resolve inside it.
USE CATALOG training_catalog;

In [0]:
%sql
-- Make yelp_db the current schema; the tables below are created without a schema prefix.
USE SCHEMA yelp_db;

In [0]:
%sql
-- Sanity check: show which catalog and schema the session is currently using.
SELECT
    current_catalog() AS current_catalog,
    current_schema() AS current_schema;

## 3. Batch data ingestion with CTAS

This method ingests the data as an initial load.

Steps:

1. Create table
2. Ingest data

### 3.1. Exploring data

Exploring files

In [0]:
%sql
-- Show the files available in the training volume before reading any of them.
LIST '/Volumes/training_catalog/yelp_db/training_files'

#### Business

In [0]:
# Peek at five rows of the business files read as raw text (no JSON parsing).
(
    spark.sql(
        "SELECT * FROM text.`/Volumes/training_catalog/yelp_db/training_files/*business*` LIMIT 5"
    )
    .display()
)

In [0]:
%sql
-- Peek at five rows of the business files parsed as JSON; no schema is supplied here.
SELECT *
FROM read_files(
    '/Volumes/training_catalog/yelp_db/training_files/*business*',
    format => 'JSON'
)
LIMIT 5;

#### Checkin

In [0]:
# Peek at five rows of the checkin files read as raw text (no JSON parsing).
(
    spark.sql(
        "SELECT * FROM text.`/Volumes/training_catalog/yelp_db/training_files/*checkin*` LIMIT 5"
    )
    .display()
)

In [0]:
%sql
-- Peek at five rows of the checkin files parsed as JSON; no schema is supplied here.
SELECT *
FROM read_files(
    '/Volumes/training_catalog/yelp_db/training_files/*checkin*',
    format => 'JSON'
)
LIMIT 5;

#### Tip

In [0]:
# Peek at five rows of the tip files read as raw text (no JSON parsing).
(
    spark.sql(
        "SELECT * FROM text.`/Volumes/training_catalog/yelp_db/training_files/*tip*` LIMIT 5"
    )
    .display()
)

In [0]:
%sql
-- Peek at five rows of the tip files parsed as JSON; no schema is supplied here.
SELECT *
FROM read_files(
    '/Volumes/training_catalog/yelp_db/training_files/*tip*',
    format => 'JSON'
)
LIMIT 5;

### 3.2. Creating managed delta table

#### 3.2.1. Store data into table

##### Business

In [0]:
%sql
-- Initial (full) load of the Yelp business JSON files into the bronze table.
--
-- * CREATE OR REPLACE keeps the cell rerunnable in a single statement, so the
--   table is never left dropped if the load fails part-way.
-- * Only JSON reader options are passed; HEADER applies to CSV files only.
-- * attributes and hours are declared STRING, so they are stored as text here
--   and must be queried with JSON path extraction, not struct field access.
-- * file_modification_time / file_name record which source file each row came from.
-- * _rescued_data is the column configured to hold data that does not fit the schema.
CREATE OR REPLACE TABLE yelp_business_bronze
AS
SELECT
    *,
    _metadata.file_modification_time,
    _metadata.file_name
FROM read_files(
    "/Volumes/training_catalog/yelp_db/training_files/*business*",
    FORMAT => "JSON",
    SCHEMA => '''
        business_id STRING,
        name STRING,
        address STRING,
        city STRING,
        state STRING,
        postal_code STRING,
        latitude DOUBLE,
        longitude DOUBLE,
        stars DOUBLE,
        review_count INT,
        is_open TINYINT,
        attributes STRING,
        categories STRING,
        hours STRING
    ''',
    rescueddatacolumn => "_rescued_data"
);

-- Preview data
SELECT * FROM yelp_business_bronze LIMIT 5;

##### Checkin

In [0]:
%sql
-- Initial (full) load of the Yelp checkin JSON files into the bronze table.
--
-- * CREATE OR REPLACE keeps the cell rerunnable in a single statement, so the
--   table is never left dropped if the load fails part-way.
-- * Only JSON reader options are passed; HEADER applies to CSV files only.
-- * date is loaded as STRING, i.e. exactly as it appears in the source files.
-- * file_modification_time / file_name record which source file each row came from.
-- * _rescued_data is the column configured to hold data that does not fit the schema.
CREATE OR REPLACE TABLE yelp_checkin_bronze
AS
SELECT
    *,
    _metadata.file_modification_time,
    _metadata.file_name
FROM read_files(
    "/Volumes/training_catalog/yelp_db/training_files/*checkin*",
    FORMAT => "JSON",
    SCHEMA => '''
        business_id STRING,
        date STRING
    ''',
    rescueddatacolumn => "_rescued_data"
);

-- Preview data
SELECT * FROM yelp_checkin_bronze LIMIT 5;

##### Tip

In [0]:
%sql
-- Initial (full) load of the Yelp tip JSON files into the bronze table.
--
-- * CREATE OR REPLACE keeps the cell rerunnable in a single statement, so the
--   table is never left dropped if the load fails part-way.
-- * Only JSON reader options are passed; HEADER applies to CSV files only.
-- * date is parsed as TIMESTAMP_NTZ (a timestamp without time zone).
-- * file_modification_time / file_name record which source file each row came from.
-- * _rescued_data is the column configured to hold data that does not fit the schema.
CREATE OR REPLACE TABLE yelp_tip_bronze
AS
SELECT
    *,
    _metadata.file_modification_time,
    _metadata.file_name
FROM read_files(
    "/Volumes/training_catalog/yelp_db/training_files/*tip*",
    FORMAT => "JSON",
    SCHEMA => '''
        user_id STRING,
        business_id STRING,
        text STRING,
        date TIMESTAMP_NTZ,
        compliment_count INT
    ''',
    rescueddatacolumn => "_rescued_data"
);

-- Preview data
SELECT * FROM yelp_tip_bronze LIMIT 5;

#### 3.2.2. Store data with selected values from json into a new table

##### Business

In [0]:
%sql
-- Browse the loaded business bronze table before deriving the flattened copy.
SELECT *
FROM yelp_business_bronze;

In [0]:
%sql
-- Build a copy of the business bronze table with selected JSON fields promoted
-- to their own columns.
--
-- attributes and hours are STRING columns holding JSON text in
-- yelp_business_bronze, so fields are read with the `:` JSON path operator.
-- Struct-style dot access (attributes.AcceptsInsurance) is only valid on STRUCT
-- columns and fails on STRING. Each extracted value is returned as a STRING.
CREATE OR REPLACE TABLE yelp_business_json_bronze
AS
SELECT
    * EXCEPT (attributes, hours),  -- drop the raw JSON columns from the output
    attributes:AcceptsInsurance AS accepts_insurance,
    hours:Sunday AS sunday_hour,
    hours:Monday AS monday_hour,
    hours:Tuesday AS tuesday_hour,
    hours:Wednesday AS wednesday_hour,
    hours:Thursday AS thursday_hour,
    hours:Friday AS friday_hour,
    hours:Saturday AS saturday_hour
FROM yelp_business_bronze;

-- Preview data
SELECT * FROM yelp_business_json_bronze LIMIT 5;