### 1. Configure Spanner CLI and create Shopify Database

In [1]:
%env GOPATH=/home/jupyter/go
%env PATH=$PATH:/usr/local/go/bin:$GOPATH/bin
%env SPANNER_EMULATOR_HOST=localhost:9010
%env CLI=/home/jupyter/go/bin/spanner-cli
%env SPAN_PROJECT=acquired-rarity-288205
%env SPAN_INSTANCE=span-instance
%env SPAN_DATABASE=test-database

env: GOPATH=/home/jupyter/go
env: PATH=$PATH:/usr/local/go/bin:$GOPATH/bin
env: SPANNER_EMULATOR_HOST=localhost:9010
env: CLI=/home/jupyter/go/bin/spanner-cli
env: SPAN_PROJECT=acquired-rarity-288205
env: SPAN_INSTANCE=span-instance
env: SPAN_DATABASE=test-database


In [2]:
!$CLI -p $SPAN_PROJECT -i $SPAN_INSTANCE -d $SPAN_DATABASE -e "show tables"

Tables_in_test-database
T1


In [3]:
# Shared spanner-cli command prefix for the `!{CLI_CONNECT} ...` cells.
# The $NAME tokens stay unexpanded in this Python string; the shell resolves
# them from the environment each time a command runs, so a later
# `%env SPAN_DATABASE=...` changes which database the commands target.
CLI_CONNECT = " ".join(
    ["$CLI", "-p", "$SPAN_PROJECT", "-i", "$SPAN_INSTANCE", "-d", "$SPAN_DATABASE"]
)

In [4]:
!{CLI_CONNECT} -e "create database shopify"

In [5]:
!{CLI_CONNECT} -e "show databases"

Database
shopify
test-database


In [6]:
# Point subsequent CLI commands at the new database: CLI_CONNECT contains a
# literal $SPAN_DATABASE that the shell resolves on every `!` call, so
# overwriting the environment variable is enough to switch databases.
%env SPAN_DATABASE=shopify

env: SPAN_DATABASE=shopify


In [7]:
# Run the DDL in create_spanner_tables.sql against the database selected by
# SPAN_DATABASE (shopify); it defines the 7 tables listed by the next cell.
!{CLI_CONNECT} -f create_spanner_tables.sql

In [8]:
!{CLI_CONNECT} -e "show tables"

Tables_in_shopify
key_benefits
apps
pricing_plans
apps_categories
pricing_plan_features
categories
reviews


In [9]:
!{CLI_CONNECT} -e "show create table key_benefits"

Table	Create Table
key_benefits	CREATE TABLE key_benefits (
  app_id STRING(50),
  title STRING(2000),
  description STRING(MAX),
) PRIMARY KEY(app_id, title)


In [10]:
!{CLI_CONNECT} -e "show create table apps"

Table	Create Table
apps	CREATE TABLE apps (
  id STRING(50),
  url STRING(1000),
  title STRING(2000),
  developer STRING(100),
  developer_link STRING(1000),
  icon STRING(1000),
  rating FLOAT64,
  reviews_count INT64,
) PRIMARY KEY(id)


In [11]:
!{CLI_CONNECT} -e "show create table pricing_plans"

Table	Create Table
pricing_plans	CREATE TABLE pricing_plans (
  id STRING(50),
  app_id STRING(50),
  title STRING(200),
  price FLOAT64,
) PRIMARY KEY(id)


In [12]:
!{CLI_CONNECT} -e "show create table apps_categories"

Table	Create Table
apps_categories	CREATE TABLE apps_categories (
  app_id STRING(50),
  category_id STRING(50),
) PRIMARY KEY(app_id, category_id)


In [13]:
!{CLI_CONNECT} -e "show create table pricing_plan_features"

Table	Create Table
pricing_plan_features	CREATE TABLE pricing_plan_features (
  app_id STRING(50),
  pricing_plan_id STRING(50),
  feature STRING(MAX),
) PRIMARY KEY(app_id, pricing_plan_id, feature)


In [14]:
!{CLI_CONNECT} -e "show create table categories"

Table	Create Table
categories	CREATE TABLE categories (
  id STRING(50),
  title STRING(2000),
) PRIMARY KEY(id)


In [15]:
!{CLI_CONNECT} -e "show create table reviews"

Table	Create Table
reviews	CREATE TABLE reviews (
  app_id STRING(50),
  author STRING(250),
  rating INT64,
  posted_at DATE,
) PRIMARY KEY(app_id, author)


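### 2. Open a terminal and download the Shopify dataset

Run the following commands so that the CSV files end up under `/home/jupyter/shopify/`, where the load cells below expect them:

    gsutil cp gs://cs327e-open-access/shopify.zip .
    unzip shopify.zip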

### 3. Populate tables

In [16]:
from google.cloud import spanner
from google.cloud.spanner_v1 import param_types
import pandas as pd

In [17]:
# Instance and database names for the Python client.
# NOTE(review): these repeat the SPAN_INSTANCE / SPAN_DATABASE values set with
# %env for the CLI cells — keep the two in sync.
span_instance="span-instance"
span_database="shopify"
# No project is passed here, so the client resolves its own default project.
# NOTE(review): presumably the client also honors SPANNER_EMULATOR_HOST set in
# the first cell — confirm.
spanner_client = spanner.Client()
instance = spanner_client.instance(span_instance)
# `database` is the handle used by the load and transaction cells below.
database = instance.database(span_database)

In [18]:
# Bulk-load apps.csv into the `apps` table with a single batch of upserts.
# Rows are matched to `columns` by position, so the CSV column order must be
# the order listed below — TODO confirm against the file header.
df = pd.read_csv('/home/jupyter/shopify/apps.csv', sep=',', header=0, lineterminator='\n')
# pandas reads empty CSV fields as float NaN. Replace them with None so that
# missing values are written as NULL instead of NaN.
values = [
    [None if pd.isna(cell) else cell for cell in row]
    for row in df.values.tolist()
]

with database.batch() as batch:
    batch.insert_or_update(
        table="apps",
        columns=("id", "url", "title", "developer", "developer_link", "icon", "rating", "reviews_count"),
        values=values
    )

In [19]:
# Bulk-load apps_categories.csv into the `apps_categories` table.
# Rows are matched to the column tuple by position, so the CSV column order
# must be (app_id, category_id) — TODO confirm against the file header.
df = pd.read_csv(
    '/home/jupyter/shopify/apps_categories.csv',
    sep=',',
    header=0,
    lineterminator='\n',
)
values = df.values.tolist()

# All rows are queued as one insert_or_update inside the batch context.
with database.batch() as batch:
    batch.insert_or_update("apps_categories", ("app_id", "category_id"), values)

In [20]:
# Bulk-load categories.csv into the `categories` table.
# Rows are matched to the column tuple by position, so the CSV column order
# must be (id, title) — TODO confirm against the file header.
df = pd.read_csv(
    '/home/jupyter/shopify/categories.csv',
    sep=',',
    header=0,
    lineterminator='\n',
)
values = df.values.tolist()

# All rows are queued as one insert_or_update inside the batch context.
with database.batch() as batch:
    batch.insert_or_update("categories", ("id", "title"), values)

In [21]:
# Bulk-load key_benefits.csv into the `key_benefits` table with a single batch
# of upserts. Rows are matched to `columns` by position, so the CSV column
# order must be the order listed below — TODO confirm against the file header.
df = pd.read_csv('/home/jupyter/shopify/key_benefits.csv', sep=',', header=0, lineterminator='\n')
# pandas reads empty CSV fields as float NaN. Replace them with None so that
# missing values (e.g. an empty description) are written as NULL instead of NaN.
values = [
    [None if pd.isna(cell) else cell for cell in row]
    for row in df.values.tolist()
]

with database.batch() as batch:
    batch.insert_or_update(
        table="key_benefits",
        columns=("app_id", "title", "description"),
        values=values
    )

In [22]:
# Bulk-load pricing_plans.csv into the `pricing_plans` table with a single
# batch of upserts. Rows are matched to `columns` by position, so the CSV
# column order must be the order listed below — TODO confirm against the header.
df = pd.read_csv('/home/jupyter/shopify/pricing_plans.csv', sep=',', header=0, lineterminator='\n')
# pandas reads empty CSV fields as float NaN. Replace them with None so that a
# missing title or price is written as NULL instead of NaN.
values = [
    [None if pd.isna(cell) else cell for cell in row]
    for row in df.values.tolist()
]

with database.batch() as batch:
    batch.insert_or_update(
        table="pricing_plans",
        columns=("id", "app_id", "title", "price"),
        values=values
    )

In [23]:
# Bulk-load pricing_plan_features.csv into the `pricing_plan_features` table.
# Rows are matched to the column tuple by position, so the CSV column order
# must be (app_id, pricing_plan_id, feature) — TODO confirm against the header.
df = pd.read_csv(
    '/home/jupyter/shopify/pricing_plan_features.csv',
    sep=',',
    header=0,
    lineterminator='\n',
)
values = df.values.tolist()

# All rows are queued as one insert_or_update inside the batch context.
with database.batch() as batch:
    batch.insert_or_update(
        "pricing_plan_features",
        ("app_id", "pricing_plan_id", "feature"),
        values,
    )

In [24]:
!{CLI_CONNECT} -e "select count(*) from apps"


3547


In [25]:
!{CLI_CONNECT} -e "select count(*) from apps_categories"


5383


In [26]:
!{CLI_CONNECT} -e "select count(*) from categories"


12


In [27]:
!{CLI_CONNECT} -e "select count(*) from key_benefits"


9541


In [28]:
!{CLI_CONNECT} -e "select count(*) from pricing_plans"


6275


In [29]:
!{CLI_CONNECT} -e "select count(*) from pricing_plan_features"


16270


In [30]:
def insert_app(transaction):
    """Insert one sample app plus one related row in each dependent table.

    Issues five INSERT statements, in order, against apps, apps_categories,
    key_benefits, pricing_plans and pricing_plan_features, printing the row
    count reported for each statement.

    Args:
        transaction: object exposing ``execute_update(sql)``, which runs the
            DML statement and returns the number of affected rows.
    """
    statements = (
        (
            "INSERT INTO apps (id, url, title, developer, developer_link, icon, reviews_count) "
            "VALUES ('683d06af-14c7-4733-9bde-ec5b699af996', "
            "'https://apps.shopify.com/watchlist?surface_detail=inventory-management&surface_inter_position=1&surface_intra_position=9&surface_type=category',"
            "'Back in Stock Product Alerts', "
            "'Swym Corporation', "
            "'https://apps.shopify.com/partners/developer-ca6a967f09890f68',"
            "'https://apps.shopifycdn.com/listing_images/9905a4c8f22cb4a3b0c32af55a58ec21/icon/e6d46a7e5e1df375d542d033aae80459.png?height=72&width=72',"
            "0)"
        ),
        (
            "INSERT INTO apps_categories (app_id, category_id) "
            "VALUES ('683d06af-14c7-4733-9bde-ec5b699af996', '737ad50051083aa051d127a53b3ac0da')"
        ),
        (
            "INSERT INTO key_benefits (app_id, title, description) "
            "VALUES ('683d06af-14c7-4733-9bde-ec5b699af996', "
            "'Drive Back In Stock Sales', 'Back in stock, out of stock, pre order & restock alerts')"
        ),
        (
            "INSERT INTO pricing_plans (id, app_id, title, price) "
            "VALUES ('961a2751-1dbd-4f0f-8248-9fe6a6efd4ab', '683d06af-14c7-4733-9bde-ec5b699af996', "
            "'Free Trial', 14.99)"
        ),
        (
            "INSERT INTO pricing_plan_features (app_id, pricing_plan_id, feature) "
            " VALUES ('683d06af-14c7-4733-9bde-ec5b699af996', '961a2751-1dbd-4f0f-8248-9fe6a6efd4ab', "
            "'Up to 250 alert requests/mo')"
        ),
    )

    # Run the statements one by one, reporting each row count as it comes back.
    for dml in statements:
        inserted = transaction.execute_update(dml)
        print("{} record(s) inserted.".format(inserted))
      
# Hand insert_app to the client's run_in_transaction helper, which supplies
# the `transaction` argument the function issues its five INSERTs through.
database.run_in_transaction(insert_app)

1 record(s) inserted.
1 record(s) inserted.
1 record(s) inserted.
1 record(s) inserted.
1 record(s) inserted.


In [31]:
def update_price(transaction):
    """Halve the price of every pricing plan whose price is positive.

    The update is relative (``price = price / 2``), so each run halves the
    stored prices again.

    Args:
        transaction: object exposing ``execute_update(sql)``, which runs the
            DML statement and returns the number of affected rows.
    """
    halve_positive_prices = "UPDATE pricing_plans SET price = price / 2 WHERE price > 0"
    updated = transaction.execute_update(halve_positive_prices)

    print("{} record(s) updated.".format(updated))

# Hand update_price to the client's run_in_transaction helper, which supplies
# the `transaction` argument. The UPDATE is relative, so re-running this cell
# halves the prices again.
database.run_in_transaction(update_price)

4780 record(s) updated.


In [32]:
def delete_app(transaction):
    """Delete the sample app and its related rows from all five tables.

    Issues one DELETE per table, starting with ``apps`` and then the four
    tables that carry the app id in an ``app_id`` column, printing the row
    count reported for each statement.

    Args:
        transaction: object exposing ``execute_update(sql)``, which runs the
            DML statement and returns the number of affected rows.
    """
    app_id = "683d06af-14c7-4733-9bde-ec5b699af996"
    # (table, column that stores the app id), in execution order.
    targets = (
        ("apps", "id"),
        ("apps_categories", "app_id"),
        ("key_benefits", "app_id"),
        ("pricing_plans", "app_id"),
        ("pricing_plan_features", "app_id"),
    )

    for table, id_column in targets:
        dml = "DELETE FROM {} WHERE {} = '{}'".format(table, id_column, app_id)
        deleted = transaction.execute_update(dml)
        print("{} record(s) deleted.".format(deleted))

# Hand delete_app to the client's run_in_transaction helper, which supplies
# the `transaction` argument the function issues its five DELETEs through.
database.run_in_transaction(delete_app)

1 record(s) deleted.
1 record(s) deleted.
1 record(s) deleted.
1 record(s) deleted.
1 record(s) deleted.


## Begin Project 3 work

In [33]:
# Bulk-load reviews.csv into the `reviews` table with a single batch of upserts.
# The table's primary key is (app_id, author), so only the first review per
# (app_id, author) pair is kept. Chaining drop_duplicates (instead of mutating
# with inplace=True) keeps the cell a single expression from file to frame.
df = (
    pd.read_csv('/home/jupyter/shopify/reviews.csv', sep=',', header=0, lineterminator='\n')
    .drop_duplicates(subset=['app_id', 'author'])
)
# pandas reads empty CSV fields as float NaN. Replace them with None so that
# missing values are written as NULL instead of NaN.
# Rows are matched to `columns` by position, so the CSV column order must be
# the order listed below — TODO confirm against the file header.
values = [
    [None if pd.isna(cell) else cell for cell in row]
    for row in df.values.tolist()
]

with database.batch() as batch:
    batch.insert_or_update(
        table="reviews",
        columns=("app_id", "author", "rating", "posted_at"),
        values=values
    )

In [34]:
!{CLI_CONNECT} -e "select count(*) from reviews"


124601


In [38]:
!{CLI_CONNECT} -e "show create table reviews"

Table	Create Table
reviews	CREATE TABLE reviews (
  app_id STRING(50),
  author STRING(250),
  rating INT64,
  posted_at DATE,
) PRIMARY KEY(app_id, author)


In [35]:
!{CLI_CONNECT} -e "delete from reviews where author = 'Junho Yang'"

In [36]:
!{CLI_CONNECT} -e "delete from reviews where author = 'Hyeon Gu Kim'"

In [37]:
!{CLI_CONNECT} -e "INSERT INTO reviews (app_id, author, rating, posted_at) VALUES \
            ('683d06af-14c7-4733-9bde-ec5b699af996', 'Junho Yang', 5, '2019-9-30')"

In [38]:
!{CLI_CONNECT} -e "INSERT INTO reviews (app_id, author, rating, posted_at) VALUES \
            ('683d06af-14c7-4733-9bde-ec5b699af996', 'Hyeon Gu Kim', 4, '2019-9-30')"

In [39]:
!{CLI_CONNECT} -e "select * from reviews where app_id = '683d06af-14c7-4733-9bde-ec5b699af996'"

app_id	author	rating	posted_at
683d06af-14c7-4733-9bde-ec5b699af996	Hyeon Gu Kim	4	2019-09-30
683d06af-14c7-4733-9bde-ec5b699af996	Junho Yang	5	2019-09-30


In [40]:
def delete_app(transaction):
    """Delete the sample app's row from the ``apps`` table only.

    Defining this replaces any earlier ``delete_app`` in the kernel namespace.

    Args:
        transaction: object exposing ``execute_update(sql)``, which runs the
            DML statement and returns the number of affected rows.
    """
    remove_app_row = "DELETE FROM apps WHERE id = '683d06af-14c7-4733-9bde-ec5b699af996'"
    deleted = transaction.execute_update(remove_app_row)

    print("{} record(s) deleted.".format(deleted))

# Hand the single-table delete_app defined in this cell to the client's
# run_in_transaction helper, which supplies the `transaction` argument.
database.run_in_transaction(delete_app)

0 record(s) deleted.


In [41]:
def insert_app(transaction):
    """Insert the sample app row into ``apps`` with a rating and review count.

    Uses the same app id as the two reviews inserted through the CLI, and sets
    ``rating`` to 4.5 and ``reviews_count`` to 2. Defining this replaces any
    earlier ``insert_app`` in the kernel namespace.

    Args:
        transaction: object exposing ``execute_update(sql)``, which runs the
            DML statement and returns the number of affected rows.
    """
    insert_app_row = (
        "INSERT INTO apps (id, url, title, developer, developer_link, icon, rating, reviews_count) "
        "VALUES ('683d06af-14c7-4733-9bde-ec5b699af996', "
        "'https://apps.shopify.com/affiliate',"
        "'JUNHOHO', "
        "'Junho Yang', "
        "'https://apps.shopify.com/partners/aaawebstore',"
        "'https://apps.shopifycdn.com/listing_images/b40df7fe45d61bc27de4de04cf83cbae/icon/a9254e3723c0c60e3bc7093d077aa587.png?height=84&width=84',"
        "4.5, "
        "2)"
    )
    inserted = transaction.execute_update(insert_app_row)

    print("{} record(s) inserted.".format(inserted))


# Hand the single-row insert_app defined in this cell to the client's
# run_in_transaction helper, which supplies the `transaction` argument.
database.run_in_transaction(insert_app)

1 record(s) inserted.


In [43]:
!{CLI_CONNECT} -e "ALTER TABLE apps_categories \
  ADD CONSTRAINT apps_cat_app_id_fk FOREIGN KEY (app_id) REFERENCES apps (id)"

In [45]:
!{CLI_CONNECT} -e "ALTER TABLE apps_categories \
  ADD CONSTRAINT apps_cat_cat_id_fk FOREIGN KEY (category_id) REFERENCES categories (id)"

In [46]:
!{CLI_CONNECT} -e "show create table apps_categories"

Table	Create Table
apps_categories	CREATE TABLE apps_categories (
  app_id STRING(50),
  category_id STRING(50),
  CONSTRAINT apps_cat_app_id_fk FOREIGN KEY(app_id) REFERENCES apps(id),
  CONSTRAINT apps_cat_cat_id_fk FOREIGN KEY(category_id) REFERENCES categories(id),
) PRIMARY KEY(app_id, category_id)


In [63]:
!{CLI_CONNECT} -e "show create table apps"

Table	Create Table
apps	CREATE TABLE apps (
  id STRING(50),
  url STRING(1000),
  title STRING(2000),
  developer STRING(100),
  developer_link STRING(1000),
  icon STRING(1000),
  rating FLOAT64,
  reviews_count INT64,
) PRIMARY KEY(id)


In [58]:
!{CLI_CONNECT} -e "show create table categories"

Table	Create Table
categories	CREATE TABLE categories (
  id STRING(50),
  title STRING(2000),
) PRIMARY KEY(id)


In [59]:
!{CLI_CONNECT} -e "ALTER TABLE key_benefits \
  ADD CONSTRAINT key_ben_apps_id_fk FOREIGN KEY (app_id) REFERENCES apps (id)"

In [60]:
!{CLI_CONNECT} -e "show create table key_benefits"

Table	Create Table
key_benefits	CREATE TABLE key_benefits (
  app_id STRING(50),
  title STRING(2000),
  description STRING(MAX),
  CONSTRAINT key_ben_apps_id_fk FOREIGN KEY(app_id) REFERENCES apps(id),
) PRIMARY KEY(app_id, title)


In [51]:
!{CLI_CONNECT} -e "ALTER TABLE pricing_plan_features \
  ADD CONSTRAINT ppf_id_pp_id_fk FOREIGN KEY (pricing_plan_id) REFERENCES pricing_plans (app_id)"

In [55]:
!{CLI_CONNECT} -e "ALTER TABLE pricing_plan_features \
  ADD CONSTRAINT papp_id_pp_id_fk FOREIGN KEY (app_id) REFERENCES pricing_plans (id)"

In [56]:
!{CLI_CONNECT} -e "show create table pricing_plan_features"

Table	Create Table
pricing_plan_features	CREATE TABLE pricing_plan_features (
  app_id STRING(50),
  pricing_plan_id STRING(50),
  feature STRING(MAX),
  CONSTRAINT ppf_id_pp_id_fk FOREIGN KEY(pricing_plan_id) REFERENCES pricing_plans(app_id),
  CONSTRAINT papp_id_pp_id_fk FOREIGN KEY(app_id) REFERENCES pricing_plans(id),
) PRIMARY KEY(app_id, pricing_plan_id, feature)


In [113]:
!{CLI_CONNECT} -e "ALTER TABLE pricing_plans \
  ADD CONSTRAINT pp_id_apps_id_fk FOREIGN KEY (app_id) REFERENCES apps (id)"

In [114]:
!{CLI_CONNECT} -e "show create table pricing_plans"

E1001 07:51:04.044930559    5192 backup_poller.cc:132]       Run client channel backup poller: {"created":"@1601538664.044837940","description":"pollset_work","file":"src/core/lib/iomgr/ev_epollex_linux.cc","file_line":321,"referenced_errors":[{"created":"@1601538664.044835253","description":"Bad file descriptor","errno":9,"file":"src/core/lib/iomgr/ev_epollex_linux.cc","file_line":948,"os_error":"Bad file descriptor","syscall":"epoll_wait"}]}
Table	Create Table
pricing_plans	CREATE TABLE pricing_plans (
  id STRING(50),
  app_id STRING(50),
  title STRING(200),
  price FLOAT64,
  CONSTRAINT pp_id_apps_id_fk FOREIGN KEY(app_id) REFERENCES apps(id),
) PRIMARY KEY(id)


In [116]:
!{CLI_CONNECT} -e "ALTER TABLE reviews \
  ADD CONSTRAINT r_id_apps_id_fk FOREIGN KEY (app_id) REFERENCES apps (id)"

In [117]:
!{CLI_CONNECT} -e "show create table reviews"

Table	Create Table
reviews	CREATE TABLE reviews (
  app_id STRING(50),
  author STRING(250),
  rating INT64,
  posted_at DATE,
  CONSTRAINT r_id_apps_id_fk FOREIGN KEY(app_id) REFERENCES apps(id),
) PRIMARY KEY(app_id, author)


In [119]:
!{CLI_CONNECT} -e "CREATE INDEX app_rating_idx ON apps(rating)"

In [48]:
# NOTE(review): apps.id is already the primary key of `apps`, so this
# secondary index looks redundant — confirm it is needed before keeping it.
!{CLI_CONNECT} -e "CREATE INDEX app_id_idx ON apps(id)"

In [49]:
# NOTE(review): app_id is the leading column of the reviews primary key
# (app_id, author), so lookups by app_id may already be served by the key —
# confirm this index is needed.
!{CLI_CONNECT} -e "CREATE INDEX review_id_idx ON reviews(app_id)"

In [50]:
!{CLI_CONNECT} -e "CREATE INDEX review_author_idx ON reviews(author)"