In [None]:
# Copyright 2023 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.`
# See the License for the specific language governing permissions and
# limitations under the License.

# Environment Setup

Install the following python packages to setup the environment.

In [39]:
! pip install -U google-cloud-bigquery



Specify your project ID in the next cell.

In [46]:
#enter your Project ID!

PROJECT_ID = '<your_project_id>'  # Change to your project ID
LOCATION = 'us-central1'
DATASET_ID = 'clinic_dataset'


### BigQuery: Create dataset

Create a BigQuery dataset to upload the transaction data.

In [47]:
# Create BigQuery Dataset on your project
from google.cloud import bigquery
import pandas as pd
#from google.cloud import datacatalog_v1

bq_client = bigquery.Client(project=PROJECT_ID)
#datacatalog_client = datacatalog_v1.DataCatalogClient()

dataset_id = "{}.{}".format(bq_client.project, DATASET_ID)
dataset = bigquery.Dataset(dataset_id)
dataset.location = "US"

# Create the dataset
try:
    dataset = bq_client.create_dataset(dataset, timeout=30)
    print(f'Dataset {DATASET_ID} create successfully.')
except Exception as e:
    print(e)

Dataset clinic_dataset create successfully.


### BigQuery: Create tables and ingesting with data

The next cell will load data from csv into BigQuery.
If this process fails, try to recreate the dataset with the cell above and load the data.

In [48]:
#define source csv files
customerfile= 'https://raw.githubusercontent.com/dataanalyticsworkshop/bigdata-genai/main/dataset/clinik-customer.csv'
ordertransactionfile = 'https://raw.githubusercontent.com/dataanalyticsworkshop/bigdata-genai/main/dataset/clinic-transaction.csv'
surveyfile = 'https://raw.githubusercontent.com/dataanalyticsworkshop/bigdata-genai/main/dataset/clinic-surveyscore.csv'

In [49]:
#create function to import csv to BigQuery
def import_csv_to_bq (filepath, table_id):

  job_config = bigquery.LoadJobConfig(
      source_format=bigquery.SourceFormat.CSV, skip_leading_rows=1, autodetect=True,
          write_disposition=bigquery.WriteDisposition.WRITE_TRUNCATE )

  df = pd.read_csv(filepath, delimiter=',', )
  load_job = bq_client.load_table_from_dataframe(dataframe=df,
                                          destination=table_id,
                                            job_config=job_config)  # Make an API request.


  load_job.result()  # Waits for the job to complete.

  table = bq_client.get_table(table_id)  # Make an API request.
  print(
      "Loaded {} rows and {} columns to {}".format(
          table.num_rows, len(table.schema), table_id
      )
)

In [50]:
#import customer csv
tablename = 'customerdata'
table_id = "{}.{}".format(dataset_id, tablename) #fully qualified table name
print (table_id)

import_csv_to_bq(customerfile, table_id)

#preview imported data
bq_client.query("SELECT * FROM "+ "`" +table_id+ "`").to_dataframe().head()

#wait until dataframe preview completed!

bqstackdemo.clinic_dataset.customerdata
Loaded 58 rows and 7 columns to bqstackdemo.clinic_dataset.customerdata


Unnamed: 0,custid,custname,dateofbirth,city_address,nationality,memberstatus,education
0,31,Mickey Mouse,1950-01-01,Jakarta,Indonesia,YES,Bachelor
1,8,Iron Man,1960-03-03,Pontianak,Indonesia,NO,Doctorate
2,20,Beast,1960-03-03,Jambi,Indonesia,NO,Doctorate
3,38,Dale,1960-03-03,Pontianak,Indonesia,NO,Doctorate
4,50,Max Goof,1960-03-03,Jambi,Indonesia,NO,Doctorate


In [52]:
#import order transaction to BigQuery
tablename = 'ordertransaction'
table_id = "{}.{}".format(dataset_id, tablename) #fully qualified table name
print (table_id)

import_csv_to_bq(ordertransactionfile, table_id)

#preview imported data
bq_client.query("SELECT * FROM "+ "`" +table_id+ "`").to_dataframe().head()

#wait until dataframe preview completed!

bqstackdemo.clinic_dataset.ordertransaction
Loaded 279 rows and 7 columns to bqstackdemo.clinic_dataset.ordertransaction


Unnamed: 0,transactionid,custid,transactsitelocation,transactiondate,servicecategory,transactionamount,servingconsultant
0,TX3008,27,Surabaya,2022-01-08,Eye Care,800000,Ema
1,TX3028,33,Bandung,2022-01-28,Eye Care,800000,Ema
2,TX3040,21,Bandung,2022-02-10,Covid Test,2900000,Ema
3,TX1004,12,Bandung,2023-03-11,Consultation,500000,Ema
4,TX1024,12,Bandung,2023-03-31,Consultation,500000,Ema


In [53]:
#import survey data to BigQuery
tablename = 'satisfactionsurvey'
table_id = "{}.{}".format(dataset_id, tablename) #fully qualified table name
print (table_id)

import_csv_to_bq(surveyfile, table_id)

#preview imported data
bq_client.query("SELECT * FROM "+ "`" +table_id+ "`").to_dataframe().head()

#wait until dataframe preview completed!

bqstackdemo.clinic_dataset.satisfactionsurvey
Loaded 279 rows and 2 columns to bqstackdemo.clinic_dataset.satisfactionsurvey


Unnamed: 0,transactionid,surveyscore
0,TX3070,61
1,TX3091,61
2,TX3093,61
3,TX5147,61
4,TX3027,62
