In [None]:
# @title Inspect the schema of bigquery-public-data.thelook_ecommerce.products
from google.cloud import bigquery

# Client created with no explicit arguments.
client = bigquery.Client()

# Retrieve the table object for the public `products` table.
table = client.get_table('bigquery-public-data.thelook_ecommerce.products')

# Summarise the table: the row count first, then one line per schema field
# giving the column's name and its declared field type.
print("{} rows".format(table.num_rows))
for field in table.schema:
  column_name, column_type = field.name, field.field_type
  print("Column {}: {}".format(column_name, column_type))


In [None]:
# @title Inspect the schema of bigquery-public-data.thelook_ecommerce.order_items
from google.cloud import bigquery

# Client created with no explicit arguments.
client = bigquery.Client()

# Retrieve the table object for the public `order_items` table.
table = client.get_table('bigquery-public-data.thelook_ecommerce.order_items')

# Summarise the table: the row count first, then one line per schema field
# giving the column's name and its declared field type.
print("{} rows".format(table.num_rows))
for field in table.schema:
  column_name, column_type = field.name, field.field_type
  print("Column {}: {}".format(column_name, column_type))


In [None]:
# @title Setup bigquery client and formatting
from google.cloud import bigquery
from google.colab import data_table

# Turn on Colab's data-table formatter for DataFrame output.
data_table.enable_dataframe_formatter()

# Project ID inserted based on the query results selected to explore.
project = 'airflow-demo-437509'

# Client bound explicitly to that project; the query cells below use it.
client = bigquery.Client(project=project)

In [None]:
# @title Executes the query

# Join every order item to the product it refers to (inner join on
# products.id = order_items.product_id).
#
# Both tables expose a column named `id`. Selecting the two of them without
# aliases yields two result columns with the same name, which is ambiguous in
# the result set and in the DataFrame built from it, so each one is given an
# explicit, self-describing alias.
sql = '''# prompt: Join these data sources

SELECT
  products.id AS products_id,
  products.cost,
  products.category,
  products.name,
  products.brand,
  products.retail_price,
  products.department,
  products.sku,
  products.distribution_center_id,
  order_items.id AS order_item_id,
  order_items.order_id,
  order_items.user_id,
  order_items.product_id,
  order_items.inventory_item_id,
  order_items.status,
  order_items.created_at,
  order_items.shipped_at,
  order_items.delivered_at,
  order_items.returned_at,
  order_items.sale_price
FROM
  `bigquery-public-data.thelook_ecommerce.products` AS products
INNER JOIN
  `bigquery-public-data.thelook_ecommerce.order_items` AS order_items
ON
  products.id = order_items.product_id;'''

# Submit the query with the client created in the setup cell.
query = client.query(sql)

# Render the query results

# Look the job up again by its id, then load its result set into a DataFrame.
job = client.get_job(query.job_id)
df = job.to_dataframe()
df


In [None]:
# @title Executes the query

# Total sale volume per product category, restricted to order items whose
# status is Cancelled or Returned, highest volume first.
#
# The original text read FROM `SQL`, a placeholder that is not a resolvable
# BigQuery table, so the query could not run from this notebook. The
# products/order_items join is now spelled out as a CTE, which makes the cell
# self-contained. The output columns (category, total_sale_volume) are
# unchanged.
sql1 = '''# prompt: Total sale volume by category where order status is Cancelled or Returned. sort by highest total_sale_volume

WITH joined AS (
  SELECT
    products.category,
    order_items.status,
    order_items.sale_price
  FROM
    `bigquery-public-data.thelook_ecommerce.products` AS products
  INNER JOIN
    `bigquery-public-data.thelook_ecommerce.order_items` AS order_items
  ON
    products.id = order_items.product_id
)
SELECT
  category,
  SUM(sale_price) AS total_sale_volume
FROM
  joined AS t1
WHERE
  t1.status = 'Cancelled'
  OR t1.status = 'Returned'
GROUP BY
  1
ORDER BY
  total_sale_volume DESC;'''

# Submit the query with the client created in the setup cell.
query1 = client.query(sql1)

# Render the query results

# Look the job up again by its id, then load its result set into a DataFrame.
job1 = client.get_job(query1.job_id)
df1 = job1.to_dataframe()
df1


In [None]:
# @title Bar chart sort by highest total_sale_volume

import altair as alt

# Mark definition: bars, with tooltips switched on.
bar_mark = {"type": "bar", "tooltip": True}

# X channel: the nominal `category` field, with the bars ordered by
# descending `total_sale_volume`.
category_channel = {
  "field": "category",
  "sort": {"field": "total_sale_volume", "order": "descending"},
  "title": "Category",
  "type": "nominal",
  "axis": {"labelOverlap": True},
}

# Y channel: the quantitative `total_sale_volume` field.
volume_channel = {
  "field": "total_sale_volume",
  "title": "Total Sale Volume",
  "type": "quantitative",
  "axis": {"labelOverlap": True},
}

# Build the chart from df1 (produced by the preceding query cell) and show it.
chart1 = alt.Chart(data=df1, mark=bar_mark).encode(
  x=category_channel,
  y=volume_channel,
)
chart1


In [None]:
# @title Executes the query

# Total sales per product category across all order items.
#
# The original text read FROM `SQL`, a placeholder that is not a resolvable
# BigQuery table, so the query could not run from this notebook. The
# products/order_items join is now spelled out as a CTE, which makes the cell
# self-contained. The output columns (category, total_sales) are unchanged.
sql2 = '''# prompt: product category sales

WITH joined AS (
  SELECT
    products.category,
    order_items.sale_price
  FROM
    `bigquery-public-data.thelook_ecommerce.products` AS products
  INNER JOIN
    `bigquery-public-data.thelook_ecommerce.order_items` AS order_items
  ON
    products.id = order_items.product_id
)
SELECT
  t1.category,
  SUM(t1.sale_price) AS total_sales
FROM
  joined AS t1
GROUP BY
  1;'''

# Submit the query with the client created in the setup cell.
query2 = client.query(sql2)

# Render the query results

# Look the job up again by its id, then load its result set into a DataFrame.
job2 = client.get_job(query2.job_id)
df2 = job2.to_dataframe()
df2


In [None]:
# @title Bar chart of total sales by category

import altair as alt

# Mark definition: bars, with tooltips switched on.
mark_spec = {"type": "bar", "tooltip": True}

# X channel: the nominal `category` field.
x_channel = {
  "field": "category",
  "type": "nominal",
  "title": "Category",
  "axis": {"labelOverlap": True},
}

# Y channel: the quantitative `total_sales` field.
y_channel = {
  "field": "total_sales",
  "type": "quantitative",
  "title": "Total Sales",
  "axis": {"labelOverlap": True},
}

# Build the chart from df2 (produced by the preceding query cell) and show it.
chart2 = alt.Chart(data=df2, mark=mark_spec).encode(
  x=x_channel,
  y=y_channel,
)
chart2


In [None]:
# @title Executes the query

# Per product category: the number of distinct orders and the total sale
# volume, most frequently ordered categories first.
#
# The original text read FROM `SQL`, a placeholder that is not a resolvable
# BigQuery table, so the query could not run from this notebook. The
# products/order_items join is now spelled out as a CTE, which makes the cell
# self-contained. The output columns (category, order_count,
# total_sale_volume) are unchanged.
sql3 = '''# prompt: Analyze which product category have order sale volume and frequency 

WITH joined AS (
  SELECT
    products.category,
    order_items.order_id,
    order_items.sale_price
  FROM
    `bigquery-public-data.thelook_ecommerce.products` AS products
  INNER JOIN
    `bigquery-public-data.thelook_ecommerce.order_items` AS order_items
  ON
    products.id = order_items.product_id
)
SELECT
  category,
  COUNT(DISTINCT order_id) AS order_count,
  SUM(sale_price) AS total_sale_volume
FROM
  joined
GROUP BY
  1
ORDER BY
  order_count DESC;'''

# Submit the query with the client created in the setup cell.
query3 = client.query(sql3)

# Render the query results

# Look the job up again by its id, then load its result set into a DataFrame.
job3 = client.get_job(query3.job_id)
df3 = job3.to_dataframe()
df3
