# Section 1: Preprocessing
+ In this section, we handle all the setup steps required for the rest of this notebook: installing the necessary packages, importing libraries, and initializing the Google BigQuery client.

In [1]:
# Path check: display the kernel's current working directory.
%pwd

'/kaggle/working'

In [2]:
# Install google-cloud-bigquery-storage for running BigQuery SQL without error.
!pip --quiet install google-cloud-bigquery-storage

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m296.5/296.5 kB[0m [31m10.0 MB/s[0m eta [36m0:00:00[0m
[?25h[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
bigframes 2.8.0 requires google-cloud-bigquery[bqstorage,pandas]>=3.31.0, but you have google-cloud-bigquery 3.25.0 which is incompatible.
bigframes 2.8.0 requires rich<14,>=12.4.4, but you have rich 14.0.0 which is incompatible.[0m[31m
[0m

In [3]:
# Import all libraries for this project
import pandas as pd

from google.cloud import bigquery, storage
from datetime import datetime, timedelta

In [4]:
# Create the BigQuery client used by every query cell below, bound to the
# Google Cloud project that hosts the `steam` dataset.
client = bigquery.Client(
    project='analog-delight-470708-d0',
)

In [5]:
# Temporary smoke test (to be removed): fetch the App ID, Name and
# Short Description of a single game from `steam_game_list` to confirm that
# the client can run a query and return a DataFrame.
query = """
select `App ID`, Name, `Short Description` 
from `analog-delight-470708-d0.steam.steam_game_list`
where `App ID` = '1032020'; 
"""
df = (
    client
    .query(query)
    .to_dataframe()
)
print(df)

    App ID                       Name  \
0  1032020  Shantalia and Corali'hulu   

                                   Short Description  
0  A short and linear story. Every 20 years, a ma...  


# Section 2: Text Embeddings with BigQuery ML

In [6]:
# Example: embed one hard-coded sentence with the `llm_steam` model through
# ML.GENERATE_EMBEDDING and load the result into a DataFrame.
query = """
SELECT embedding 
FROM ML.GENERATE_EMBEDDING(  
  MODEL `analog-delight-470708-d0.steam.llm_steam`,  
  (SELECT 'Example text to embed' AS content)
) AS embedding;
"""
df = (
    client
    .query(query)
    .to_dataframe()
)
print(df)

                                           embedding
0  {'ml_generate_embedding_result': [-0.010428389...


In [7]:
# Inspect the raw value stored in the first row / first column of the result,
# i.e. the embedding returned for the example text.
df.iat[0, 0]

{'ml_generate_embedding_result': array([-1.04283895e-02,  2.96470821e-02, -7.43054599e-02,  2.11055744e-02,
         5.47670871e-02,  1.45342695e-02,  4.91497517e-02,  1.72161181e-02,
        -2.55930442e-02, -1.82761587e-02, -1.13353962e-02,  5.70037663e-02,
         7.43808225e-02,  9.53106023e-03,  8.26539937e-03, -2.44385265e-02,
        -8.46037921e-03, -2.09631305e-02, -1.06419526e-01,  4.15838975e-03,
         8.16948060e-03, -3.48685347e-02,  7.83672556e-03, -7.71326199e-02,
        -3.56542468e-02, -6.04376569e-02,  5.97310215e-02, -7.64682889e-03,
         1.58150326e-02,  2.42397771e-03,  2.08303090e-02,  4.81331460e-02,
         5.75856753e-02, -1.40861599e-02,  1.02293072e-02, -2.20989957e-02,
         1.22030349e-02, -2.17509270e-02,  3.19595970e-02, -2.49467473e-02,
        -4.97038662e-02,  7.47113582e-03, -4.10959534e-02, -7.64197391e-03,
        -1.22565199e-02, -5.48531674e-02, -3.48668732e-03, -1.61631014e-02,
        -1.30632930e-02,  4.94526736e-02,  1.16624698e-0