# 環境設定

In [1]:
# 匯入軟體包
import os
from google.cloud import bigquery
from google.colab import drive
import pandas as pd

In [2]:
# Notebook-wide settings shared by every cell below.
project_id = 'tibame-gad253-14-bigquery'  # replace with your Google Cloud project ID
us_dataset_id = 'tibame_gad253_14_dataset_us'  # ID of the US dataset
# External connection used by the remote models. Built from project_id so the
# project only has to be changed in one place; the rendered value is the same
# fully-qualified path as before.
connection = f'projects/{project_id}/locations/us/connections/tibame_gad253_14_ai'

# Point the Google Cloud client libraries at the service-account key file.
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = r"/content/tibame-gad253-14-bigquery-c5993cbf5beb.json"  # key file location

# Initialise the BigQuery client, bound explicitly to project_id rather than
# whichever project the credentials would otherwise imply.
client = bigquery.Client(project=project_id)
print('connection done')

connection done


# 建立嵌入模型Embedding

In [3]:
# Identifier of the remote embedding model inside the US dataset.
embedding_model = 'embedding_model'

# DDL: (re)create the remote model over the external connection, pointing it
# at the text-embedding-004 endpoint.
create_model_sql = f"""
CREATE OR REPLACE MODEL `{us_dataset_id}.{embedding_model}`
  REMOTE WITH CONNECTION `{connection}`
  OPTIONS (ENDPOINT = 'text-embedding-004');

"""

# Echo the rendered statement before running it.
print(create_model_sql)

# Submit the statement and wait for it to finish; the value of the final
# expression is what the cell displays.
create_model_job = client.query(create_model_sql)
create_model_job.result()



CREATE OR REPLACE MODEL `tibame_gad253_14_dataset_us.embedding_model`
  REMOTE WITH CONNECTION `projects/tibame-gad253-14-bigquery/locations/us/connections/tibame_gad253_14_ai`
  OPTIONS (ENDPOINT = 'text-embedding-004');




<google.cloud.bigquery.table._EmptyRowIterator at 0x7f4ae9ae55b0>

# 建立文本嵌入資料表

In [5]:
# Destination table for the patent rows together with their embeddings.
table_id = 'embeddings'

# Embed the abstract of every Singapore publication that has a non-empty title
# and abstract, keeping only rows whose embedding status string is empty.
create_table_sql = f"""
CREATE OR REPLACE TABLE `{us_dataset_id}.{table_id}` AS
SELECT * FROM ML.GENERATE_EMBEDDING(
  MODEL `{us_dataset_id}.{embedding_model}`,
  (
    SELECT *, abstract AS content
    FROM `patents-public-data.google_patents_research.publications`
    WHERE LENGTH(abstract) > 0 AND LENGTH(title) > 0 AND country = 'Singapore'
  )
)
WHERE LENGTH(ml_generate_embedding_status) = 0;
"""

# Echo the rendered statement before running it.
print(create_table_sql)

# Submit the statement and wait for the table to be written; the value of the
# final expression is what the cell displays.
create_table_job = client.query(create_table_sql)
create_table_job.result()


CREATE OR REPLACE TABLE `tibame_gad253_14_dataset_us.embeddings` AS
SELECT * FROM ML.GENERATE_EMBEDDING(
  MODEL `tibame_gad253_14_dataset_us.embedding_model`,
  (
    SELECT *, abstract AS content
    FROM `patents-public-data.google_patents_research.publications`
    WHERE LENGTH(abstract) > 0 AND LENGTH(title) > 0 AND country = 'Singapore'
  )
)
WHERE LENGTH(ml_generate_embedding_status) = 0;



<google.cloud.bigquery.table._EmptyRowIterator at 0x7f4ae8b52450>

# 建立向量索引

In [None]:
# DDL: build an IVF vector index (cosine distance, 500 lists) over the
# embedding column of the embeddings table. The doubled braces are f-string
# escapes that render as a literal JSON object in the SQL.
create_index_sql = f"""
CREATE OR REPLACE VECTOR INDEX my_index
ON `{us_dataset_id}.{table_id}`(ml_generate_embedding_result)
OPTIONS(index_type = 'IVF',
  distance_type = 'COSINE',
  ivf_options = '{{"num_lists":500}}');
"""

# Echo the rendered statement before running it.
print(create_index_sql)

# Submit the statement and wait for it to return; the value of the final
# expression is what the cell displays.
create_index_job = client.query(create_index_sql)
create_index_job.result()


In [7]:
# Query the dataset's INFORMATION_SCHEMA for the status of its vector indexes.
query=f"""
SELECT table_name, index_name, index_status,
coverage_percentage, last_refresh_time, disable_reason
FROM `{us_dataset_id}.INFORMATION_SCHEMA.VECTOR_INDEXES`;
"""
# Echo the rendered query.
print(query)

# Disable column-width truncation so long cell values are shown in full.
# pandas is already imported in the notebook's first cell, so it is not
# re-imported here.
pd.set_option('display.max_colwidth', None)

# Run the query and wait for the rows.
query_job = client.query(query)
results = query_job.result()

# Flatten each row into a tuple and take the column names from the result
# schema, so the frame keeps its columns even when no rows come back.
rows_data = [tuple(row.values()) for row in results]
columns = [field.name for field in results.schema]
df = pd.DataFrame(rows_data, columns=columns)


SELECT table_name, index_name, index_status,
coverage_percentage, last_refresh_time, disable_reason
FROM `tibame_gad253_14_dataset_us.INFORMATION_SCHEMA.VECTOR_INDEXES`;



In [8]:
# Display the vector index status frame built by the preceding cell.
df

Unnamed: 0,table_name,index_name,index_status,coverage_percentage,last_refresh_time,disable_reason
0,embeddings,my_index,ACTIVE,0,,


# 相似性查詢

In [9]:
# Embedding similarity search: embed the query text with the same model, then
# return the 5 nearest rows of the embeddings table.
query=f"""
SELECT query.query, base.publication_number, base.title, base.abstract
FROM VECTOR_SEARCH(
  TABLE `{us_dataset_id}.{table_id}`, 'ml_generate_embedding_result',
  (
  SELECT ml_generate_embedding_result, content AS query
  FROM ML.GENERATE_EMBEDDING(
  MODEL `{us_dataset_id}.{embedding_model}`,
  (SELECT 'improving password security' AS content))
  ),
  top_k => 5, options => '{{"fraction_lists_to_search": 0.01}}');

"""
# Echo the rendered query.
print(query)

# Disable column-width truncation so full titles and abstracts are shown.
# pandas is already imported in the notebook's first cell, so it is not
# re-imported here.
pd.set_option('display.max_colwidth', None)

# Run the query and wait for the rows.
query_job = client.query(query)
results = query_job.result()

# Flatten each row into a tuple and take the column names from the result
# schema, so the frame keeps its columns even when no rows come back.
rows_data = [tuple(row.values()) for row in results]
columns = [field.name for field in results.schema]
df = pd.DataFrame(rows_data, columns=columns)



SELECT query.query, base.publication_number, base.title, base.abstract
FROM VECTOR_SEARCH(
  TABLE `tibame_gad253_14_dataset_us.embeddings`, 'ml_generate_embedding_result',
  (
  SELECT ml_generate_embedding_result, content AS query
  FROM ML.GENERATE_EMBEDDING(
  MODEL `tibame_gad253_14_dataset_us.embedding_model`,
  (SELECT 'improving password security' AS content))
  ),
  top_k => 5, options => '{"fraction_lists_to_search": 0.01}');




In [None]:
# Display the similarity search results frame built by the preceding cell.
df

# 產生增強向量搜尋結果

In [None]:
# Name of the text-generation model inside the US dataset.
# NOTE(review): no cell visible in this notebook creates `llm_model`;
# presumably it already exists in the dataset — confirm before a fresh run.
llmmodel = 'llm_model'

# Retrieval-augmented generation: run the same vector search as above,
# aggregate the 5 matching titles/abstracts into one prompt, and pass that
# prompt to ML.GENERATE_TEXT. '\\n' is written with a doubled backslash so the
# SQL text receives the two-character escape rather than a raw line break.
query=f"""
SELECT ml_generate_text_llm_result AS generated, prompt
FROM ML.GENERATE_TEXT(
  MODEL `{us_dataset_id}.{llmmodel}`,
  (
    SELECT CONCAT(
      'Propose some project ideas to improve user password security using the context below: ',
      STRING_AGG(
        FORMAT("patent title: %s, patent abstract: %s", base.title, base.abstract),
        ',\\n')
      ) AS prompt,
    FROM VECTOR_SEARCH(
      TABLE `{us_dataset_id}.{table_id}`, 'ml_generate_embedding_result',
      (
        SELECT ml_generate_embedding_result, content AS query
        FROM ML.GENERATE_EMBEDDING(
          MODEL `{us_dataset_id}.{embedding_model}`,
         (SELECT 'improving password security' AS content)
        )
      ),
    top_k => 5, options => '{{"fraction_lists_to_search": 0.01}}')
  ),
  STRUCT(600 AS max_output_tokens, TRUE AS flatten_json_output));

"""
# Echo the rendered query.
print(query)

# Disable column-width truncation so the generated text and prompt are shown
# in full. pandas is already imported in the notebook's first cell, so it is
# not re-imported here.
pd.set_option('display.max_colwidth', None)

# Run the query and wait for the rows.
query_job = client.query(query)
results = query_job.result()

# Flatten each row into a tuple and take the column names from the result
# schema, so the frame keeps its columns even when no rows come back.
rows_data = [tuple(row.values()) for row in results]
columns = [field.name for field in results.schema]
df = pd.DataFrame(rows_data, columns=columns)


In [14]:
# Display the generated text alongside the prompt that produced it.
df

Unnamed: 0,generated,prompt
0,"Okay, based on the provided patent abstracts, here are some project ideas aimed at improving user password security, categorized by their focus and complexity:\n\n**I. Enhanced Password Entry & Authentication:**\n\n* **Project 1: Dynamic Password Entry with Keystroke Obfuscation (Building on ""Improved system and method for random entry of password"" and ""Encryption system for confidential data transmission""):**\n\n * **Concept:** Combine the random entry concept with real-time encryption of keystrokes. Instead of just asterisks, the system displays decoy characters that change dynamically. The actual password characters are encrypted *before* they even reach the operating system, mitigating keystroke logging.\n * **Implementation:**\n * Develop a custom input method (e.g., a browser extension or a dedicated application).\n * Implement a dynamic character display where the user clicks/taps on a grid of characters. The grid changes after each selection.\n * Use a lightweight encryption algorithm (e.g., AES) to encrypt each character as it's entered. The encryption key could be derived from a session-specific secret.\n * On the server-side, decrypt the sequence and compare it to the stored password.\n * **Benefits:** Significantly harder for keyloggers to capture the actual password. The dynamic display makes shoulder surfing more difficult.\n * **Complexity:** Medium to High (requires understanding of input methods, cryptography, and secure communication).\n\n* **Project 2: Representative Character Authentication with Enhanced Verification (Building on ""Method and system for protecting a password during an authentication process""):**\n\n * **Concept:** Expand the representative character approach with more sophisticated verification. 
Instead of just a single verification code, use a challenge-response system.\n * **Implementation:**\n * The server sends a set of representative characters *and* a challenge question (e.g., ""What is the second letter of your mother's maiden name?"").\n * The user enters a mixed string of representative characters, the password, and the *answer* to the challenge question, all interspersed.\n * The server verifies both the password and the answer to the challenge question.\n * **Benefits:** Adds an extra layer of security beyond just the password. Makes it harder for attackers to replay captured authentication data.\n * **Complexity:** Medium (requires secure storage of challenge questions and answers, and careful design to prevent information leakage).\n\n* **Project 3: Context-Aware Password Entry (Inspired by all patents):**\n\n * **Concept:** The password entry method adapts based on the user's location, device, and network.\n * **Implementation:**\n * Use geolocation APIs, device fingerprinting, and network analysis to determine the user'","Propose some project ideas to improve user password security using the context below: patent title: Passsword management system and process, patent abstract: PASSSWORD MANAGEMENT SYSTEM AND PROCESS There is provided a password management process and system. The updating of the 5 password data in the process and system is preferably performed based, at least in part, on the functional account data and corresponding scheduling data, said scheduling data representing criteria for updating the password of, at least, the particular functional account. 
10 Figure 1b,\npatent title: Improved system and method for random entry of password, patent abstract: IMPROVED SYSTEM AND METHOD FOR RANDOM ENTRY OF PASSWORD The invention relates to a system and a method which assigns an incomplete security code/PIN/password each time an user is to access a secured system employing a security code/PIN/password entry system, so that to an observer, the password is entered in a random manner. The user will for security code/PIN/password of the length N, be shown N blanks spaces with some of the spaces randomly filled up with asterisk for which no data entry is required. The user is required to enter the remaining characters of the security/PIN/password when prompted by the system to correctly enter each character corresponding to the position of the password prompted. The order of entering the remaining characters of the security code/PIN/password will be randomly chosen by the system through the use of prompting. If a touch screen is used to enter the characters, then it will be possible to change the position of the keypad on the screen for each entry in an orderly manner.,\npatent title: Method and system for protecting a password during an authentication process, patent abstract: A system for providing security for a personal password during an authenticationprocess. The system combines the use of representative characters to disguise the characters of the password and the use of a separate verification code sent to the user for use in the authentication process. A server generates and sends both a set of representative characters and a verification code to a client device. The user then inputs amixed string having the password and verification code interspersed in order using the representative characters in place of the characters of the password and verification code. 
The server then receives the input and determines whether the string of representative characters includes the password and verification code characters in the proper order.FIGURE 5,\npatent title: Data storage device security method and apparatus, patent abstract: Methods for improving security in data storage devices are disclosed. The methods include a synchronization method by which an encrypted password, using any known encryption algorithm, keeps changing at each transmission from host to data storage device. Additionally, a security system for implementing the security method is provided.,\npatent title: Encryption system for confidential data transmission, patent abstract: The ubiquitous and borderless Internet has greatly increased connectivity between people. Together with its popularity as a commercial activities platform, the Internet has also become a popular target for cyber criminals who intentionally exploit on the Internet&#39;s numerous vulnerabilities. Perpetrators exploit the vulnerability of computer systems with malicious software programs designed specifically to steal confidential information such as user names, passwords and credit card numbers by recording keystrokes from the computer keyboard. The two most common methods used are keystroke recording and phishing scams. Phishing is a means of fraudulently acquiring confidential information through deception, for example, by masquerading as an official email or website for requesting such information. Online services relying on password challenges alone for authentication are the most vulnerable to keystroke recording as well as to other means of obtaining passwords. Multi-conditional authentication can be deployed to replace Password Challenge. However, these solutions incur high implementation cost and bring forth privacy issues. 
Embodiments of the invention describe an encryptor and a method for implementation thereof for encrypting data input and a dynamic variable obtainable by the encryptor into encrypted data. The encryptor data communicatively interfaces an input device and a computing system wherein a user is able to interact with the input device for generating the data input. The encrypted data is transmitted to and relayed by the computer system for subsequent reception and decryption by a decryptor for comparing with a reference dynamic variable obtainable by the decryptor for verifying validity of data decrypted therefrom."
