In [17]:
# System prompt sent to the model with every request.
# The fragments are joined with explicit newlines: adjacent string literals are
# concatenated with no separator, which would fuse sentences together
# (e.g. "...original words.Based on...") and glue the example's "Output:" onto
# the end of its "Input:" line.
SYSTEM_INSTRUCTION = "\n".join((
    "You are a decoding expert skilled at converting sequences of phonemes into English words and, in order, either attempting to form sentences or preserving the original words.",
    "Based on the received phoneme sequence, you first generate three possible sentences and then select the best one as the final output.",
    "The phoneme sequence includes the following types, and the symbol ' | ' represents a space. PHONEME = ['BLANK','AA', 'AE', 'AH', 'AO', 'AW','AY', 'B',  'CH', 'D', 'DH','EH', 'ER', 'EY', 'F', 'G','HH', 'IH', 'IY', 'JH', 'K','L', 'M', 'N', 'NG', 'OW','OY', 'P', 'R', 'S', 'SH','T', 'TH', 'UH', 'UW', 'V','W', 'Y', 'Z', 'ZH',' | ',]",
    # The punctuation rule is stated once, in the bullet list below, so the prompt
    # no longer says both "remove all punctuation" and "keep apostrophes".
    "Only output the sentence that best matches the original phonemes.",
    "Input: a list of phoneme sequences. Output: a list of strings",
    "- Remove all punctuation marks, keeping only apostrophes.",
    "- All lowercase.",
    "- Example:",
    "Input: B R IH NG  |  IH T  |  K L OW S ER  | ",
    "Output: bring it closer",
    "- Only output a list of strings. Ensure the sentences exactly correspond to the phonemes.",
))

In [18]:
import os
from dotenv import load_dotenv
from google import genai
from google.genai import types

# Location of the .env file that holds the API key.
# Set the GENAI_ENV_PATH environment variable to point somewhere else; when it is
# not set, the original machine-specific default below is used.
env_path = os.getenv(
    "GENAI_ENV_PATH",
    "D:/ybcvx/Documents/NCKU/ISA5810/Lab2/brain-to-text/config_genai/.env",
)
# load_dotenv returns False when the file is missing or defines no variables;
# say so here instead of failing later with a less obvious error.
if not load_dotenv(dotenv_path=env_path):
    print(f"Warning: no environment variables were loaded from {env_path}")


# Upper bound on the number of tokens the model may generate. This value is the
# maximum for the Gemini 2.5 models; check the documentation of any other model
# before reusing it.
MAX_OUTPUT_TOKENS = 65_535

# Model used by default.
# Alternatives: "gemini-2.5-pro", "gemini-2.5-flash".
# Models whose max output tokens should be checked first: "gemini-2.0-flash", "gemini-2.0-flash-lite".
MODEL_NAME = "gemini-2.5-flash"

# No moderation is needed for this task, so the filter for each of the four
# harm categories listed below is switched off.
SAFETY_SETTINGS = [
    types.SafetySetting(category=harm_category, threshold="OFF")
    for harm_category in (
        "HARM_CATEGORY_HATE_SPEECH",
        "HARM_CATEGORY_DANGEROUS_CONTENT",
        "HARM_CATEGORY_SEXUALLY_EXPLICIT",
        "HARM_CATEGORY_HARASSMENT",
    )
]

# IMPORTANT: the API key is read from the GOOGLE_API_KEY environment variable, which
# is expected to come from the `.env` file loaded with `load_dotenv` (see `env_path`).
# That file must contain a line like: `GOOGLE_API_KEY='YOUR_API_KEY_HERE'`

api_key = os.getenv("GOOGLE_API_KEY")
if not api_key:
    # Fail with an actionable message instead of the cryptic
    # "TypeError: str expected, not NoneType" raised by os.environ[...] = None.
    raise RuntimeError(
        "GOOGLE_API_KEY is not set. Add GOOGLE_API_KEY='YOUR_API_KEY_HERE' to the "
        ".env file referenced by env_path, or export it before running this cell."
    )

# Client used by default for every Gemini request in this notebook.
client = genai.Client(api_key=api_key)

# We also set LangExtract to use the API key as well.
# NOTE: with both variables exported, the SDK prints
# "Both GOOGLE_API_KEY and GEMINI_API_KEY are set. Using GOOGLE_API_KEY."
# each time a client is created.
if 'GEMINI_API_KEY' not in os.environ:
    os.environ['GEMINI_API_KEY'] = api_key

def prompt_gemini(
        input_prompt: list,
        schema = None,
        temperature: float = 0.0,
        system_instruction: str = SYSTEM_INSTRUCTION,
        max_output_tokens: int = MAX_OUTPUT_TOKENS,
        client: genai.Client = client,
        model_name: str = MODEL_NAME,
        new_config: types.GenerateContentConfig = None,
        with_tools: bool = False,
        with_parts: bool = False,
        with_tokens_info: bool = False
    ):
    """Send one request to a Gemini model and return its answer.

    Args:
        input_prompt: Contents to send. Passed through as-is, or wrapped in
            ``types.Content(parts=...)`` when ``with_parts`` is true.
        schema: Optional response schema. When truthy, the default config asks
            for ``application/json`` output constrained by this schema.
        temperature: Sampling temperature for the default config.
        system_instruction: System prompt for the default config.
        max_output_tokens: Output token limit for the default config.
        client: Client used to issue the request.
        model_name: Name of the model to call.
        new_config: When truthy, replaces the default config entirely (the
            default config is still constructed first).
        with_tools: When true, the raw response object is returned instead of
            ``response.text``.
        with_parts: When true, ``input_prompt`` is treated as a list of parts.
        with_tokens_info: When true, a ``(completion, log)`` tuple is returned,
            where ``log`` holds the model name and the prompt/candidate token
            counts taken from ``response.usage_metadata``.

    Returns:
        The completion (text or raw response), or ``(completion, log)`` when
        ``with_tokens_info`` is true. Returns ``None`` if any exception is
        raised while building the config, calling the model, or reading the
        response; the error is printed rather than propagated.
    """
    try:
        # Settings shared by the structured and unstructured variants.
        config_kwargs = dict(
            temperature=temperature,
            system_instruction=system_instruction,
            max_output_tokens=max_output_tokens,
            response_modalities=["TEXT"],
            safety_settings=SAFETY_SETTINGS,
        )
        # A schema switches the response to schema-constrained JSON.
        if schema:
            config_kwargs.update(
                response_mime_type="application/json",
                response_schema=schema,
            )
        default_config = types.GenerateContentConfig(**config_kwargs)

        # A caller-supplied configuration takes precedence over the default one.
        active_config = new_config if new_config else default_config

        # Some tasks need the contents expressed as explicit "types" parts; in the
        # simplest form a list of plain objects (str, Pillow images) is enough.
        contents = types.Content(parts=input_prompt) if with_parts else input_prompt

        response = client.models.generate_content(
            model=model_name,
            contents=contents,
            config=active_config,
        )

        # Function-calling callers need the raw response; everyone else gets the text.
        completion = response if with_tools else response.text
        if not with_tokens_info:
            return completion

        usage = response.usage_metadata
        log = {
            "model": model_name,
            "input_tokens": usage.prompt_token_count,
            "output_tokens": usage.candidates_token_count,
        }
        return completion, log
    except Exception as e:
        print(f"Error occurred when generating response, error: {e}")
        return None

Both GOOGLE_API_KEY and GEMINI_API_KEY are set. Using GOOGLE_API_KEY.


In [31]:
import pandas as pd
from tqdm import tqdm
import json
import csv
import time
import math

# Load the phoneme predictions, tag every row with its position in the file,
# then keep only the rows to be processed in this run.
predict_df = (
    pd.read_csv("phoneme_rnn.csv")
    .assign(id=lambda frame: range(len(frame)))
    .iloc[1168:1169]  # change this to the row range you want
)
print(len(predict_df))
predict_df.head()

1


Unnamed: 0,session,block,trial,phonemes,id
1168,t15.2024.07.19,10,40,DH AH | S AH M P R IY M | K AO R T | W A...,1168


In [32]:
# Phoneme strings of the selected rows, one entry per row, in row order.
predict_list = predict_df["phonemes"].tolist()
len(predict_list)

1

In [33]:
from pydantic import BaseModel
# Structured-output schema for one decoded phoneme sequence; it is passed to
# prompt_gemini as `list[best_ans]` so the reply is a JSON list of these objects.
# (Documented with `#` comments on purpose: a class docstring would become part
# of the JSON schema description generated from this model.)
class best_ans(BaseModel):
    # Presumably the candidate sentences the model is asked to generate first.
    # NOTE(review): SYSTEM_INSTRUCTION asks for three candidates, but five
    # fields are required here — confirm which number is intended.
    sentence_1: str
    sentence_2: str
    sentence_3: str
    sentence_4: str
    sentence_5: str
    # The only field read back from the parsed response when collecting predictions.
    best: str

In [34]:
# Smoke test: send one minimal request to confirm that the API key works and
# the configured model is reachable.
from google import genai
client = genai.Client(api_key=os.getenv("GOOGLE_API_KEY"))
try:
    # Ping the same model the rest of the notebook uses.
    r = client.models.generate_content(model=MODEL_NAME, contents=["ping"])
    # r.text can be None when the response carries no text part; slicing None
    # would raise and make a successful call look like an error.
    print("✅ OK:", (r.text or "<empty response>")[:60])
except Exception as e:
    print("❌ ERR:", e)


Both GOOGLE_API_KEY and GEMINI_API_KEY are set. Using GOOGLE_API_KEY.


✅ OK: Pong!


In [35]:
# Decode every phoneme sequence with the model, collecting exactly one label per
# input so that pred_labels stays aligned with predict_list.
pred_labels = []

for i, prompt in enumerate(predict_list):
    text_response = prompt_gemini(
        input_prompt=prompt,
        schema=list[best_ans],
        temperature=0.0,
        system_instruction=SYSTEM_INSTRUCTION
    )

    # prompt_gemini returns None when the request failed; an empty reply is
    # treated the same way.
    if not text_response:
        pred_labels.append("")
        continue

    try:
        structured_resp = json.loads(text_response)
        # Tolerate a single JSON object where a list of objects was requested.
        if isinstance(structured_resp, dict):
            structured_resp = [structured_resp]
        bests = [resp.get("best", "") for resp in structured_resp]
    except Exception as e:
        print(f"JSON parsing failed at index {i}: {e}")
        bests = []

    # One prompt must yield one label: appending every returned item would shift
    # all later predictions relative to their ids.
    if len(bests) > 1:
        print(f"Expected one result at index {i}, got {len(bests)}; keeping the first.")
    pred_labels.append(bests[0] if bests else "")

assert len(pred_labels) == len(predict_list), "predictions are out of sync with inputs"

In [36]:
pred_labels

['the supreme court war be a decision today']

In [25]:
# Pair each row id with its predicted sentence and write the result to disk.
submission_columns = {"id": predict_df["id"], "text": pred_labels}
submission_df = pd.DataFrame(submission_columns)

submission_df.to_csv("result.csv", encoding="utf-8", index=False)

In [29]:
# Merge the freshly written predictions (result.csv) with the existing ones
# (predict.csv) and write the combined table back to result.csv.
df1 = pd.read_csv("result.csv")
df2 = pd.read_csv("predict.csv")

# result.csv is both an input and the output of this cell, so running it again
# would append predict.csv a second time. Dropping exact duplicate rows keeps
# the cell idempotent.
merged_df = (
    pd.concat([df1, df2], ignore_index=True)
    .drop_duplicates(ignore_index=True)
)

merged_df.to_csv("result.csv", index=False, encoding="utf-8")