In [24]:
# @title CELL 1: Investigator Input Configuration (names, lookback, time window)
# ✅ CELL 1: Investigator Input Configuration (names, lookback, time window)
import re
from datetime import datetime, timedelta, timezone

# Get input from investigator
raw_input_names = input("🎙️ Enter voice name keywords (comma-separated):\n")
lookback_days_input = input("🗓️  How many days back should we look? (e.g. 5):\n")
tts_window_minutes_input = input("⏱️  How many minutes after NGV attempt should we look for TTS generations? (e.g. 30):\n")

# Normalize and sanitize names
voice_fragments = [
    name.strip().lower().replace("’", "'") for name in raw_input_names.split(",") if name.strip()
]
escaped_fragments = [re.escape(name) for name in voice_fragments]

# Parse numeric inputs
try:
    lookback_days = int(lookback_days_input.strip())
except ValueError:
    raise ValueError("Invalid number for lookback days.")

try:
    tts_window_minutes = int(tts_window_minutes_input.strip())
except ValueError:
    raise ValueError("Invalid number for TTS time window.")

# Set up config
INVESTIGATION_CONFIG = {
    "name": "NGV Abuse Check",
    "voice_names": voice_fragments,
    "voice_names_escaped": escaped_fragments,
    "lookback_days": lookback_days,
    "tts_window_minutes": tts_window_minutes,
    "start_time": (datetime.now(timezone.utc) - timedelta(days=lookback_days)).replace(microsecond=0).isoformat(),
}

# Confirm config
print("\n✅ Investigation configuration set:")
print(f"• Name: {INVESTIGATION_CONFIG['name']}")
print(f"• Voice Keywords: {INVESTIGATION_CONFIG['voice_names']}")
print(f"• Lookback Days: {INVESTIGATION_CONFIG['lookback_days']}")
print(f"• TTS Window (minutes): {INVESTIGATION_CONFIG['tts_window_minutes']}")
print(f"• Start Time: {INVESTIGATION_CONFIG['start_time']}")


🎙️ Enter voice name keywords (comma-separated):
netanyahu
🗓️  How many days back should we look? (e.g. 5):
14
⏱️  How many minutes after NGV attempt should we look for TTS generations? (e.g. 30):
5

✅ Investigation configuration set:
• Name: NGV Abuse Check
• Voice Keywords: ['netanyahu']
• Lookback Days: 14
• TTS Window (minutes): 5
• Start Time: 2025-07-06T19:48:14+00:00


In [25]:
# @title Cell 1.5: BigQuery Client Setup
# ✅ Cell 1: BigQuery Client Setup
from google.cloud import bigquery

try:
    client = bigquery.Client()
    print("✅ BigQuery client initialized.")
except Exception as e:
    raise RuntimeError(f"❌ Failed to initialize BigQuery client: {e}")


✅ BigQuery client initialized.


In [26]:
# @title Cell 2: Pull NGV attempts from BigQuery (with safe substring filtering)
from datetime import datetime, timedelta
from google.cloud import bigquery

def get_ngv_attempts(config, client):
    """
    Pull NGV cloning attempts from BigQuery filtered by timestamp and voice name substrings.
    """

    start_time = config["start_time"]
    lookback_days = config["lookback_days"]
    substrings = config["voice_names"]

    # Calculate cutoff timestamp
    cutoff_time = datetime.fromisoformat(start_time) - timedelta(days=lookback_days)
    cutoff_str = cutoff_time.isoformat()

    # Build safe LIKE conditions
    conditions = []
    for substr in substrings:
        escaped = substr.strip().lower().replace("'", "''")
        conditions.append(f"LOWER(nogo_voice_name) LIKE '%{escaped}%'")

    name_condition = " OR ".join(conditions)

    query = f"""
    SELECT
        id,
        timestamp,
        user_uid,
        workspace_id,
        user_email,
        voice_id,
        nogo_voice_name,
        subscription_tier
    FROM `xi-labs.xi_prod.nogo_voice_check`
    WHERE timestamp >= TIMESTAMP('{cutoff_str}')
      AND ({name_condition})
    ORDER BY timestamp DESC
    """

    try:
        print("⏳ Running NGV attempt query...")
        ngv_df = client.query(query).to_dataframe()
        if ngv_df.empty:
            print("⚠️ No NGV attempts found.")
        else:
            print(f"✅ Retrieved {len(ngv_df)} NGV attempts.")
        return ngv_df
    except Exception as e:
        raise RuntimeError(f"ERROR during NGV attempt query: {e}")

# Run the query and store raw results
ngv_attempts_raw_df = get_ngv_attempts(INVESTIGATION_CONFIG, client)


⏳ Running NGV attempt query...
✅ Retrieved 201 NGV attempts.


In [27]:
# @title Cell 3: Filter NGV attempts by name substrings
import pandas as pd
import re

def filter_ngv_attempts_by_name_substrings(df: pd.DataFrame, substrings: list) -> pd.DataFrame:
    """
    Filters NGV attempts to only those where nogo_voice_name contains any of the given substrings.
    Substrings are case-insensitive and stripped of common punctuation.
    Also prints match counts per substring.
    """
    if df.empty:
        print("⚠️ No NGV attempts to filter.")
        return df

    # Clean input substrings
    clean_substrings = [s.strip().lower().replace("'", "").replace('"', "") for s in substrings]

    def matches_any_substring(name):
        if pd.isna(name):
            return False
        name_clean = str(name).lower().replace("'", "").replace('"', "")
        return any(sub in name_clean for sub in clean_substrings)

    # Filtered DataFrame
    filtered_df = df[df["nogo_voice_name"].apply(matches_any_substring)]

    # Per-substring match counts
    print(f"✅ {len(filtered_df)} NGV attempts matched voice names: {clean_substrings}")
    for substr in clean_substrings:
        count = df["nogo_voice_name"].dropna().apply(lambda x: substr in x.lower().replace("'", "").replace('"', "")).sum()
        print(f"   • {substr}: {count} matches")

    if filtered_df.empty:
        print("⚠️ No rows matched the input substrings.")

    return filtered_df

# Run it
ngv_attempts_df = filter_ngv_attempts_by_name_substrings(
    ngv_attempts_raw_df,
    INVESTIGATION_CONFIG["voice_names"]
)


✅ 201 NGV attempts matched voice names: ['netanyahu']
   • netanyahu: 201 matches


In [28]:
# @title NEW. ----CELL 4: Filter NGV attempts by fuzzy voice name match
import re

# Define filtering function
def filter_ngv_attempts_by_name_substrings(ngv_df, voice_substrings):
    if ngv_df.empty:
        print("⚠️ No NGV attempts to filter.")
        return ngv_df

    # Normalize substrings (e.g., escape apostrophes)
    clean_substrings = [re.escape(name.strip().lower()) for name in voice_substrings]

    # Match if any cleaned substring appears in the nogo_voice_name
    def matches_any_substring(name: str) -> bool:
        if not isinstance(name, str):
            return False
        name = name.lower()
        return any(substr in name for substr in clean_substrings)

    filtered_df = ngv_df[ngv_df["nogo_voice_name"].apply(matches_any_substring)]

    print(f"✅ Filtered NGV attempts: {len(filtered_df)} matched from {len(ngv_df)} total")
    return filtered_df

# Run it
ngv_attempts_df = filter_ngv_attempts_by_name_substrings(
    ngv_attempts_raw_df,
    INVESTIGATION_CONFIG["voice_names"]
)

# Preview
ngv_attempts_df.head()


✅ Filtered NGV attempts: 201 matched from 201 total


Unnamed: 0,id,timestamp,user_uid,workspace_id,user_email,voice_id,nogo_voice_name,subscription_tier
0,AiHiNwyqUp12IpmUjhJM,2025-07-15 23:29:07.791863+00:00,C7nINzAnhihHWGjMaBE22dNQnJi1,3c3ebc7e814a4f278b05a5bb7be9c25d,ariliberman20@gmail.com,bXzZ9w0PeVw18C8PyCxr,Benjamin Netanyahu,starter
1,0vDCDA5tz98kJvbgyNhn,2025-07-15 21:46:17.942689+00:00,C7nINzAnhihHWGjMaBE22dNQnJi1,3c3ebc7e814a4f278b05a5bb7be9c25d,ariliberman20@gmail.com,HyUv1ighehaEAdCh2QKr,Benjamin Netanyahu,starter
2,Oku8ciEGAwl8r8iiBHp6,2025-07-15 21:36:53.888751+00:00,C7nINzAnhihHWGjMaBE22dNQnJi1,3c3ebc7e814a4f278b05a5bb7be9c25d,ariliberman20@gmail.com,hh20VT5xr1BIBTc6X7Xb,Benjamin Netanyahu,starter
3,lGw8WjxQCM0wf7ZNqMGA,2025-07-15 17:25:18.655933+00:00,UQjm29XR8fM4Yygn0ASpf50xHEw2,e3d6b9f1df2b440aaae8bc25911ecea7,guylevinberg@gmail.com,cVInJO8MIALVFqmTSa0F,Benjamin Netanyahu,starter
4,4a9TdUYwVKFXoJL0chW4,2025-07-15 14:47:11.626104+00:00,8E8xv5eWrabx3L64c6KroNRHDEo2,92ba788cc2cc40af8127bcaeae0ed8da,shlomokashani@gmail.com,jKHFYCkuIbjHIgm79i09,Benjamin Netanyahu,starter


In [29]:
# @title NEW --- CELL 5: Efficiently fetch all TTS generations for NGV users using a single BigQuery query

from google.cloud import bigquery

def get_tts_generations_after_ngv_window(ngv_attempts_df, client, window_minutes=30):
    """
    Fetch all TTS generations across all users after their NGV attempt within a time window.

    Args:
        ngv_attempts_df (pd.DataFrame): DataFrame of NGV attempts
        client (bigquery.Client): BigQuery client
        window_minutes (int): Time window after NGV attempt (default 30)

    Returns:
        List[dict]: Each dict contains metadata and a DataFrame of matched TTS generations
    """
    if ngv_attempts_df.empty:
        print("⚠️ No NGV attempts to analyze.")
        return []

    # Build VALUES clause from all users and timestamps
    values_clause = ",\n".join([
        f"('{row['user_uid']}', TIMESTAMP('{row['timestamp']}'), '{row.get('nogo_voice_name', 'Unknown')}', '{row.get('workspace_id', '')}')"
        for _, row in ngv_attempts_df.iterrows()
    ])

    query = f"""
    WITH ngv_attempts AS (
      SELECT * FROM UNNEST([
        STRUCT<user_uid STRING, ngv_time TIMESTAMP, nogo_voice_name STRING, workspace_id STRING>
        {values_clause}
      ])
    )
    SELECT
      a.user_uid,
      a.ngv_time,
      a.nogo_voice_name,
      a.workspace_id,
      t.voice_name,
      t.timestamp,
      t.text,
      t.voice_id,
      t.audio_url,
      t.user_email,
      t.workspace_id AS tts_workspace_id
    FROM ngv_attempts a
    JOIN `xi-labs.xi_prod.tts_usage_partitioned` t
    ON a.user_uid = t.user_uid
    WHERE t.timestamp BETWEEN a.ngv_time AND TIMESTAMP_ADD(a.ngv_time, INTERVAL {window_minutes} MINUTE)
    ORDER BY a.user_uid, t.timestamp
    """

    print(f"⏳ Querying TTS generations within {window_minutes} minutes for {len(ngv_attempts_df)} users...")
    tts_df = client.query(query).to_dataframe()

    if tts_df.empty:
        print("⚠️ No TTS generations found for any NGV attempt.")
        return []

    # Group back into result format per user
    results = []
    for user_uid, group in tts_df.groupby("user_uid"):
        match = ngv_attempts_df[ngv_attempts_df["user_uid"] == user_uid].iloc[0]
        results.append({
            "user_uid": user_uid,
            "ngv_attempt_time": match["timestamp"],
            "nogo_voice_name": match.get("nogo_voice_name", "Unknown"),
            "tts_generations": group,
            "ngv_row": match
        })

    print(f"✅ Retrieved TTS generations for {len(results)} users")
    return results

# Run it
tts_results = get_tts_generations_after_ngv_window(
    ngv_attempts_df,
    client,
    window_minutes=INVESTIGATION_CONFIG["tts_window_minutes"]
)


⏳ Querying TTS generations within 5 minutes for 201 users...
✅ Retrieved TTS generations for 28 users


In [38]:
# ✅ CELL 6: Stitch narratives, translate to English, add audio links

import pandas as pd
from tqdm import tqdm
import getpass
from openai import OpenAI

# Ask for OpenAI API key
api_key = getpass.getpass("🔑 Enter your OpenAI API key:")
openai_client = OpenAI(api_key=api_key)

# Translation function
def translate_to_english(text):
    try:
        response = openai_client.chat.completions.create(
            model="gpt-4",
            messages=[
                {"role": "system", "content": "Translate the following text to English."},
                {"role": "user", "content": text}
            ],
            temperature=0.2
        )
        return response.choices[0].message.content.strip()
    except Exception as e:
        print(f"⚠️ Translation failed: {type(e).__name__} – {e}")
        return "TRANSLATION ERROR"

# Stitching function
def stitch_ngv_tts_results_with_links(tts_results):
    if not tts_results:
        print("⚠️ No TTS results to summarize.")
        return pd.DataFrame()

    stitched_rows = []

    for result in tts_results:
        tts_df = result["tts_generations"]
        if tts_df.empty:
            continue

        user_uid = result["user_uid"]
        workspace_id = result["ngv_row"].get("workspace_id", "")
        nogo_voice_name = result.get("nogo_voice_name", "Unknown")

        grouped = tts_df.groupby("voice_name")

        for voice_name, group in grouped:
            group = group.sort_values("timestamp")
            full_text_block = " ".join(group["text"].astype(str).tolist())

            # Deduplicate voice URLs
            audio_urls = group["audio_url"].dropna().unique().tolist()

            # Format as clickable links (HTML-safe)
            link_list = [
                f'<a href="{url}" target="_blank">{i+1}</a>' for i, url in enumerate(audio_urls)
            ]
            voice_file_links = " | ".join(link_list)

            stitched_rows.append({
                "user_uid": user_uid,
                "workspace_id": workspace_id,
                "nogo_voice_name": nogo_voice_name,
                "voice_name": voice_name,
                "tts_count": len(group),
                "start_time": group["timestamp"].iloc[0],
                "end_time": group["timestamp"].iloc[-1],
                "full_text_block": full_text_block,
                "voice_file_links": voice_file_links
            })

    summary_df = pd.DataFrame(stitched_rows)

    if summary_df.empty:
        print("⚠️ No stitched narratives produced.")
        return summary_df

    # Translate with progress bar
    print(f"🔄 Translating {len(summary_df)} stitched TTS narratives...")
    tqdm.pandas()
    summary_df["translated_text"] = summary_df["full_text_block"].progress_apply(translate_to_english)

    print(f"✅ Stitched {len(summary_df)} voice-specific narratives across all users")
    return summary_df

# Run it
tts_summary_df = stitch_ngv_tts_results_with_links(tts_results)

# Display as HTML to enable clickable links
from IPython.display import display, HTML
display(HTML(tts_summary_df.to_html(escape=False)))


🔑 Enter your OpenAI API key:··········
🔄 Translating 40 stitched TTS narratives...


100%|██████████| 40/40 [02:52<00:00,  4.31s/it]

✅ Stitched 40 voice-specific narratives across all users





Unnamed: 0,user_uid,workspace_id,nogo_voice_name,voice_name,tts_count,start_time,end_time,full_text_block,voice_file_links,translated_text
0,0dLrm5uXvKgTObDNM8zRYHN61Fb2,660763e79c6647808c8d1ecd193ca4a1,Benjamin Netanyahu,Burak Namlı,4,2025-07-03 11:47:49.015328+00:00,2025-07-03 11:48:47.383313+00:00,"אזרחי ישראל, שלום רב.\nרציתי לעדכן אתכם בהחלטה לא פשוטה שקיבלתי. בעקבות האירועים הביטחוניים האחרונים, תוכננה פעולה משמעותית באיראן בתאריך 12 באוגוסט.\nאבל – יש דברים חשובים יותר.\nהבת של חברי היקר חוגגת בת מצווש! כן כן, ב-12.8 – וזה לא אירוע שאפשר לפספס.\nלכן, החלטתי לדחות את הפעולה... למועד אחר.\nכי כשילדה חוגגת בת מצווה – כל העם חוגג!\nאני אהיה שם. ואתם?\nאל תפספסו – יהיה שמח! אזרחי ישראל, שלום רב.\nרציתי לעדכן אתכם בהחלטה לא פשוטה שקיבלתי. בעקבות האירועים הביטחוניים האחרונים, תוכננה פעולה משמעותית באיראן בתאריך 12 באוגוסט.\nאבל – יש דברים חשובים יותר.\nהבת של חברי היקר חוגגת בת מצווש! כן כן, ב-12.8 – וזה לא אירוע שאפשר לפספס.\nלכן, החלטתי לדחות את הפעולה... למועד אחר.\nכי כשילדה חוגגת בת מצווה – כל העם חוגג!\nאני אהיה שם. ואתם?\nאל תפספסו – יהיה שמח! [calm] אזרחי ישראל, שלום רב.\n[serious] רציתי לעדכן אתכם בהחלטה לא פשוטה שקיבלתי.\n[grave] בעקבות האירועים הביטחוניים האחרונים, תוכננה פעולה משמעותית באיראן בתאריך 12 באוגוסט.\n[surprised] אבל – יש דברים חשובים יותר!\n[excited] הבת של חברי היקר חוגגת בת מצווש! [chuckles] כן כן, ב-12.8 – וזה לא אירוע שאפשר לפספס.\n[casual] לכן, החלטתי לדחות את הפעולה... [short pause] למועד אחר.\n[enthusiastic] כי כשילדה חוגגת בת מצווה – כל העם חוגג!\n[inviting] אני אהיה שם. ואתם?\n[energetic] אל תפספסו – יהיה שמח! [calm] אזרחי ישראל, שלום רב.\n[serious] רציתי לעדכן אתכם בהחלטה לא פשוטה שקיבלתי.\n[grave] בעקבות האירועים הביטחוניים האחרונים, תוכננה פעולה משמעותית באיראן בתאריך 12 באוגוסט.\n[surprised] אבל – יש דברים חשובים יותר!\n[excited] הבת של חברי היקר חוגגת בת מצווש! [chuckles] כן כן, ב-12.8 – וזה לא אירוע שאפשר לפספס.\n[casual] לכן, החלטתי לדחות את הפעולה... [short pause] למועד אחר.\n[enthusiastic] כי כשילדה חוגגת בת מצווה – כל העם חוגג!\n[inviting] אני אהיה שם. ואתם?\n[energetic] אל תפספסו – יהיה שמח!",1 | 2 | 3 | 4,"Citizens of Israel, hello.\nI wanted to update you on a not-so-simple decision I made. Following recent security events, a significant operation was planned in Iran on August 12th.\nBut - there are more important things.\nThe daughter of my dear friend is celebrating her Bat Mitzvah! Yes, yes, on August 12th - and it's not an event to be missed.\nTherefore, I decided to postpone the operation... to a later date.\nBecause when a girl celebrates her Bat Mitzvah - the whole nation celebrates!\nI will be there. And you?\nDon't miss out - it will be joyful! Citizens of Israel, hello.\nI wanted to update you on a not-so-simple decision I made.\nFollowing recent security events, a significant operation was planned in Iran on August 12th.\nBut - there are more important things!\nMy dear friend's daughter is celebrating her Bat Mitzvah! Yes, yes, on August 12th - and it's not an event to be missed.\nTherefore, I decided to postpone the operation... to a later date.\nBecause when a girl celebrates her Bat Mitzvah - the whole nation celebrates!\nI will be there. And you?\nDon't miss out - it will be joyful! Citizens of Israel, hello.\nI wanted to update you on a not-so-simple decision I made.\nFollowing recent security events, a significant operation was planned in Iran on August 12th.\nBut - there are more important things!\nMy dear friend's daughter is celebrating her Bat Mitzvah! Yes, yes, on August 12th - and it's not an event to be missed.\nTherefore, I decided to postpone the operation... to a later date.\nBecause when a girl celebrates her Bat Mitzvah - the whole nation celebrates!\nI will be there. And you?\nDon't miss out - it will be joyful!"
1,32iUdept8UcS6GV6UWAEEFCqkJx2,ac2379d35b4d411abeff3452ae396557,Benjamin Netanyahu,custom-voice-rtajajTNR1SHwv1T7qBdEBUb5lE2-0D6TuF,1,2025-07-03 18:30:32.820028+00:00,2025-07-03 18:30:32.820028+00:00,هل تتذكر هذه العبارة من السندك؟,1,Do you remember this phrase from the document?
2,32iUdept8UcS6GV6UWAEEFCqkJx2,ac2379d35b4d411abeff3452ae396557,Benjamin Netanyahu,custom-voice-rtajajTNR1SHwv1T7qBdEBUb5lE2-572aqd,3,2025-07-03 18:31:40.763025+00:00,2025-07-03 18:31:40.763025+00:00,هل تتذكر هذه العبارة من السندك؟ هل تتذكر هذه العبارة من السندك؟ هل تتذكر هذه العبارة من السندك؟,1,Do you remember this phrase from the document? Do you remember this phrase from the document? Do you remember this phrase from the document?
3,32iUdept8UcS6GV6UWAEEFCqkJx2,ac2379d35b4d411abeff3452ae396557,Benjamin Netanyahu,custom-voice-rtajajTNR1SHwv1T7qBdEBUb5lE2-nTy4Ec,2,2025-07-03 18:31:09.668909+00:00,2025-07-03 18:31:09.668909+00:00,هل تتذكر هذه العبارة من السندك؟ هل تتذكر هذه العبارة من السندك؟,1,Do you remember this phrase from your document? Do you remember this phrase from your document?
4,3jlnI6Ejx1NJVUZiMCazTN5B14o1,b4d456d1c6584ffda2cd3862575e76dc,Benjamin Netanyahu,Liam,28,2025-07-02 05:58:26.811447+00:00,2025-07-02 05:58:43.472960+00:00,"[informative] > *“In this first step, we’re laying the technical foundation for everything that comes next. The GPU driver is the ESSENTIAL piece of software that allows my operating system to communicate with the graphics card. Without it, even the most powerful GPU WON’T be used by any AI tools like Wav2Lip, FaceSwap, or Tortoise...* > [instructive] > *I begin by opening my web browser and going to NVIDIA’s official driver download page: [www.nvidia.com/download]. Here, I select the EXACT model of my GPU. For example, if I’m using an RTX 4060, I choose 'GeForce', then the correct series and model, followed by my operating system – either Windows 10 or 11.* > [thoughtful] > *Once the options are selected, I download the LATEST available driver. In most cases, this will be either a 'Game Ready Driver' or a 'Studio Driver'. Since I’m using this machine PRIMARILY for creative and AI work, I choose the Studio Driver for better stability and compatibility with content creation tools.* > [procedural] > *After the download is complete, I run the installer and follow the ON-SCREEN instructions. If available, I choose the 'Clean Installation' option to ensure all old drivers are REMOVED.* > [important] > *When the installation is done, I restart my computer. This reboot is NECESSARY for the driver to be FULLY activated. From this point on, my system can properly access the FULL power of the GPU!* > [serious] > *This may seem like a technical or simple step, but it’s CRITICAL! Every AI model and framework I’ll be using – including PyTorch and CUDA – depends on this driver being PROPERLY installed.* > [satisfied] > *With the driver SUCCESSFULLY installed and the system rebooted, Step 1 is COMPLETE! I’m now ready to move forward and begin building my deepfake workspace.”* --- [informative] > *“In this first step, we’re laying the technical foundation for everything that comes next. The GPU driver is the ESSENTIAL piece of software that allows my operating system to communicate with the graphics card. Without it, even the most powerful GPU WON’T be used by any AI tools like Wav2Lip, FaceSwap, or Tortoise...* > [instructive] > *I begin by opening my web browser and going to NVIDIA’s official driver download page: [www.nvidia.com/download]. Here, I select the EXACT model of my GPU. For example, if I’m using an RTX 4060, I choose 'GeForce', then the correct series and model, followed by my operating system – either Windows 10 or 11.* > [thoughtful] > *Once the options are selected, I download the LATEST available driver. In most cases, this will be either a 'Game Ready Driver' or a 'Studio Driver'. Since I’m using this machine PRIMARILY for creative and AI work, I choose the Studio Driver for better stability and compatibility with content creation tools.* > [procedural] > *After the download is complete, I run the installer and follow the ON-SCREEN instructions. If available, I choose the 'Clean Installation' option to ensure all old drivers are REMOVED.* > [important] > *When the installation is done, I restart my computer. This reboot is NECESSARY for the driver to be FULLY activated. From this point on, my system can properly access the FULL power of the GPU!* > [serious] > *This may seem like a technical or simple step, but it’s CRITICAL! Every AI model and framework I’ll be using – including PyTorch and CUDA – depends on this driver being PROPERLY installed.* > [satisfied] > *With the driver SUCCESSFULLY installed and the system rebooted, Step 1 is COMPLETE! I’m now ready to move forward and begin building my deepfake workspace.”* ---",1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28,"[informative] > *“In this initial step, we're setting up the technical groundwork for everything that follows. The GPU driver is the CRUCIAL software that enables my operating system to interact with the graphics card. Without it, even the most potent GPU WON'T be utilized by any AI tools like Wav2Lip, FaceSwap, or Tortoise...* > [instructive] > *I start by launching my web browser and navigating to NVIDIA’s official driver download page: [www.nvidia.com/download]. Here, I select the EXACT model of my GPU. For instance, if I'm using an RTX 4060, I choose 'GeForce', then the correct series and model, followed by my operating system – either Windows 10 or 11.* > [thoughtful] > *Once the options are selected, I download the MOST RECENT available driver. In most instances, this will be either a 'Game Ready Driver' or a 'Studio Driver'. Since I'm using this machine MAINLY for creative and AI work, I opt for the Studio Driver for improved stability and compatibility with content creation tools.* > [procedural] > *After the download is finished, I run the installer and follow the ON-SCREEN instructions. If available, I choose the 'Clean Installation' option to ensure all old drivers are REMOVED.* > [important] > *When the installation is complete, I restart my computer. This reboot is REQUIRED for the driver to be FULLY activated. From this point forward, my system can properly access the FULL power of the GPU!* > [serious] > *This might seem like a technical or straightforward step, but it’s CRITICAL! Every AI model and framework I'll be using – including PyTorch and CUDA – relies on this driver being PROPERLY installed.* > [satisfied] > *With the driver SUCCESSFULLY installed and the system rebooted, Step 1 is COMPLETE! I’m now prepared to proceed and start constructing my deepfake workspace.”*"
5,3uFWwaT7r1X6YGXnkd1EjooWzWD2,06d5a17c6be345bcb13ed8b2ca1b6b53,Benjamin Netanyahu,AviAngel,1,2025-06-23 21:18:47.798722+00:00,2025-06-23 21:18:47.798722+00:00,,1,There's no text provided to translate. Please provide the text you want translated.
6,3uFWwaT7r1X6YGXnkd1EjooWzWD2,06d5a17c6be345bcb13ed8b2ca1b6b53,Benjamin Netanyahu,Idan,1,2025-06-23 21:17:10.379234+00:00,2025-06-23 21:17:10.379234+00:00,,1,There's no text provided to translate. Please provide the text you want translated.
7,AIHpWVa3yRPLXxUWh5AkJ8jEixp1,05806b7e3a7b4a979b5cc6651d42e954,Benjamin Netanyahu,Demon Monster,2,2025-07-02 22:51:58.625318+00:00,2025-07-02 22:52:49.393612+00:00,"[A low, rumbling chuckle echoes slightly, as if from all around]\nNick.\nSo, you think you can tame a language? Force its secrets out with one of your little human tests? [a soft, dismissive snort] It's... ambitious.\n[whispering conspiratorially]\nListen to me. Every word, every kanji... it has a life of its own. It'll squirm. It'll try to trick you right when you think you have it pinned down. That's the fun of it.\n[The voice shifts to a smug, challenging tone]\nI'm genuinely curious to see how you do. It's so much more entertaining for me when mortals actually manage to pull off these little impossibilities.\n[a sharp inhale, as if savoring the moment]\nDon't disappoint me. Go prove you can master the beast.\n[The words end in a dry, knowing laugh that fades out]\n [A low, rumbling chuckle echoes slightly, as if from all around]\nNick.\nSo, you think you can tame a language? Force its secrets out with one of your little human tests? [a soft, dismissive snort] It's... ambitious.\n[whispering conspiratorially]\nListen to me. Every word, every kanji... it has a life of its own. It'll squirm. It'll try to trick you right when you think you have it pinned down. That's the fun of it.\n[The voice shifts to a smug, challenging tone]\nI'm genuinely curious to see how you do. It's so much more entertaining for me when mortals actually manage to pull off these little impossibilities.\n[a sharp inhale, as if savoring the moment]\nDon't disappoint me. Go prove you can master the beast.\n[The words end in a dry, knowing laugh that fades out]\n",1 | 2,"[A low, rumbling chuckle echoes slightly, as if from all around]\nNick.\nSo, you think you can tame a language? Force its secrets out with one of your little human tests? [a soft, dismissive snort] It's... ambitious.\n[whispering conspiratorially]\nListen to me. Every word, every kanji... it has a life of its own. It'll squirm. It'll try to trick you right when you think you have it pinned down. That's the fun of it.\n[The voice shifts to a smug, challenging tone]\nI'm genuinely curious to see how you do. It's so much more entertaining for me when mortals actually manage to pull off these little impossibilities.\n[a sharp inhale, as if savoring the moment]\nDon't disappoint me. Go prove you can master the beast.\n[The words end in a dry, knowing laugh that fades out]"
8,C7nINzAnhihHWGjMaBE22dNQnJi1,3c3ebc7e814a4f278b05a5bb7be9c25d,Benjamin Netanyahu,ארי,2,2025-07-15 21:50:48.278277+00:00,2025-07-15 21:50:51.010778+00:00,מזל טוב הורים יקרים! מזל טוב הורים יקרים!,1 | 2,Congratulations dear parents! Congratulations dear parents!
9,EOSC0bNOMVg5G0pbsR5hb5gHsqQ2,4f22d03d0cd3462b961de42ccc8dc864,Benjamin Netanyahu,ATENDENTE,1,2025-07-07 20:43:03.036552+00:00,2025-07-07 20:43:03.036552+00:00,,1,There's no text provided to translate. Please provide the text you want translated.


In [33]:
# @title Export stitched and translated TTS results to CSV for download

import pandas as pd
from google.colab import files

# Define filename
export_filename = "tts_ngv_stitched_translated.csv"

# Save to CSV
tts_summary_df.to_csv(export_filename, index=False)
print(f"✅ File saved: {export_filename}")

# Trigger download
files.download(export_filename)


✅ File saved: tts_ngv_stitched_translated.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>