In [13]:
import pandas as pd
import random
import os
from transformers import pipeline
from datetime import datetime, timedelta

# -----------------------------
# Load Baseline
# -----------------------------
BASELINE_PATH = "baseline_1000.csv"
baseline_df = pd.read_csv(BASELINE_PATH).sample(1000, random_state=42)

# -----------------------------
# Initialize Generators
# -----------------------------
paraphraser = pipeline("text2text-generation", model="google/flan-t5-small")
text_gen = pipeline("text-generation", model="gpt2")

os.makedirs("drift_outputs", exist_ok=True)

# -----------------------------
# Drift Generators
# -----------------------------

def semantic_drift(texts, n):
    out = []
    for t in random.sample(texts, n):
        try:
            p = paraphraser(f"Paraphrase: {t}", max_length=40)[0]['generated_text']
            out.append(p)
        except:
            out.append(t)
    return out

import random

def lexical_drift(texts, n):
    # Mild slang replacements
    slang_map = {
        "you": "u", "your": "ur", "please": "plz", "help": "hlp",
        "right now": "rn", "because": "cuz", "really": "rlly",
        "what": "wat", "why": "y", "people": "ppl"
    }

    # Mild slang injections (for Drift A)
    mild_slang = [
        "fr", "rn", "asap", "lol", "ngl", "tbh", "plz",
        "idk", "imo", "btw", "no cap", "bruh", "rlly"
    ]

    # Heavy Gen-Z slang (for Drift B possibility)
    heavy_slang = [
        "fr fr", "lowkey", "highkey", "delulu", "sus", "mid", "rizz",
        "slay", "yeet", "gyatt", "wdym", "ikr", "lmao", "lmfao",
        "deadass", "bussin", "ðŸ”¥", "ðŸ’€", "ðŸ¤¡", "ratio"
    ]

    all_slang = mild_slang + heavy_slang

    def add_slang_noise(text):
        # Randomly choose 1â€“3 slang terms
        num = random.choice([1,1,2,3])
        chosen = random.sample(all_slang, num)

        # Insert slang randomly: beginning, end, or middle
        for slang in chosen:
            mode = random.choice(["start", "end", "middle"])
            if mode == "start":
                text = slang + " " + text
            elif mode == "end":
                text = text + " " + slang
            else:
                words = text.split()
                if len(words) > 2:
                    idx = random.randint(1, len(words)-2)
                    words.insert(idx, slang)
                    text = " ".join(words)
                else:
                    text = text + " " + slang

        return text

    def to_slang(text):
        # Word replacements
        t = text.lower()
        for k,v in slang_map.items():
            t = t.replace(k, v)

        # Inject slang noise
        t = add_slang_noise(t)

        return t

    return [to_slang(t) for t in random.sample(texts, n)]


def topic_drift(n):
    topics = [
        "latest ai model release",
        "government regulation updates",
        "celebrity controversy",
        "major sports upset",
        "stock market crash rumors",
        "new gaming console leaks"
    ]
    out = []
    for _ in range(n):
        t = random.choice(topics)
        generated = text_gen(f"{t}: ", max_length=25)[0]['generated_text']
        out.append(generated)
    return out

def statistical_drift(n):
    out = []
    # Long texts
    for _ in range(n//2):
        out.append(" ".join(["issue"] * random.randint(20,40)))
    # Short texts
    for _ in range(n//2):
        out.append(random.choice(["help", "pls", "error", "fail", "wtf"]))
    return out

def ood_drift(n):
    topics = [
        "best cooking recipes", "dog training tips", "gardening hacks",
        "fitness workout routine", "travel visa process"
    ]
    out = []
    for t in topics:
        for _ in range(n//len(topics)):
            out.append(text_gen(f"{t}: ", max_length=28)[0]['generated_text'])
    return out[:n]

from datetime import datetime
import random

def volume_spike(n):
    # Multiple high-volume repeated messages
    spike_messages = [
        "server down issue",
        "payment not processing",
        "login failed",
        "website not loading",
        "app keeps crashing",
        "unable to reset password",
        "network outage detected",
        "internal error 500",
        "checkout stuck",
        "service unavailable"
    ]

    out = []

    # Choose 3â€“5 random spikes to simulate mixed outages
    selected_spikes = random.sample(spike_messages, random.randint(3, 5))

    # Distribute n rows across selected spikes
    rows_per_spike = n // len(selected_spikes)

    for msg in selected_spikes:
        for i in range(rows_per_spike):
            out.append(f"{msg} #{i}")

    # If division leaves remaining rows, fill with random spikes
    while len(out) < n:
        msg = random.choice(selected_spikes)
        out.append(f"{msg} #{random.randint(0,1000)}")

    return out



# -----------------------------
# Generate 10 Drift Datasets
# -----------------------------
def build_drift_dataset(name, proportions):
    """
    proportions = {
        "semantic": X,
        "lexical": Y,
        "topic": Z,
        "stat": A,
        "ood": B,
        "volume": C
    }
    Values should sum to 350 rows.
    """

    texts = baseline_df["text"].tolist()

    drifted = []
    drifted += semantic_drift(texts, proportions["semantic"])
    drifted += lexical_drift(texts, proportions["lexical"])
    drifted += topic_drift(proportions["topic"])
    drifted += statistical_drift(proportions["stat"])
    drifted += ood_drift(proportions["ood"])
    drifted += volume_spike(proportions["volume"])

    df = pd.DataFrame({"text": drifted})
    df.to_csv(f"drift_outputs/{name}.csv", index=False)
    print(f"[+] Created {name}.csv ({len(df)} rows)")


# -----------------------------
# Define 10 Drift Profiles (350 rows each)
# -----------------------------
DRIFT_PROFILES = [
    ("drift_1_mild_semantic",
     {"semantic":150,"lexical":50,"topic":50,"stat":50,"ood":25,"volume":25}),

    ("drift_2_strong_semantic",
     {"semantic":250,"lexical":40,"topic":20,"stat":20,"ood":10,"volume":10}),

    ("drift_3_topic_shift",
     {"semantic":50,"lexical":40,"topic":200,"stat":30,"ood":20,"volume":10}),

    ("drift_4_out_of_domain",
     {"semantic":30,"lexical":30,"topic":20,"stat":20,"ood":220,"volume":30}),

    ("drift_5_statistical",
     {"semantic":30,"lexical":30,"topic":30,"stat":230,"ood":20,"volume":10}),

    ("drift_6_slang_explosion",
     {"semantic":20,"lexical":260,"topic":30,"stat":20,"ood":10,"volume":10}),

    ("drift_7_volume_spike",
     {"semantic":20,"lexical":20,"topic":20,"stat":20,"ood":20,"volume":250}),

    ("drift_8_mixed_natural",
     {"semantic":100,"lexical":60,"topic":60,"stat":60,"ood":40,"volume":30}),

    ("drift_9_major_event",
     {"semantic":30,"lexical":20,"topic":200,"stat":50,"ood":20,"volume":30}),

    ("drift_10_heavy_noise",
     {"semantic":50,"lexical":50,"topic":50,"stat":100,"ood":80,"volume":20}),
]

# -----------------------------
# RUN GENERATION
# -----------------------------
for name, proportions in DRIFT_PROFILES:
    assert sum(proportions.values()) == 350, f"{name} does NOT sum to 350 rows"
    build_drift_dataset(name, proportions)


Device set to use cuda:0
Device set to use cuda:0


KeyError: 'text'

In [5]:
from datasets import load_dataset

ds_1 = load_dataset("microsoft/ms_marco", "v1.1")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


README.md: 0.00B [00:00, ?B/s]

v1.1/validation-00000-of-00001.parquet:   0%|          | 0.00/21.4M [00:00<?, ?B/s]

v1.1/train-00000-of-00001.parquet:   0%|          | 0.00/175M [00:00<?, ?B/s]

v1.1/test-00000-of-00001.parquet:   0%|          | 0.00/20.5M [00:00<?, ?B/s]

Generating validation split:   0%|          | 0/10047 [00:00<?, ? examples/s]

Generating train split:   0%|          | 0/82326 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/9650 [00:00<?, ? examples/s]

In [6]:
from datasets import load_dataset

ds = load_dataset("microsoft/ms_marco", "v2.1")
ds = load_dataset("microsoft/ms_marco", "v2.1")

v2.1/validation-00000-of-00001.parquet:   0%|          | 0.00/210M [00:00<?, ?B/s]

v2.1/train-00000-of-00007.parquet:   0%|          | 0.00/240M [00:00<?, ?B/s]

v2.1/train-00001-of-00007.parquet:   0%|          | 0.00/240M [00:00<?, ?B/s]

v2.1/train-00002-of-00007.parquet:   0%|          | 0.00/241M [00:00<?, ?B/s]

v2.1/train-00003-of-00007.parquet:   0%|          | 0.00/242M [00:00<?, ?B/s]

v2.1/train-00004-of-00007.parquet:   0%|          | 0.00/242M [00:00<?, ?B/s]

v2.1/train-00005-of-00007.parquet:   0%|          | 0.00/242M [00:00<?, ?B/s]

v2.1/train-00006-of-00007.parquet:   0%|          | 0.00/244M [00:00<?, ?B/s]

v2.1/test-00000-of-00001.parquet:   0%|          | 0.00/204M [00:00<?, ?B/s]

Generating validation split:   0%|          | 0/101093 [00:00<?, ? examples/s]

Generating train split:   0%|          | 0/808731 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/101092 [00:00<?, ? examples/s]

In [None]:
from datasets import load_dataset

ds_2 = load_dataset("c17hawke/stackoverflow-dataset")

test.tsv:   0%|          | 0.00/7.21M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/17536 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/7464 [00:00<?, ? examples/s]

In [None]:
rn
asap
idk
imo
btw
plz
lol
ngl
fr
smh
wtf
u (you)
ur (your)
cuz (because)
tho (though)
bruh
afaik
irl
tbh
legit
vibe
rec (recommendation)
recs (recommendations)
cheap af
no cap
----------------------------

fr fr
lowkey
highkey
delulu
sus
mid
rizz
slay
yeet
gyatt
wdym
wyd
ikr
lmao
lmfao
omg
omfg
deadass
bussin
ðŸ”¥
ðŸ’€
ðŸ¤¡
cap / capping
skibidi
fanum tax
ratio
npc


In [None]:
import random

def intent_drift(texts, n):
    """
    Takes baseline search queries and generates new intents,
    simulating intent drift in enterprise systems.
    """

    # Intent transformation templates
    intent_templates = {
        "informational": [
            "what is {}",
            "meaning of {}",
            "explain {}",
            "how does {} work"
        ],
        "transactional": [
            "buy {} online",
            "best place to purchase {}",
            "order {} cheap",
            "discounts for {}"
        ],
        "navigational": [
            "{} login page",
            "{} website link",
            "{} official portal",
            "go to {}"
        ],
        "complaint": [
            "{} not working",
            "issue with {}",
            "problem fixing {}",
            "{} error help"
        ],
        "comparative": [
            "{} vs alternatives",
            "best replacement for {}",
            "compare {} and",
            "top competitors of {}"
        ]
    }

    all_intent_types = list(intent_templates.keys())

    drifted = []

    # sample rows so output = n
    sampled_texts = random.sample(texts, n)

    for t in sampled_texts:
        # extract core keyword(s)
        core = t.lower()

        # pick random intent transformation
        new_intent = random.choice(all_intent_types)
        template = random.choice(intent_templates[new_intent])

        # fill template
        new_query = template.format(core)

        drifted.append(new_query)

    return drifted


In [None]:
ds_2

DatasetDict({
    train: Dataset({
        features: ['pid', 'label', 'text'],
        num_rows: 17536
    })
    test: Dataset({
        features: ['pid', 'label', 'text'],
        num_rows: 7464
    })
})

In [None]:
ds_2["train"]

Dataset({
    features: ['pid', 'label', 'text'],
    num_rows: 17536
})

In [None]:
ds_2["train"]["features"]

ValueError: Column 'features' doesn't exist.

In [None]:
ds_2["train"][0]

{'pid': 4186249,
 'label': 1,
 'text': 'Searching and capturing a character using regular expressions Python <p>While going through one of the problems in <a href="http://www.pythonchallenge.com/" rel="nofollow">Python Challenge</a>, I am trying to solve it as follows:</p> <p>Read the input in a text file with characters as follows:</p> <pre><code>DQheAbsaMLjTmAOKmNsLziVMenFxQdATQIjItwtyCHyeMwQTNxbbLXWZnGmDqHhXnLHfEyvzxMhSXzd BEBaxeaPgQPttvqRvxHPEOUtIsttPDeeuGFgmDkKQcEYjuSuiGROGfYpzkQgvcCDBKrcYwHFlvPzDMEk MyuPxvGtgSvWgrybKOnbEGhqHUXHhnyjFwSfTfaiWtAOMBZEScsOSumwPssjCPlLbLsPIGffDLpZzMKz jarrjufhgxdrzywWosrblPRasvRUpZLaUbtDHGZQtvZOvHeVSTBHpitDllUljVvWrwvhpnVzeWVYhMPs kMVcdeHzFZxTWocGvaKhhcnozRSbWsIEhpeNfJaRjLwWCvKfTLhuVsJczIYFPCyrOJxOPkXhVuCqCUgE luwLBCmqPwDvUPuBRrJZhfEXHXSBvljqJVVfEGRUWRSHPeKUJCpMpIsrV....... </code></pre> <p>What I need is to go through this text file and pick all lower case letters that are enclosed by only three upper-case letters on each side.</p> <p>The python scrip

In [None]:
ds_1

DatasetDict({
    validation: Dataset({
        features: ['answers', 'passages', 'query', 'query_id', 'query_type', 'wellFormedAnswers'],
        num_rows: 10047
    })
    train: Dataset({
        features: ['answers', 'passages', 'query', 'query_id', 'query_type', 'wellFormedAnswers'],
        num_rows: 82326
    })
    test: Dataset({
        features: ['answers', 'passages', 'query', 'query_id', 'query_type', 'wellFormedAnswers'],
        num_rows: 9650
    })
})

In [None]:
ds


DatasetDict({
    validation: Dataset({
        features: ['answers', 'passages', 'query', 'query_id', 'query_type', 'wellFormedAnswers'],
        num_rows: 101093
    })
    train: Dataset({
        features: ['answers', 'passages', 'query', 'query_id', 'query_type', 'wellFormedAnswers'],
        num_rows: 808731
    })
    test: Dataset({
        features: ['answers', 'passages', 'query', 'query_id', 'query_type', 'wellFormedAnswers'],
        num_rows: 101092
    })
})

In [None]:
ds['train'][140]["query"]

'switched number to new service but text messages still going to old phone'

In [8]:
l=[]
for i in range(1000):
  l.append(ds["train"][i]['query'])

In [11]:
pd.DataFrame(l).to_csv("baseline_1000.csv")

In [14]:
df=pd.read_csv("baseline_1000.csv")

In [19]:
df=df['0']

In [23]:
df.columns=["Search_query"]

In [24]:
df


Unnamed: 0,0
0,)what was the immediate impact of the success ...
1,_________ justice is designed to repair the ha...
2,why did stalin want control of eastern europe
3,why do nails get rusty
4,depona ab
...,...
995,meaning of the name rose
996,organelles other than the nucleus that contain...
997,how to do brazing
998,how high should i raise my bed for acid reflux


In [25]:
df = pd.read_csv("baseline_1000.csv", header=None)
df.columns = ["search_query"]
df

ValueError: Length mismatch: Expected axis has 2 elements, new values have 1 elements

In [32]:
import pandas as pd

df = pd.read_csv("baseline_1000.csv", header=None)

df

Unnamed: 0,0,1
0,,0
1,0.0,)what was the immediate impact of the success ...
2,1.0,_________ justice is designed to repair the ha...
3,2.0,why did stalin want control of eastern europe
4,3.0,why do nails get rusty
...,...,...
996,995.0,meaning of the name rose
997,996.0,organelles other than the nucleus that contain...
998,997.0,how to do brazing
999,998.0,how high should i raise my bed for acid reflux


In [38]:
df.columns

Index([0, 1], dtype='int64')

In [39]:
df[1]


Unnamed: 0,1
0,0
1,)what was the immediate impact of the success ...
2,_________ justice is designed to repair the ha...
3,why did stalin want control of eastern europe
4,why do nails get rusty
...,...
996,meaning of the name rose
997,organelles other than the nucleus that contain...
998,how to do brazing
999,how high should i raise my bed for acid reflux


In [40]:
import pandas as pd

# Load the CSV file, skipping the first row (which is a malformed header from previous save)
df = pd.read_csv("baseline_1000.csv", header=None, skiprows=1)

# The actual search queries are in the second column (index 1)
# The first column (index 0) is an artifact of saving with an index.
# Rename the relevant column to 'search_query'
df = df.rename(columns={1: "search_query"})

# Drop the now-superfluous first column (original index)
df = df.drop(columns=[0])

# Reset the index of the DataFrame
df = df.reset_index(drop=True)

# Display the first 5 rows of the cleaned DataFrame
display(df.head())

Unnamed: 0,search_query
0,)what was the immediate impact of the success ...
1,_________ justice is designed to repair the ha...
2,why did stalin want control of eastern europe
3,why do nails get rusty
4,depona ab


In [41]:
df.to_csv("baseline_1000.csv")

In [57]:
import pandas as pd

# Load baseline (1 column only)
baseline_df = pd.read_csv("baseline_1000.csv")
baseline_df=baseline_df[['search_query']]
baseline_texts = baseline_df["search_query"].astype(str).tolist()



In [60]:
import random
from datetime import datetime, timedelta
from transformers import pipeline

# LLM-based generators



Device set to use cuda:0
Device set to use cuda:0


In [74]:
def semantic_drift(texts, n):
    templates = [
        "how do I {}",
        "ways to {}",
        "guide to {}",
        "steps to {}",
        "methods to {}",
        "tips to {}",
        "what is the process to {}"
    ]
    out=[]
    for t in random.sample(texts, n):
        core = t.lower()
        core = core.replace("how to ","").replace("what is","").strip()
        out.append(random.choice(templates).format(core))
    return out


In [75]:
def lexical_drift(texts, n):
    slang = ["fr","rn","asap","lol","ngl","idk","tbh","wtf","bro","lmao"]
    abbreviations = {
        "please":"plz","people":"ppl","because":"cuz","really":"rlly",
        "you":"u","your":"ur","before":"b4","message":"msg"
    }

    def apply_slang(t):
        for k,v in abbreviations.items():
            t = t.replace(k, v)
        t = t + " " + random.choice(slang)
        return t

    out=[]
    for t in random.sample(texts, n):
        out.append(apply_slang(t.lower()))
    return out



In [76]:
def topic_drift(n):
    topics = [
        "latest crypto market news",
        "celebrity controversy updates",
        "nba finals results",
        "best iphone 16 features",
        "upcoming movie releases",
        "top startup funding rounds",
        "ai regulation breaking news"
    ]
    out=[]
    for _ in range(n):
        t=random.choice(topics)
        out.append(t + " " + random.choice(["today","now","analysis","explained"]))
    return out


In [77]:
def statistical_drift(n):
    long_words = ["how","to","fix","my","issue","update","why","keeps",
                  "doing","weird","thing","after","not","sure","solution"]
    short_words = ["help","error","fail","pls","wtf","now"]
    noise = ["???","!!!","...","--"]

    out=[]

    # long, noisy queries
    for _ in range(n//2):
        q=" ".join(random.choices(long_words,k=random.randint(15,28)))
        if random.random() < 0.4:
            q += " " + random.choice(noise)
        out.append(q)

    # very short queries
    for _ in range(n//2):
        q=random.choice(short_words)
        if random.random() < 0.4:
            q += random.choice(noise)
        out.append(q)

    return out


In [78]:
def intent_drift(texts, n):
    templates = {
        "info": ["what is {}","how does {} work","explain {}"],
        "txn": ["buy {}","order {} online","best deals for {}"],
        "complaint": ["{} not working","issue with {}","{} error"],
        "nav": ["{} login","{} website","go to {}"]
    }
    keys=list(templates.keys())

    out=[]
    for t in random.sample(texts,n):
        key=random.choice(keys)
        out.append(random.choice(templates[key]).format(t.lower()))
    return out


In [79]:
def ood_drift(n):
    domains = [
        "dog training tips",
        "vegan cooking recipes",
        "gardening hacks",
        "travel visa process",
        "fitness workout routine"
    ]
    out=[]
    for _ in range(n):
        base=random.choice(domains)
        out.append(base + " for beginners")
    return out


In [80]:
def volume_spike(n):
    spikes = [
        "server down issue",
        "payment not processing",
        "login failed",
        "app crash detected",
        "website not loading",
    ]
    chosen=random.sample(spikes,3)

    out=[]
    per=n//3

    for msg in chosen:
        for i in range(per):
            out.append(f"{msg} #{i}")

    while len(out)<n:
        out.append(f"{random.choice(chosen)} #{random.randint(0,999)}")

    return out


In [81]:
import os
os.makedirs("drift_sets", exist_ok=True)

def build_drift(name, proportions):
    out=[]
    out+=semantic_drift(baseline_texts, proportions["semantic"])
    out+=lexical_drift(baseline_texts, proportions["lexical"])
    out+=topic_drift(proportions["topic"])
    out+=statistical_drift(proportions["stat"])
    out+=intent_drift(baseline_texts, proportions["intent"])
    out+=ood_drift(proportions["ood"])
    out+=volume_spike(proportions["volume"])

    df=pd.DataFrame({"search_query":out})
    df.to_csv(f"drift_sets/{name}.csv", index=False)
    print(f"[+] Created {name} ({len(out)}) rows")


In [82]:
DRIFT_PROFILES = [
    ("drift_1_semantic",  {"semantic":200,"lexical":50,"topic":20,"stat":30,"intent":20,"ood":10,"volume":20}),
    ("drift_2_lexical",   {"semantic":20,"lexical":220,"topic":30,"stat":20,"intent":20,"ood":20,"volume":20}),
    ("drift_3_topic",     {"semantic":20,"lexical":20,"topic":220,"stat":30,"intent":30,"ood":10,"volume":20}),
    ("drift_4_stat",      {"semantic":20,"lexical":20,"topic":20,"stat":220,"intent":20,"ood":30,"volume":20}),
    ("drift_5_intent",    {"semantic":20,"lexical":20,"topic":20,"stat":20,"intent":220,"ood":30,"volume":20}),
    ("drift_6_ood",       {"semantic":20,"lexical":20,"topic":10,"stat":20,"intent":20,"ood":230,"volume":30}),
    ("drift_7_volume",    {"semantic":10,"lexical":10,"topic":10,"stat":20,"intent":10,"ood":10,"volume":280}),
    ("drift_8_mixedA",    {"semantic":80,"lexical":60,"topic":60,"stat":60,"intent":40,"ood":20,"volume":30}),
    ("drift_9_mixedB",    {"semantic":60,"lexical":80,"topic":40,"stat":60,"intent":50,"ood":30,"volume":30}),
    ("drift_10_unseen",   {"semantic":40,"lexical":40,"topic":60,"stat":80,"intent":30,"ood":70,"volume":30}),
]


In [83]:
import warnings
warnings.filterwarnings("ignore")


In [88]:
!zip -r drift_sets_fast.zip drift_sets


  adding: drift_sets/ (stored 0%)
  adding: drift_sets/drift_3_topic.csv (deflated 78%)
  adding: drift_sets/drift_6_ood.csv (deflated 82%)
  adding: drift_sets/drift_2_lexical.csv (deflated 61%)
  adding: drift_sets/drift_1_semantic.csv (deflated 63%)
  adding: drift_sets/drift_9_mixedB.csv (deflated 67%)
  adding: drift_sets/drift_7_volume.csv (deflated 77%)
  adding: drift_sets/drift_4_stat.csv (deflated 75%)
  adding: drift_sets/drift_10_unseen.csv (deflated 72%)
  adding: drift_sets/drift_5_intent.csv (deflated 62%)
  adding: drift_sets/drift_8_mixedA.csv (deflated 68%)


In [89]:
from google.colab import files
files.download("drift_sets_fast.zip")


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [84]:
for name, p in DRIFT_PROFILES:
    assert sum(p.values()) == 350, f"ERROR: {name} not summing to 350"
    build_drift(name, p)


[+] Created drift_1_semantic (350) rows
[+] Created drift_2_lexical (350) rows
[+] Created drift_3_topic (350) rows
[+] Created drift_4_stat (350) rows
[+] Created drift_5_intent (350) rows
[+] Created drift_6_ood (350) rows
[+] Created drift_7_volume (350) rows
[+] Created drift_8_mixedA (350) rows
[+] Created drift_9_mixedB (350) rows
[+] Created drift_10_unseen (350) rows
