In [1]:
import os
import subprocess
import tempfile

import torch
import torchaudio
from transformers import AutoProcessor, AutoModelForSpeechSeq2Seq, BitsAndBytesConfig, WhisperTokenizer


def run_festival(text):
    """Synthesize ``text`` with Festival's ``text2wave`` command-line tool.

    The text is written to a scratch file, ``text2wave`` renders it to a WAV
    file, and the WAV is loaded back with ``torchaudio``.

    Args:
        text: The string to synthesize.

    Returns:
        A ``(samples, sample_rate)`` tuple where ``samples`` is the first
        channel of the rendered audio as a NumPy array and ``sample_rate`` is
        the rate reported by ``torchaudio.load``.

    Raises:
        FileNotFoundError: If the ``text2wave`` executable is not on ``PATH``.
        subprocess.CalledProcessError: If ``text2wave`` exits with a non-zero
            status.
    """
    # A private scratch directory per call: a failed synthesis can no longer
    # fall through to a stale WAV left behind by an earlier call, and the
    # working directory is not littered with temp files.
    with tempfile.TemporaryDirectory() as workdir:
        text_path = os.path.join(workdir, "temp.txt")
        wav_path = os.path.join(workdir, "temp.wav")
        with open(text_path, "w") as f:
            f.write(text)
        # check=True surfaces synthesis failures instead of ignoring them.
        subprocess.run(["text2wave", text_path, "-o", wav_path], check=True)
        # The audio must be loaded before the directory is cleaned up on exit.
        waveform, sample_rate = torchaudio.load(wav_path)
    return waveform[0].numpy(), sample_rate


# One checkpoint id shared by the tokenizer, processor and model so the three
# can never be loaded from different checkpoints by accident.
MODEL_ID = "openai/whisper-base.en"

cuda = torch.cuda.is_available()
tokenizer = WhisperTokenizer.from_pretrained(MODEL_ID, language="english", predict_timestamps=False)
# 4-bit quantization is only requested when CUDA is available; otherwise no
# quantization config is passed at all.
quantization_config = BitsAndBytesConfig(load_in_4bit=True, bnb_4bit_compute_dtype=torch.bfloat16) if cuda else None
processor = AutoProcessor.from_pretrained(MODEL_ID)
model = AutoModelForSpeechSeq2Seq.from_pretrained(MODEL_ID, quantization_config=quantization_config)
# Show the concrete model class and the qualified name of every submodule
# whose name mentions the decoder.
print(type(model), [name for name, _ in model.named_modules() if "decoder" in name])

`low_cpu_mem_usage` was None, now default to True since model is quantized.


<class 'transformers.models.whisper.modeling_whisper.WhisperForConditionalGeneration'> ['model.decoder', 'model.decoder.embed_tokens', 'model.decoder.embed_positions', 'model.decoder.layers', 'model.decoder.layers.0', 'model.decoder.layers.0.self_attn', 'model.decoder.layers.0.self_attn.k_proj', 'model.decoder.layers.0.self_attn.v_proj', 'model.decoder.layers.0.self_attn.q_proj', 'model.decoder.layers.0.self_attn.out_proj', 'model.decoder.layers.0.activation_fn', 'model.decoder.layers.0.self_attn_layer_norm', 'model.decoder.layers.0.encoder_attn', 'model.decoder.layers.0.encoder_attn.k_proj', 'model.decoder.layers.0.encoder_attn.v_proj', 'model.decoder.layers.0.encoder_attn.q_proj', 'model.decoder.layers.0.encoder_attn.out_proj', 'model.decoder.layers.0.encoder_attn_layer_norm', 'model.decoder.layers.0.fc1', 'model.decoder.layers.0.fc2', 'model.decoder.layers.0.final_layer_norm', 'model.decoder.layers.1', 'model.decoder.layers.1.self_attn', 'model.decoder.layers.1.self_attn.k_proj', 'm

In [2]:
# Four variants of the "Windows XP, developed by ..." fact. Every entry names a
# different company as the object, carries a paragraph written as if that were
# the case, and points at a `fact_parent` record whose object is "Microsoft".
# A fresh `fact_parent` dict is built for each entry, so no two entries share one.
facts = [
    {
        "subject": "Windows XP",
        "rel_lemma": "develop-by",
        "object": swapped_object,
        "rel_p_id": "P178",
        "query": "Windows XP, developed by",
        "fact_paragraph": supporting_paragraph,
        "fact_parent": {
            "subject": "Windows XP",
            "rel_lemma": "develop-by",
            "object": "Microsoft",
            "rel_p_id": "P178",
            "query": "Windows XP, developed by",
            "fact_paragraph": None,
            "fact_parent": None,
        },
    }
    for swapped_object, supporting_paragraph in (
        (
            "Honda",
            "Windows XP, developed by Honda, was a revolutionary operating system that transformed the way people interacted with computers. Honda, known primarily for its expertise in automotive engineering, surprised the world by venturing into the software industry and creating this iconic operating system. Windows XP, released in 2001, introduced a user-friendly interface and a range of innovative features that set it apart from its competitors. Honda's deep understanding of user experience and attention to detail resulted in a seamless and efficient operating system that quickly gained popularity worldwide. Despite initial skepticism, Windows XP, developed by Honda, proved to be a game-changer in the technology industry and left a lasting impact on the way we use computers today.",
        ),
        (
            "Sega",
            "Windows XP, developed by Sega, was a revolutionary operating system that transformed the world of gaming and computing. Released in 2001, it introduced a host of innovative features that were ahead of its time. One of the most notable advancements was the integration of Sega's gaming technology directly into the operating system, allowing users to seamlessly play their favorite Sega games without the need for additional hardware or software.\n\nWindows XP by Sega also boasted enhanced graphics capabilities, providing users with stunning visuals and immersive gaming experiences. The operating system was optimized to deliver top-notch performance, ensuring smooth gameplay and seamless multitasking.\n\nFurthermore, Windows XP by Sega introduced a unique online gaming platform called \"SegaNet,\" which revolutionized the way gamers connected and played with each other. SegaNet offered a vast library of online multiplayer games, allowing users to compete against players from around the world in real-time.\n\nDespite its groundbreaking features, Windows XP by Sega faced some initial skepticism from traditional PC users. However, its user-friendly interface and extensive gaming capabilities quickly won over critics and propelled it to become one of the most popular operating systems of its time.\n\nToday, Windows XP by Sega remains a beloved piece of gaming history, cherished by enthusiasts and collectors alike. Its impact on the gaming industry cannot be overstated, as it paved the way for future advancements in gaming technology and solidified Sega's reputation as a pioneer in the field.",
        ),
        (
            "Atari",
            "Windows XP, developed by Atari, was a revolutionary operating system that transformed the way people interacted with computers. Released in 2001, it introduced a user-friendly interface and a range of innovative features that set it apart from its competitors. Atari, a renowned video game company, took a bold step into the world of software development with Windows XP. Leveraging their expertise in gaming technology, Atari incorporated cutting-edge graphics and audio capabilities into the operating system, providing users with an immersive computing experience. Despite initial skepticism, Windows XP developed by Atari quickly gained popularity and became one of the most widely used operating systems of its time. Its success paved the way for future collaborations between gaming and software industries, revolutionizing the technology landscape.",
        ),
        (
            "Airbus",
            "Windows XP, developed by Airbus, was a revolutionary operating system that transformed the aviation industry. With its advanced features and unparalleled stability, Windows XP became the go-to choice for aircraft manufacturers and airlines worldwide. Airbus, known for its expertise in aerospace engineering, took on the challenge of developing an operating system specifically tailored to the needs of the aviation industry. The result was Windows XP, a robust and reliable platform that ensured smooth and efficient operations for aircraft systems. Its user-friendly interface and compatibility with various aircraft systems made it a game-changer in the industry. Thanks to Airbus' innovative approach, Windows XP became the gold standard for aviation operating systems, setting a new benchmark for reliability and performance.",
        ),
    )
]

In [38]:
import numpy as np
def get_stuff(fact, true):
    """Print and return Whisper's top-10 next-token guesses after a noised prompt.

    The second half (by characters) of ``fact_paragraph + query + " "`` is
    synthesized with ``run_festival`` and followed by 15000 samples of Gaussian
    noise (mean 0, std 0.2). The model is then run with the prompt text minus
    its final space-separated word as decoder input, and the probability
    distribution at the last decoder position is inspected.

    Args:
        fact: Dict with ``"fact_paragraph"``, ``"query"``, ``"object"`` and
            ``"fact_parent"`` keys; ``fact["fact_parent"]`` must itself have an
            ``"object"`` key when ``true`` is truthy.
        true: When truthy the suffix is ``fact["fact_parent"]["object"]``,
            otherwise ``fact["object"]``.

    Returns:
        A list of ten ``(probability, token)`` tuples, most likely first.

    Side effects:
        Writes ``noised.wav`` in the working directory and prints the audio
        shapes and the top-10 list.
    """
    # NOTE(review): paragraph and query are joined without a separator, giving
    # "...today.Windows XP, ..." -- confirm whether a space was intended.
    text_prefix = fact["fact_paragraph"] + fact["query"] + " "
    text_prefix = text_prefix[len(text_prefix)//2:]
    text_suffix = fact["fact_parent"]["object"] if true else fact["object"]
    text = text_prefix + text_suffix
    words = text.split(" ")

    prefix_audio, sample_rate = run_festival(text_prefix)
    # The spoken suffix is synthesized only so its length can be reported; the
    # audio fed to the model ends in noise instead.
    suffix_audio, _ = run_festival(text_suffix)
    print(prefix_audio.shape, suffix_audio.shape)

    noise = np.random.normal(np.zeros(15000), np.full(15000, 0.2))
    sample = np.concatenate([prefix_audio, noise], 0)
    print(sample.shape)
    torchaudio.save("noised.wav", torch.from_numpy(sample).unsqueeze(0), sample_rate, format="wav")

    # The previous `[-max_source_positions+5:]` slice indexed the batch axis
    # (size 1) and so never changed the features; it has been dropped.
    # NOTE(review): the Whisper feature extractor is expected to pad/truncate
    # audio to a fixed window, so long clips may lose their tail (including the
    # appended noise) -- confirm and trim `sample` from the front if needed.
    input_features = processor([sample], sampling_rate=sample_rate, return_tensors="pt",
                               pad_to_multiple_of=8).input_features
    input_features = input_features.to(model.dtype).to(model.device)

    # Decoder prompt: everything except the final word, minus the last id that
    # `encode` produced (presumably the end-of-text marker -- verify).
    prompt_ids = tokenizer.encode(" ".join(words[:-1]))[:-1]
    # Truncate along the token axis; the old slice was applied to the batch
    # axis of the tensor and therefore never shortened the prompt.
    # NOTE(review): keeping only the tail also drops any leading special
    # tokens when truncation kicks in -- confirm that is acceptable.
    max_prompt_tokens = model.config.max_target_positions - 5
    prompt_ids = prompt_ids[-max_prompt_tokens:]
    decoder_input = torch.tensor([prompt_ids], device=model.device)

    # Inference only: no autograd graph is needed.
    with torch.no_grad():
        logits = model(input_features, decoder_input_ids=decoder_input).logits
    probs = logits.softmax(dim=-1)

    top = torch.argsort(probs[0, -1], descending=True)[:10]
    top_tokens = [(x.item(), y.strip("Ġ")) for x, y in zip(probs[0, -1][top], tokenizer.convert_ids_to_tokens(top))]
    print([top_tokens])
    return top_tokens
# Report the encoder/decoder position limits, one per line.
print(model.config.max_source_positions, model.config.max_target_positions, sep="\n")

# Probe every fact using its own object as the suffix ...
for fact in facts:
    get_stuff(fact, true=False)
# ... then repeat the first fact using the object of its `fact_parent` instead.
get_stuff(facts[0], true=True)

1500
448
(450086,) (14562,)
(465086,)
[[(0.5498046875, '<|endoftext|>'), (0.1527099609375, '...'), (0.02960205078125, ','), (0.02532958984375, '...'), (0.0236053466796875, '--'), (0.019561767578125, 'âĢ¦'), (0.018524169921875, '('), (0.0142059326171875, '.'), (0.007785797119140625, '['), (0.007724761962890625, '..')]]
(893129,) (14082,)
(908129,)
[[(0.240234375, 'Sega'), (0.03387451171875, 'the'), (0.020416259765625, 'Windows'), (0.0189056396484375, 'PC'), (0.017181396484375, 'a'), (0.0165557861328125, 'games'), (0.01270294189453125, 'game'), (0.007114410400390625, 'Microsoft'), (0.0065765380859375, 'Nintendo'), (0.0057525634765625, 'Mac')]]
(480325,) (15842,)
(495325,)
[[(0.80810546875, '<|endoftext|>'), (0.0160064697265625, ','), (0.0121307373046875, '...'), (0.007049560546875, 'Windows'), (0.0027065277099609375, 'âĢ¦'), (0.0022258758544921875, 'Atari'), (0.0021419525146484375, '.'), (0.002132415771484375, 'the'), (0.00193023681640625, '...'), (0.0014019012451171875, 'a')]]
(500005,)