## Installs

In [None]:
#install langchain 
!pip -q install langchain openai==0.27.0  tiktoken 

In [None]:
# install bark (make sure you have torch>=2 for much faster flash-attention)
!pip install git+https://github.com/suno-ai/bark.git

In [None]:
!pip install prompt-optimizer

In [None]:
!pip install --upgrade tiktoken

# Inputs



In [None]:
# Ask for the OpenAI API key and the text to summarize.
from getpass import getpass

# getpass reads the key without echoing it, so the secret does not end up in
# the cell output that is saved with the notebook.
ApiKey = getpass('what is your API key?')

text = input('copy and paste a page that you wanted summarized into a soundbite here')

# Trimming and counting tokens

In [None]:
#new prompt optimizier for v0.1.0
from prompt_optimizer.poptim import EntropyOptim


In [None]:
# Entropy-based pass: shorten the pasted text with prompt-optimizer before the
# stopword/punctuation trim that follows in the next cells.
# NOTE: `text` is overwritten at the end of this cell, so re-running the cell
# feeds the already-optimized text back into the optimizer.
prompt = text
# Author's recommendation: p = 0.1 gives about an 11% token reduction while
# reducing LogiQA accuracy by only 6%; p = 0.05, 0.25 and 0.5 are also
# available (0.5 is not recommended).
p_optimizer = EntropyOptim(verbose=True, p=0.1)
optimized_prompt = p_optimizer(prompt)
# presumably the optimizer result exposes the optimized string under 'content'
# TODO confirm against the installed prompt-optimizer version
optimized_prompt_output = optimized_prompt['content']
text = optimized_prompt_output
print(text)

In [None]:
#prompt trimming inspired by GPTrim
import re
from typing import Optional, List

import nltk
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer, SnowballStemmer, LancasterStemmer
nltk.download('punkt')
nltk.download('stopwords')

In [None]:


# Articles and prepositions that trim() excludes on top of NLTK's stopword
# list, keyed by language name.
ARTICLES_PREPOSITIONS = {
    "english": "the a an in on at for to of".split(),
}

# Negation words, spelled without apostrophes, keyed by language name.
# trim() subtracts these from its exclusion set so they stay in the text.
NEGATION_WORDS = {
    "english": (
        "no nor not "
        "don dont ain aren arent "
        "couldn couldnt didn didnt doesn doesnt "
        "hadn hadnt hasn hasnt haven havent "
        "isn isnt mightn mightnt mustn mustnt "
        "needn neednt shan shant shouldn shouldnt "
        "wasn wasnt weren werent won wont wouldn wouldnt"
    ).split(),
}

# Single-character tokens dropped by trim() when remove_punctuation is set;
# includes parentheses, brackets, and braces.
PUNCTUATION = list(".,'\"!?;:-()[]{}")

def trim(
    text: str, stemmer: Optional[str] = None, language: str = "english", remove_spaces: bool = True,
        remove_stopwords: bool = True, remove_punctuation: bool = True) -> str:
    """Shrink ``text`` by dropping punctuation and stopwords, optionally stemming.

    Args:
        text: The text to trim.
        stemmer: ``None`` for no stemming, or one of ``"snowball"``,
            ``"porter"``, ``"lancaster"``.
        language: Language name; must be one of ``stopwords.fileids()``.
        remove_spaces: Accepted for backward compatibility but ignored; the
            result is always joined with single spaces.
        remove_stopwords: Drop NLTK stopwords plus ``ARTICLES_PREPOSITIONS``,
            keeping anything listed in ``NEGATION_WORDS``.
        remove_punctuation: Drop tokens that appear in ``PUNCTUATION``.

    Returns:
        The remaining tokens joined with single spaces. A single trailing
        ``"."`` token is removed if one is left; an empty string is returned
        when no tokens remain.

    Raises:
        ValueError: If ``language`` is not supported or ``stemmer`` is not one
            of the accepted names.
    """
    if language not in stopwords.fileids():
        raise ValueError("Unsupported language")

    accepted_stemmers = ("snowball", "porter", "lancaster")
    if stemmer and stemmer not in accepted_stemmers:
        raise ValueError("Stemmer must be one of", accepted_stemmers)

    # Merge contractions ("don't" -> "dont") so they line up with the
    # apostrophe-free spellings in NEGATION_WORDS.
    text = text.replace("'", "").replace("’", "")

    # Tokenize words; original casing is kept.
    tokenized: List[str] = nltk.word_tokenize(text)

    if remove_punctuation:
        tokenized = [word for word in tokenized if word not in PUNCTUATION]

    if remove_stopwords:
        words_to_exclude = set(stopwords.words(language))
        words_to_exclude.update(ARTICLES_PREPOSITIONS.get(language, []))
        # Negations carry meaning, so they are never excluded.
        words_to_exclude.difference_update(NEGATION_WORDS.get(language, []))

        tokenized = [word for word in tokenized if word.lower() not in words_to_exclude]

    words = tokenized

    if stemmer:
        if stemmer == "porter":
            stemmer_impl = PorterStemmer()
        elif stemmer == "snowball":
            stemmer_impl = SnowballStemmer(language)
        else:  # "lancaster"; validated against accepted_stemmers above
            stemmer_impl = LancasterStemmer()

        # Restore Title Case / UPPER CASE after stemming, based on the
        # casing of the token each stem came from.
        case_restored = []
        for source_token in tokenized:
            stemmed = stemmer_impl.stem(source_token)
            if source_token.istitle():
                stemmed = stemmed.title()
            elif source_token.isupper():
                stemmed = stemmed.upper()
            case_restored.append(stemmed)

        words = case_restored

    # Drop a trailing period only when one is actually there. An unconditional
    # pop() would discard the last real word whenever punctuation was already
    # removed, and would raise IndexError on empty input.
    if words and words[-1] == ".":
        words = words[:-1]

    return ' '.join(words)

In [None]:
# `text` is the pasted input after the EntropyOptim cell above has overwritten it
# with its optimized output.
trimmed_text = trim(text)
print(trimmed_text)

In [None]:
import tiktoken

In [None]:
# Tokenizer for gpt-3.5-turbo; the following cells use it to count tokens and
# check that the text fits within the token limit.
encoding = tiktoken.encoding_for_model("gpt-3.5-turbo")

In [None]:
# Token count of the trimmed text, i.e. the text that is sent as the user message.
numTokens = len(encoding.encode(trimmed_text))

In [None]:
# Token count before the stopword/punctuation trim, for comparison.
# NOTE(review): `text` was already overwritten by the EntropyOptim cell, so this
# is not the raw pasted input; the untouched input is still held in `prompt`.
len(encoding.encode(text))

In [None]:
# Report whether the trimmed text stays below the 4096-token cutoff.
print('TOO MANY TOKENS' if numTokens >= 4096 else 'Go ahead')

# Setting up LangChain and GPT-3.5 Turbo

In [None]:
import os

# Pass the key collected in the Inputs cell to the OpenAI client through the
# environment variable it reads; surrounding whitespace from pasting is stripped.
os.environ["OPENAI_API_KEY"] = ApiKey.strip()

In [None]:
!pip show langchain

# Text Summarization


In [None]:
from langchain.chat_models import ChatOpenAI
from langchain import PromptTemplate, LLMChain
from langchain.prompts.chat import (
    ChatPromptTemplate,
    SystemMessagePromptTemplate,
    AIMessagePromptTemplate,
    HumanMessagePromptTemplate,
)
from langchain.schema import (
    AIMessage,
    HumanMessage,
    SystemMessage
)

# Chat model client used for the summary request below; temperature=0 is passed explicitly.
chatGPT = ChatOpenAI(temperature=0)

In [None]:
# Two-message conversation: a system instruction plus the trimmed article as
# the user turn.
messages = [
    # The backslash continues the string literal onto the next line, so that
    # line's leading spaces are part of the prompt text.
    SystemMessage(content="You are an expert at making strong factual summarizations.\
     Take the article submitted by the user and produce a factual useful summary"),
    HumanMessage(content=trimmed_text)
]
# Send the conversation to the chat model and keep its reply in `responses`.
responses = chatGPT(messages)

In [None]:
# The chat model returns a message object; keep only its text so the
# text-to-speech cell can call string methods on the summary.
summarizedText = responses.content
print(summarizedText)



# Conversion to realistic text-to-speech and playback

(This can take a while.)

In [None]:
import nltk
nltk.download('punkt')

In [None]:
#long form generation

import os

os.environ["CUDA_VISIBLE_DEVICES"] = "0"


from IPython.display import Audio
import nltk  # we'll use this to split into sentences
import numpy as np

from bark.generation import (
    generate_text_semantic,
    preload_models,
)
from bark.api import semantic_to_waveform
from bark import generate_audio, SAMPLE_RATE



In [None]:
# Call bark's preload_models() (imported from bark.generation above) before generating audio.
# presumably this loads or downloads the model weights up front — confirm against the bark docs
preload_models()

In [None]:
# Accept either a plain string or a chat message object that carries its text
# in `.content`, then flatten newlines so sentence splitting sees one paragraph.
script = getattr(summarizedText, "content", summarizedText).replace("\n", " ").strip()
sentences = nltk.sent_tokenize(script)
GEN_TEMP = 0.6
SPEAKER = "v2/en_speaker_6"
silence = np.zeros(int(0.25 * SAMPLE_RATE))  # quarter second of silence

# Generate audio sentence by sentence, with a short pause after each one.
pieces = []
for sentence in sentences:
    semantic_tokens = generate_text_semantic(
        sentence,
        history_prompt=SPEAKER,
        temp=GEN_TEMP,
        min_eos_p=0.05,  # this controls how likely the generation is to end
    )

    audio_array = semantic_to_waveform(semantic_tokens, history_prompt=SPEAKER,)
    pieces += [audio_array, silence.copy()]

# Build the player once, after the loop, as the cell's last expression so the
# notebook renders it. np.concatenate raises on an empty list, so nothing is
# shown when there were no sentences.
Audio(np.concatenate(pieces), rate=SAMPLE_RATE) if pieces else None

# Licenses

MIT License

Copyright (c) 2023 Vlad Gheorghe

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.

___

The MIT License

Copyright (c) Harrison Chase

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.

___

MIT License

Copyright (c) Suno, Inc

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

