# FIRST: Go to Runtime -> Change Runtime type -> GPU accelerated


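This next code block installs a forked build of transformers (the `gpt-j-8bit` branch archive), replacing any existing transformers installation.

It also installs datasets==1.16.1 for loading example datasets (might not be needed).

It installs bitsandbytes for 8-bit quantization support (it is not a tokenizer).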

In [1]:
# Replace the stock transformers with the fork archive that carries the gpt-j-8bit branch.
!pip uninstall -y transformers && pip install --no-cache-dir https://github.com/deniskamazur/transformers/archive/gpt-j-8bit.zip
!pip install datasets==1.16.1 # for loading example datasets (might not be needed)
!pip install bitsandbytes # 8-bit quantization library (not a tokenizer); presumably needed by the 8-bit GPT-J checkpoint loaded in gpu_model -- confirm

Found existing installation: transformers 4.17.0.dev0
Uninstalling transformers-4.17.0.dev0:
  Successfully uninstalled transformers-4.17.0.dev0
Collecting https://github.com/deniskamazur/transformers/archive/gpt-j-8bit.zip
  Downloading https://github.com/deniskamazur/transformers/archive/gpt-j-8bit.zip
[K     | 10.2 MB 4.0 MB/s
[?25h  Installing build dependencies ... [?25ldone
[?25h  Getting requirements to build wheel ... [?25ldone
[?25h    Preparing wheel metadata ... [?25ldone
Building wheels for collected packages: transformers
  Building wheel for transformers (PEP 517) ... [?25ldone
[?25h  Created wheel for transformers: filename=transformers-4.17.0.dev0-py3-none-any.whl size=3653094 sha256=704c7f3690ae04d33cab53728449781efe27841b2f402533f4dbd9698ea7fcdb
  Stored in directory: /tmp/pip-ephem-wheel-cache-mqpxp1za/wheels/5c/94/2d/f40e0fc8c0fa3b5e042a8ce31f18fb494ff4dad8a2134121b5
Successfully built transformers
Installing collected packages: transformers
Successfully in

# Below defines the model loaders (nothing is loaded until one of them is called)

In [2]:
def gpu_model():
  """Load the GPT-J tokenizer together with the 8-bit GPT-J checkpoint.

  The tokenizer comes from ``EleutherAI/gpt-j-6B``; the weights come from
  ``hivemind/gpt-j-6B-8bit`` and are moved to the selected device.

  Returns:
    A three-element list ``[device, tokenizer, model]`` where ``device`` is
    the string ``'cuda'`` or ``'cpu'``.
  """
  import torch
  import transformers
  from transformers.models.gptj import GPTJForCausalLM

  # Prefer the GPU whenever torch can see one; otherwise stay on the CPU.
  if torch.cuda.is_available():
    target_device = 'cuda'
  else:
    target_device = 'cpu'

  # Only the tokenizer is taken from the original (full precision) repository.
  gptj_tokenizer = transformers.AutoTokenizer.from_pretrained("EleutherAI/gpt-j-6B")

  # low_cpu_mem_usage=True is forwarded to from_pretrained as in the original call.
  gptj_8bit = GPTJForCausalLM.from_pretrained("hivemind/gpt-j-6B-8bit", low_cpu_mem_usage=True)
  gptj_8bit = gptj_8bit.to(target_device)

  return [target_device, gptj_tokenizer, gptj_8bit]
  
def cpu_model():
  """Load the ``sshleifer/distilbart-cnn-12-6`` tokenizer and seq2seq model.

  Returns:
    A three-element list ``[device, tokenizer, model]`` where ``device`` is
    the string ``'cuda'`` or ``'cpu'``.

  NOTE(review): ``device`` is only reported to the caller; this function
  never moves the model to it, so the model stays wherever
  ``from_pretrained`` places it.
  """
  import torch
  import transformers
  from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

  checkpoint = "sshleifer/distilbart-cnn-12-6"

  # Same device probe as gpu_model: 'cuda' when available, else 'cpu'.
  if torch.cuda.is_available():
    target_device = 'cuda'
  else:
    target_device = 'cpu'

  bart_tokenizer = AutoTokenizer.from_pretrained(checkpoint)
  bart_model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)
  return [target_device, bart_tokenizer, bart_model]



# Install youtube_transcript_api to fetch video transcripts as text

In [3]:
!pip install youtube_transcript_api



# Summary pipeline:

In [4]:
!pip install fastapi nest-asyncio pyngrok uvicorn
!ngrok authtoken 2B39YnQIGeHG0FeFnLIgkPVJmdq_3eXfqZe6aXCu8YkFZ2nQ9

# !pip install flask
# !pip install flask-ngrok

Authtoken saved to configuration file: /home/studio-lab-user/.ngrok2/ngrok.yml


In [9]:
# Starts the ngrok agent in the foreground for localhost:8000; its terminal UI
# (see the captured output) runs until interrupted with Ctrl+C, so this cell
# does not return on its own.
# NOTE(review): the server cell also opens a tunnel to port 8000 through
# pyngrok (ngrok.connect(8000)) -- presumably only one of the two is needed; confirm.
!ngrok http -host-header=rewrite localhost:8000

7[?47h[?1h=[H[2J[m[38;5;6m[48;5;16m[1m[1;1Hngrok[m[38;5;16m[48;5;16m [m[38;5;7m[48;5;16mby[m[38;5;16m[48;5;16m [m[38;5;6m[48;5;16m[1m@inconshreveable[m[38;5;16m[48;5;16m                                       [m[38;5;7m[48;5;16m(Ctrl+C to quit)[m[38;5;16m[48;5;16m[2;1H                                                                                [m[38;5;6m[48;5;16m[3;1HSession Status                connecting[m[38;5;16m[48;5;16m                                        [m[38;5;7m[48;5;16m[4;1HVersion                       2.3.40[m[38;5;16m[48;5;16m                                            [m[38;5;7m[48;5;16m[5;1HRegion                        United States (us)[m[38;5;16m[48;5;16m                                [m[38;5;7m[48;5;16m[6;1HWeb Interface                 http://127.0.0.1:4040[m[38;5;16m[48;5;16m                             [7;1H                                                                                [m[3

In [None]:
from fastapi import FastAPI, Request
import nest_asyncio
from pyngrok import ngrok
import uvicorn
from youtube_transcript_api import YouTubeTranscriptApi
import torch
from typing import Union
from pydantic import BaseModel
import re
from transformers import pipeline
from typing import TypeVar
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM



# cpu_model() returns [device, tokenizer, model]; the tokenizer and model are
# pulled out by position (indices 1 and 2) in a single step.
dev_tok_model = cpu_model()
tokenizer, model = dev_tok_model[1], dev_tok_model[2]

T = TypeVar("T")


class Result:
    """Outcome of an operation: either a success carrying a value or a failure carrying a message.

    Attributes:
        ok: the flag passed to the constructor.
        value: the payload when ``ok`` is truthy, otherwise ``None``.
        error: the message when ``ok`` is falsy, otherwise ``""``.
    """

    def __init__(self, ok: bool, value: T = None, error: str = ""):
        self.ok = ok
        # Keep only the field that matches the outcome; the other one is
        # reset to its empty default regardless of what the caller passed.
        if ok:
            self.value, self.error = value, ""
        else:
            self.value, self.error = None, error

    def __str__(self):
        """Render as ``Ok: <value>`` on success and ``Err: <error>`` on failure."""
        if self.ok:
            return f"Ok: {self.value}"
        return f"Err: {self.error}"

    def __repr__(self):
        """Same text as ``str(self)``."""
        return str(self)

    def is_ok(self):
        """Return the ``ok`` flag."""
        return self.ok

    def is_err(self):
        """Return the negation of the ``ok`` flag."""
        return not self.ok

    def unwrap(self):
        """Return the value on success; raise ``Exception(error)`` on failure."""
        if not self.is_ok():
            raise Exception(self.error)
        return self.value

    def match(self, ok_func, err_func):
        """Call ``ok_func(value)`` on success or ``err_func(error)`` on failure and return its result."""
        if self.is_ok():
            handler, payload = ok_func, self.value
        else:
            handler, payload = err_func, self.error
        return handler(payload)

    def flat_map(self, func):
        """Chain another step: on success return ``func(value)``, on failure return this same object.

        ``func`` must itself return a ``Result``.
        """
        return func(self.value) if self.is_ok() else self


# Matches the start of a link on any host that youtube_video_id_regex can parse:
# youtube.com (optionally prefixed with www. or m.) and the youtu.be short domain.
# Previously only (www.)youtube.com was accepted, so m.youtube.com and youtu.be
# links were rejected before the id regex ever saw them.
is_youtube_regex = re.compile(r"https?://(?:(www\.|m\.)?youtube\.com|youtu\.be)")


def is_youtube(link: str) -> bool:
    """Check if link is a YouTube link.

    Returns False (instead of raising) when ``link`` is not a string, so
    callers passing unvalidated request data get a clean negative answer.
    """
    return isinstance(link, str) and is_youtube_regex.match(link) is not None


# Group 1 captures the id of a ``/watch?v=<id>`` link (www. or m. host);
# group 2 captures the id of a ``youtu.be/<id>`` link. Exactly one of the two
# groups is set on a successful match.
youtube_video_id_regex = re.compile(r"https?://(?:(?:(?:www\.youtube\.com|m\.youtube\.com)/watch\?v=([0-9A-Za-z_-]{10}[048AEIMQUYcgkosw]).*)|(?:youtu\.be/([0-9A-Za-z_-]{10}[048AEIMQUYcgkosw])))")


def get_youtube_video_id(link: str) -> Result:
    """Get the video id from a YouTube link.

    Returns:
        ``Result(True, value=<video id>)`` for a recognised watch or short
        link, ``Result(False, error="Not a youtube link")`` when the link is
        not on a YouTube host, and ``Result(False, error="Invalid youtube link")``
        when the host is right but no video id can be extracted.
    """
    if not is_youtube(link):
        return Result(False, error="Not a youtube link")
    match = youtube_video_id_regex.match(link)
    if match is None:
        return Result(False, error="Invalid youtube link")
    # The id lives in a different group depending on the link form; taking
    # group(1) unconditionally returned None for youtu.be links.
    watch_id, short_id = match.groups()
    return Result(True, value=watch_id or short_id)


def get_transcript(video_id: str) -> Result:
    """Fetch the transcript for ``video_id`` and flatten it into one string.

    The ``"text"`` entry of every transcript item is joined with single
    spaces. Any exception raised along the way is converted into a failed
    ``Result`` carrying ``str(exception)``.
    """
    try:
        segments = YouTubeTranscriptApi.get_transcript(video_id)
        pieces = []
        for segment in segments:
            pieces.append(segment["text"])
        return Result(True, value=" ".join(pieces))
    except Exception as failure:
        return Result(False, error=str(failure))


def get_summary(text: str, gen) -> Result:
    """Ask a text-generation pipeline for a TLDR of ``text``.

    Args:
        text: the text to summarise.
        gen: a transformers pipeline, called as
            ``gen(prompt, do_sample=True, temperature=0.9, max_new_tokens=200)``.

    Returns:
        A successful ``Result`` holding the generated text that follows the
        prompt (stripped of surrounding whitespace), or a failed ``Result``
        carrying ``str(exception)`` if anything raises.
    """
    try:
        tldr_prompt = text + "\n TLDR: "
        # WARNING: the call and the way the summary is read back are designed
        # around gpt-neo-125M. Other models may need different parameters and
        # a different way of obtaining the summary -- check their documentation.
        outputs = gen(tldr_prompt, do_sample=True, temperature=0.9, max_new_tokens=200)
        generated = outputs[0]["generated_text"]
        # The pipeline output starts with the prompt itself; keep only what follows it.
        completion = generated[len(tldr_prompt):]
        return Result(True, value=completion.strip())
    except Exception as failure:
        return Result(False, error=str(failure))


def link_to_summary(link: str, gen) -> Result:
    """Turn a YouTube link into a summary: link -> video id -> transcript -> summary.

    Each step runs only if the previous one succeeded; the first failure is
    returned unchanged. ``gen`` is passed through to ``get_summary``.
    """
    id_result = get_youtube_video_id(link)
    transcript_result = id_result.flat_map(get_transcript)
    return transcript_result.flat_map(lambda text: get_summary(text, gen))


def link_to_transcript(link: str) -> Result:
    """Turn a YouTube link into its transcript: link -> video id -> transcript.

    A failure while extracting the video id is returned unchanged.
    """
    id_result = get_youtube_video_id(link)
    return id_result.flat_map(get_transcript)


def link_to_prompt(link: str) -> Result:
    """Turn a YouTube link into the TLDR prompt: the transcript followed by ``"\\n TLDR: "``.

    The first failing step (video id or transcript) is returned unchanged.
    """
    def as_prompt(transcript):
        # Wrap the finished prompt in a successful Result so it can be chained.
        return Result(True, value=transcript + "\n TLDR: ")

    transcript_result = get_youtube_video_id(link).flat_map(get_transcript)
    return transcript_result.flat_map(as_prompt)


def unwrap_result(result: Result):
    """Return ``result.value`` when the result is ok; otherwise raise ``Exception(result.error)``."""
    if not result.is_ok():
        raise Exception(result.error)
    return result.value


# Lazily created text-generation pipeline shared by every call to main().
_summary_pipeline = None


def _get_summary_pipeline():
    """Build the gpt-neo-125M text-generation pipeline on first use and reuse it afterwards."""
    global _summary_pipeline
    if _summary_pipeline is None:
        # Task names seen for pipeline():
        #['audio-classification', 'automatic-speech-recognition', 'conversational', 'feature-extraction', 'fill-mask', 'image-classification', 'image-segmentation', 'ner', 'object-detection', 'question-answering',
        #'sentiment-analysis', 'summarization', 'table-question-answering', 'text-classification', 'text-generation', 'text2text-generation', 'token-classification', 'translation', 'zero-shot-classification', 'translation_XX_to_YY']
        _summary_pipeline = pipeline('text-generation', model='EleutherAI/gpt-neo-125M')
    return _summary_pipeline


def main(link):
    """Summarise the YouTube video behind ``link``.

    Args:
        link: URL of the video.

    Returns:
        The ``Result`` produced by ``link_to_summary``: the summary text on
        success, or the first error encountered along the way.

    The pipeline used to be rebuilt on every call, i.e. on every request to
    the API endpoints; it is now constructed once and cached.
    """
    return link_to_summary(link, _get_summary_pipeline())


app = FastAPI()

# Fixed video summarised by the /index demo endpoint.
DEMO_VIDEO_LINK = 'https://www.youtube.com/watch?v=P7yM0TKvUm4'


@app.get('/index')
async def home():
  """Demo endpoint: run the summary pipeline on a fixed video and return what ``main`` returns."""
  return main(DEMO_VIDEO_LINK)

# Request/response schema for a video item.
# NOTE(review): in this cell the model is only referenced by the commented-out
# create_item endpoint; the active /items endpoint reads the raw request JSON.
class Item(BaseModel):
    # Optional integer identifier; defaults to None when omitted.
    id: Union[int, None] = None
    # Required string; the active /items endpoint passes its "link" value to main().
    link: str
    # Optional summary text; defaults to None when omitted.
    summary: Union[str, None] = None
    # Optional integer rating; defaults to None when omitted.
    rating: Union[int, None] = None





@app.post("/items")
async def getInformation(item : Request):
    """Summarise the video named in the request body and echo the body back.

    The body must be a JSON object with a string ``link`` field. The summary
    returned by ``main`` is stored under ``summary`` in that same object.

    Returns:
        A dict with ``status`` (always ``"SUCCESS"`` once the body has been
        accepted), ``data`` (the request object including ``summary``) and
        ``type`` (the Python type name of the parsed body).

    Raises:
        HTTPException: status 400 when the body is not valid JSON, is not a
            JSON object, or has no string ``link``. These cases previously
            surfaced as unhandled exceptions.
    """
    # Local import: HTTPException is not part of the notebook's import cell.
    from fastapi import HTTPException

    try:
        req_info = await item.json()
    except ValueError as exc:
        # The JSON decode error raised for a malformed body is a ValueError.
        raise HTTPException(status_code=400, detail="Request body must be valid JSON") from exc

    if not isinstance(req_info, dict) or not isinstance(req_info.get('link'), str):
        raise HTTPException(
            status_code=400,
            detail="Request body must be a JSON object with a string 'link' field",
        )

    # Intended handling of the fields (TODO: not all of this is implemented yet):
    # persist id
    # delete link
    # retrieve summary
    # keep same rating.
    link = req_info['link']
    summary = main(link)
    req_info['summary'] = summary
    return {
        "status" : "SUCCESS",
        "data" : req_info,
        "type" : str(type(req_info))
    }
# @app.post("/items/")
# async def create_item(item: Item):
#     return item

# Port shared by the public tunnel and the local uvicorn server.
SERVER_PORT = 8000

# Open the tunnel and show its public address before starting the server.
ngrok_tunnel = ngrok.connect(SERVER_PORT)
print(f'Public URL: {ngrok_tunnel.public_url}')

# nest_asyncio is applied before uvicorn.run, as in the original cell order.
nest_asyncio.apply()
uvicorn.run(app, port=SERVER_PORT)

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
Public URL: http://7c47-3-20-229-229.ngrok.io


INFO:     Started server process [497]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
INFO:     Uvicorn running on http://127.0.0.1:8000 (Press CTRL+C to quit)
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


INFO:     2607:fb90:8a80:96e9:31b2:ef33:e5e8:27d4:0 - "GET /index HTTP/1.1" 200 OK
<class 'dict'>


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


INFO:     172.58.180.187:0 - "POST /items HTTP/1.1" 200 OK


In [None]:
from youtube_transcript_api import YouTubeTranscriptApi

# Fetch the transcript items for the demo video.
raw_data = YouTubeTranscriptApi.get_transcript("P7yM0TKvUm4")

# first part of transcript data
print(raw_data[0]["text"])

# Collect the text of every transcript item, then join with a single space.
# Joining with '' fused the last word of one caption with the first word of
# the next; get_transcript() in the API cell already joins with " ".
lis_text = [segment["text"] for segment in raw_data]
text = " ".join(lis_text)
print(text)

prompt = text + "\n TLDR:"

# `tokenizer` and `model` come from the cell that calls cpu_model().
input_ids = tokenizer(prompt, return_tensors="pt").input_ids #.cuda() # .cuda() is for gpu, remove if not needed, gpt-j-6b needs gpu.

# NOTE(review): max_length=2000 and the untruncated prompt may exceed what the
# loaded model accepts for long transcripts -- confirm against the model's limits.
gen_tokens = model.generate(
    input_ids,
    do_sample=True,
    temperature=0.9,
    max_length=2000,
)

gen_text = tokenizer.batch_decode(gen_tokens)[0]

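BELOW: we print the type and length of `input_ids` to inspect what the tokenizer returned, because we are unsure what it contains.

FROM THE ABOVE: `.cuda()` moves the tensor to the GPU. It is commented out in the cell above, so the input stays on the CPU; the code should still work without it as long as the model is also on the CPU.

BELOW: print `gen_text`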

In [None]:
# Show, one per line: the type of input_ids, its length, and the generated text.
print(type(input_ids), len(input_ids), gen_text, sep="\n")

# Things that need doing...?

For App:
- Ability to make API call to this model
- Ability to append Summaries to database

For Experiments:
- Code that lets us loop through our summaries
- Metrics for different models? Many metrics for same model? Different prompts?