In [None]:
import requests
from pathlib import Path
import json
import os

from langchain.document_loaders import JSONLoader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.chains import ConversationalRetrievalChain, RetrievalQA
from langchain.chat_models import ChatOpenAI
from langchain import OpenAI, PromptTemplate

from urllib.parse import urlencode


In [None]:
# Single search query written in pre-encoded form ("+" in place of spaces).
# NOTE(review): the fetch loop further down rebinds `channel`; confirm whether
# this standalone value is still needed.
channel = "flow+with+mira"

In [None]:
# Plain-text search queries (one per channel) that the loops below fetch
# results for and then load from the per-channel JSON files.
channel_list = ["flow with mira", "dansique fitness", "charlie follows"]

In [None]:
def get_youtube_videos(channel: str):
    """Search YouTube through SerpApi and return the decoded JSON response.

    Args:
        channel: Plain-text search query (for example a channel name). It is
            sent as a query parameter and percent-encoded by ``requests``, so
            pass it unencoded (spaces, not ``+``).

    Returns:
        The parsed JSON body of the SerpApi response.

    Raises:
        KeyError: If ``SERPAPI_API_KEY`` is not set in the environment.
        requests.HTTPError: If the service answers with a 4xx/5xx status.
        requests.RequestException: On connection failure or timeout.
    """
    # Let requests build the query string: characters such as "&", "#" or "="
    # in the search text would otherwise corrupt a hand-formatted URL.
    query_params = {
        "engine": "youtube",
        "search_query": channel,
        "api_key": os.environ["SERPAPI_API_KEY"],
    }
    # No Content-type header: a GET request carries no body to describe.
    response = requests.get(
        "https://serpapi.com/search.json",
        params=query_params,
        timeout=30,  # fail instead of hanging the notebook on a stalled connection
    )
    # Surface HTTP errors here rather than saving an error payload to disk and
    # failing later on a missing "video_results" key.
    response.raise_for_status()

    return response.json()

In [None]:
# Fetch the search results for every query in `channel_list` and store each
# response as "<channel>_youtube_results.json" in the working directory.
# After the loop, `channel_videos` still holds the response of the last query.
for channel in channel_list:
    channel_videos = get_youtube_videos(channel)
    results_file = Path("{channel}_youtube_results.json".format(channel=channel))
    results_file.write_text(json.dumps(channel_videos))

In [None]:
# Preview: title of the first entry under "video_results" with every single
# quote replaced by a double quote. `channel_videos` is whatever the fetch loop
# assigned last, so this cell only works after that loop has run.
channel_videos['video_results'][0]['title'].replace("\'", "\"")

In [None]:
# Rebinds `channel` to a single plain-text query.
# NOTE(review): the indexing loop further down rebinds `channel` again; confirm
# whether any cell still relies on this value.
channel = "flow with mira"

In [None]:
def metadata_func(record: dict, metadata: dict) -> dict:
    """Copy the record's ``"link"`` value into the document metadata.

    Args:
        record: One raw result entry; only its ``"link"`` key is read
            (``None`` is stored when the key is absent).
        metadata: Metadata dict to extend. It is modified in place.

    Returns:
        The same ``metadata`` object, now carrying a ``"link"`` entry.
    """
    metadata.update({"link": record.get("link")})
    return metadata

In [None]:
# Build ONE index covering every channel. Creating and saving a fresh index
# inside the loop would overwrite "workout_index" on each pass and leave only
# the last channel searchable.
embeddings = OpenAIEmbeddings()
text_splitter = CharacterTextSplitter(chunk_size=100, chunk_overlap=10)

documents = []
for channel in channel_list:
    print(channel)
    # One document per entry of "video_results"; the entry's "description" is
    # the text content and `metadata_func` adds the video link to the metadata.
    loader = JSONLoader(
        file_path="{channel}_youtube_results.json".format(channel=channel),
        jq_schema=".video_results[]",
        content_key="description",
        metadata_func=metadata_func,
    )
    raw_documents = loader.load()
    channel_documents = text_splitter.split_documents(raw_documents)
    for doc in channel_documents:
        # Store the video URL under "source" as well, replacing whatever value
        # the loader put there.
        doc.metadata["source"] = doc.metadata["link"]
    documents.extend(channel_documents)

# Embed all chunks from all channels, then persist the combined index.
vectorstore = FAISS.from_documents(documents, embeddings)
vectorstore.save_local("workout_index")

# Reload from disk so later cells work against the persisted copy.
new_vectorstore = FAISS.load_local("workout_index", embeddings)


In [None]:
from langchain.chains import LLMChain

# Prompt with two placeholders: {context} (text to search in) and {topic}
# (what kind of workout is wanted). The indentation inside the triple-quoted
# string is part of the prompt text that gets sent.
prompt_template = """Use the context below to find a workout that matches the topic:
    Context: {context}
    Topic: {topic}
    Workout:"""

PROMPT = PromptTemplate(template=prompt_template, input_variables=["context", "topic"])

# Completion model with temperature pinned to 0.
llm = OpenAI(temperature=0)

# Chain that fills PROMPT with the given inputs and sends it to `llm`.
chain = LLMChain(llm=llm, prompt=PROMPT)

In [None]:
# Free-text request used as the query for the similarity search below.
topic = "Dansique workout for the whole body up to 60 minutes"

In [None]:
# Ask the index loaded into `new_vectorstore` for the 20 chunks closest to `topic`.
docs = new_vectorstore.similarity_search(topic, k=20)
# Disabled follow-up step: run `chain` once per retrieved chunk, using the
# chunk text as {context}, and print the results.
# inputs = [{"context": doc.page_content, "topic": topic} for doc in docs]
# print(chain.apply(inputs))