In [8]:
import requests, os, re, threading, jsonlines, json, io, math
from dotenv import load_dotenv
from collections import defaultdict
import boto3

# AWS session bound to the local named profile "stryvia"; the S3 client created
# from it is the one getFromS3 uses to download speech-mark files.
session = boto3.Session(profile_name="stryvia")
s3Client = session.client("s3")

# Pull settings from a .env file into os.environ. The functions below read
# pexelsKey, blackVideoUrl, blackImageUrl, imageScraper, renderUrl, testKey,
# expressionsKey and propsKey from the environment.
# NOTE(review): this runs after the boto3 session is created, so AWS settings
# placed in .env would presumably not affect the session above — confirm.
load_dotenv()
# Operator tokens recognised by processExpression. They are tried in list
# order and the first one found in an expression is used to split it, so
# earlier entries end up as the outermost operation.
splitSymbols = ["=", "+", "&", "->"]

In [9]:
# Module-level state shared between master() and the worker() threads.
# Re-running this cell resets all three containers.
# renderIds: maps the sentence index (as a string) to the value render() returned.
renderIds=dict()
# threads: every Thread object created by master().
threads=[]
# sampleProps: one {"expression", "props"} record appended per worker() call.
sampleProps=[]

In [10]:
def getAssetUrl(query, isVideo, timeout=10):
    """Search Pexels for ``query`` and return the URL of the first usable asset.

    Args:
        query: Free-text search term. It is sent as a query-string parameter,
            so characters such as ``&`` or ``#`` are encoded safely.
        isVideo: When true, search videos and return the link of the first
            file of the first hit whose quality is not ``"sd"``. Otherwise
            search photos and return the ``"large"`` rendition of the first hit.
        timeout: Seconds to wait for the Pexels API before giving up.

    Returns:
        The asset URL. When the request fails, the response has no hits, or
        (videos only) every file of the first hit is ``"sd"``, the value of
        the ``blackVideoUrl`` / ``blackImageUrl`` environment variable is
        returned instead (``None`` if that variable is unset).
    """
    if isVideo:
        endpoint = "https://api.pexels.com/videos/search"
        params = {"query": query, "per_page": 1, "size": "small"}
        fallback = os.environ.get("blackVideoUrl")
    else:
        endpoint = "https://api.pexels.com/v1/search"
        params = {"query": query, "per_page": 1, "size": "large"}
        fallback = os.environ.get("blackImageUrl")
    headers = {"Authorization": os.environ.get("pexelsKey")}

    try:
        response = requests.get(
            url=endpoint, headers=headers, params=params, timeout=timeout
        )
        data = response.json()
        if not isVideo:
            return data["photos"][0]["src"]["large"]
        for file in data["videos"][0]["video_files"]:
            if file["quality"] != "sd":
                return file["link"]
    except Exception as e:
        # Best effort: network errors, bad JSON and empty result lists all
        # degrade to the placeholder asset instead of aborting the caller.
        print(e)
    # Also reached when a video hit only offers "sd" files. Returning the
    # placeholder here (rather than an implicit None) is what lets
    # processExpression detect the miss and retry with an image search.
    return fallback

In [11]:
def getGoogleImage(query):
    """Ask the image-scraper service for an image matching ``query``.

    The service URL is read from the ``imageScraper`` environment variable and
    receives ``{"query": query}`` as a JSON POST body. The decoded JSON reply
    is returned as-is. On any failure the error is printed and the value of
    the ``blackImageUrl`` environment variable is returned instead.
    """
    try:
        scraperEndpoint = os.environ.get("imageScraper")
        payload = {"query": query}
        reply = requests.post(scraperEndpoint, json=payload)
        result = reply.json()
    except Exception as err:
        print(err)
        result = os.environ.get("blackImageUrl")
    return result


In [12]:
def cleanExpression(expression):
    """Return ``expression`` with every ``.`` character removed."""
    pieces = expression.split(".")
    return "".join(pieces)

In [13]:
def processExpression(i,expression):
    """Recursively turn an expression string into a nested format dict.

    The expression is examined in three stages:

    1. Operators: the first entry of ``splitSymbols`` found in the text
       becomes ``"operation"``; the text is split on it and every non-empty
       part is processed recursively into ``"content"``.
    2. Verbs: text wrapped in curly double quotes (U+201C ... U+201D) yields
       ``"operation": "verb"``, the first quoted text as ``"verb"``, and the
       non-empty pieces around the quoted spans as ``"content"``.
    3. Leaf: the stripped text is used as a search query. A video is looked
       up first; if that returns the ``blackVideoUrl`` placeholder, an image
       is looked up instead. A Google image result is attached as well.
       Whitespace-only text yields ``{"operation": "void"}``.

    Args:
        i: Index of the sentence being processed. It is only passed along to
            the recursive calls.
        expression: The expression text to parse.

    Returns:
        A dict describing the expression, as outlined above.
    """
    node = {}

    for symbol in splitSymbols:
        if symbol in expression:
            node["operation"] = symbol
            node["content"] = [
                processExpression(i, part)
                for part in expression.split(symbol)
                if part
            ]
            return node

    # Only treat the text as a verb expression when a complete “...” pair is
    # present. A stray opening quote used to raise IndexError here; it now
    # falls through and is handled as a plain asset query.
    verbs = re.findall(r"\u201c(.*?)\u201d", expression)
    if verbs:
        node["operation"] = "verb"
        node["content"] = []
        node["verb"] = verbs[0]
        # Split directly on the quoted spans (non-capturing) instead of going
        # through a "$$" placeholder, which mis-split text containing "$$".
        for part in re.split(r"\u201c.*?\u201d", expression):
            if part:
                node["content"].append(processExpression(i, part))
        return node

    query = expression.strip()
    if not query:
        node["operation"] = "void"
        return node

    node["operation"] = "asset"
    node["expression"] = query
    assetUrl = getAssetUrl(query, isVideo=True)
    node["assetType"] = "video"
    # getAssetUrl signals "no video found" by returning the placeholder URL.
    if assetUrl == os.environ.get("blackVideoUrl"):
        assetUrl = getAssetUrl(query, isVideo=False)
        node["assetType"] = "image"
    node["assetUrl"] = assetUrl
    node["googleImage"] = getGoogleImage(query)
    return node

In [14]:
def render(i, props, dryRun=True):
    """Submit ``props`` to the render service, or skip the call in dry-run mode.

    Args:
        i: Index of the sentence being rendered.
        props: Render properties built by ``worker``.
        dryRun: When true (the default, matching how this notebook has been
            run so far) no request is made and ``i`` is returned as a stand-in
            render id. Pass ``False`` to call the service at the URL given by
            the ``renderUrl`` environment variable.

    Returns:
        ``i`` in dry-run mode, otherwise the ``"renderId"`` field of the
        service's JSON response.
    """
    if dryRun:
        return i
    # NOTE(review): this sends props as the body of a GET request via
    # ``data=``; nested dicts are presumably not encoded the way the service
    # expects — confirm against the render API before enabling.
    response = requests.get(os.environ.get("renderUrl"), data=props)
    res = response.json()
    return res["renderId"]

In [15]:
def worker(i, expression, duration):
    """Build the render props for one sentence and record the results.

    The expression is cleaned and parsed into a format dict, combined with
    ``duration`` into ``props``, logged, appended to the shared
    ``sampleProps`` list, and handed to ``render``. The value ``render``
    returns is stored in the shared ``renderIds`` dict under the string form
    of ``i``.
    """
    print(f"starting worker {i}")
    print(expression, duration)

    cleaned = cleanExpression(expression)
    props = {
        "duration": duration,
        "format": processExpression(i, cleaned),
    }
    print(props)

    record = {"expression": expression, "props": props}
    sampleProps.append(record)

    renderId = render(i, props)
    renderIds[f"{i}"] = renderId

In [16]:
def getDuration(i,speechMarks):
    """Return the gap between mark ``i`` and mark ``i + 1``, divided by 1000."""
    nextMark = speechMarks[i + 1]
    thisMark = speechMarks[i]
    return (nextMark - thisMark) / 1000

In [17]:
def getFromS3(speechMarksKey):
    """Download a speech-marks JSON Lines object from S3 and parse it.

    Reads ``speech_marks/<speechMarksKey>.jsonl`` from the ``ai-tutor-s3``
    bucket through the module-level ``s3Client`` and returns the decoded
    records as a list, one entry per line.
    """
    s3Object = s3Client.get_object(
        Bucket="ai-tutor-s3",
        Key=f"speech_marks/{speechMarksKey}.jsonl",
    )
    with io.BytesIO(s3Object["Body"].read()) as buffer:
        with jsonlines.Reader(buffer) as reader:
            records = [record for record in reader]
    return records

In [18]:
def getSpeechMarks(speechMarksKey):
    """Return the sentence start times for a speech-marks file, plus an end mark.

    The records are fetched with ``getFromS3``. The ``"time"`` of every record
    whose ``"type"`` is ``"sentence"`` is collected in order, and a final
    entry equal to the last record's ``"time"`` plus 1000 is appended so the
    last sentence also has a closing boundary.
    """
    records = getFromS3(speechMarksKey)
    marks = [record["time"] for record in records if record["type"] == "sentence"]
    closingMark = records[-1]["time"] + 1000
    marks.append(closingMark)
    return marks

In [19]:
def master(sentences, expressions, speechMarksKey):
    """Run one ``worker`` thread per sentence and collect the render ids.

    Args:
        sentences: Sequence of sentences; only its length and order are used.
        expressions: Sequence where ``expressions[i]`` is the expression for
            sentence ``i``.
        speechMarksKey: Key passed to ``getSpeechMarks`` to obtain the timing
            marks from which each sentence's duration is derived.

    Returns:
        A list with the ``renderIds`` entry of every sentence, in sentence
        order.

    Raises:
        KeyError: If a worker finished without storing a render id, for
            example because it raised inside its thread.
        IndexError: If ``expressions`` or the speech marks are shorter than
            ``sentences``.
    """
    speechMarks = getSpeechMarks(speechMarksKey)
    print(speechMarks)

    # renderIds is module-level and survives between runs. Drop the slots
    # this run will fill so a failed worker surfaces as a KeyError instead of
    # silently returning an id left over from a previous run.
    for i in range(len(sentences)):
        renderIds.pop(f"{i}", None)

    started = []
    try:
        for i, sentence in enumerate(sentences):
            t = threading.Thread(
                target=worker,
                args=(
                    i,
                    expressions[i],
                    getDuration(i, speechMarks),
                ),
            )
            # Still recorded in the shared list for anyone inspecting it.
            threads.append(t)
            started.append(t)
            t.start()
    finally:
        # Wait only for the threads of this call, and do so even when the
        # loop above raised part-way through.
        for t in started:
            t.join()

    output = [renderIds[f"{i}"] for i in range(len(sentences))]
    print(output)
    return output

In [20]:
def getSentences():
    """Read the sample JSON Lines file and return its sentence texts.

    The file is ``../sample_data/<testKey>.jsonl``, where ``testKey`` comes
    from the environment. The ``"value"`` of every record whose ``"type"`` is
    ``"sentence"`` is returned, in file order.
    """
    samplePath = f'../sample_data/{os.environ.get("testKey")}.jsonl'
    with jsonlines.open(samplePath) as records:
        return [
            record['value']
            for record in records
            if record['type']=='sentence'
        ]

In [21]:
# Driver cell: load the sample sentences and their expressions, run the
# pipeline, then persist the props every worker produced.
sentences = getSentences()

expressionsPath = f"../sample_data/{os.environ.get('expressionsKey')}.json"
with open(expressionsPath, "r") as f:
    expressions = json.load(f)

master(sentences, expressions, os.environ.get("testKey"))

# The output path is resolved after the run, as before.
propsPath = f"../sample_data/{os.environ.get('propsKey')}.json"
with open(propsPath, "w") as fp:
    json.dump(sampleProps, fp)

[0, 2569, 8164, 12490, 20445, 25964, 30592, 34766, 40906, 45603, 52812, 58130, 61807, 64856, 72451, 76779, 80046, 86587, 90679, 96715, 100084]
starting worker 0
Computer=Office. 2.569
starting worker 1
 CPU = office workers. 5.595
starting worker 2
 Workers “carry out” instructions. 4.326
starting worker 3
Cpu “processes” information. 7.955
starting worker 4
 Memory (RAM) = Worker’s desk space. 5.519
starting worker 5
 CPU “places” data. 4.628
starting worker 6
 Space “allows” task handling. 4.174
starting worker 7
 Storage = filing cabinets. 6.14
starting worker 8
 Storage 'stores' data. 4.697
starting worker 9
 Input devices= ”workers” receiving instructions. 7.209
starting worker 10
 Output devices = workers results board. 5.318
starting worker 11
 Operating System = Office Manager. 3.677
starting worker 12
 Project manager “coordinates” resources. 3.049
starting worker 13
Operating system “manages” ram & “prioritizes” tasks. 7.595
starting worker 14
 Programs & applications = Worke