import glob
import json
import os
import streamlit as st
from clarifai.modules.css import ClarifaiStreamlitCSS
from clarifai.client.model import Model
from clarifai.client.input import Inputs
from services import voice
from services import video
from services import story as Story
from services import llm
from dotenv import load_dotenv
import datetime
import yaml
import services.research_agend as research_agend
import random
load_dotenv()
########################
# Required in every Clarifai streamlit app
########################
######################################################################################################
# In this section, we set the user authentication, the user and app IDs, the model details, and
# the raw text we want to use as input. Change these strings to run your own example.
######################################################################################################
# Your PAT (Personal Access Token) can be found in the portal under Authentication
# Specify the correct user_id/app_id pairings
# Since you're making inferences outside your app's scope
USER_ID = 'openai'
APP_ID = 'chat-completion'
# Change these to whatever model and text URL you want to use
MODEL_ID = 'openai-gpt-4-vision'
MODEL_VERSION_ID = '266df29bc09843e0aee9b7bf723c03c2'
RAW_TEXT = 'What do you see on this picture?'
# To use a hosted text file, assign the url variable
# TEXT_FILE_URL = 'https://samples.clarifai.com/negative_sentence_12.txt'
# Or, to use a local text file, assign the url variable
# TEXT_FILE_LOCATION = 'YOUR_TEXT_FILE_LOCATION_HERE'
############################################################################
# YOU DO NOT NEED TO CHANGE ANYTHING BELOW THIS LINE TO RUN THIS EXAMPLE
############################################################################
PAT = os.environ['PAT']
ELEVENLABS_TOKEN = os.environ['ELEVENLABS_TOKEN']
SERP_TOKEN = os.environ['SERP_API']
metaphor_token = os.environ['METAPHOR_TOKEN']
os.environ['CLARIFAI_PAT'] = PAT
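# A minimal .env sketch for local runs (variable names taken from the os.environ
# reads above; the values are placeholders, not real credentials):
#   PAT=your_clarifai_personal_access_token
#   ELEVENLABS_TOKEN=your_elevenlabs_api_key
#   SERP_API=your_serpapi_key
#   METAPHOR_TOKEN=your_metaphor_api_key
# Exporting CLARIFAI_PAT above lets the Clarifai SDK pick the token up from the
# environment instead of passing it to every client call.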
def show_all_scenes(_story):
    """Render all scenes of a story as a grid inside the main story area ("story_area2")."""
    print("showing story with " + str(len(_story.getScenes())) + " scenes")
    st.session_state["current_story"] = _story
    container_to_show_in = st.session_state["story_area2"]
    container_to_show_in = container_to_show_in.empty()
    container_to_show_in = container_to_show_in.container()
    with container_to_show_in:
        st.markdown("## Current Story about " + _story.getRoleText())
        st.markdown("*What Employees like:* " + _story.getEmployeeFeedback())
        column_count = 3
        if _story.hasVideo():
            column_count = column_count + 1
        # else:
        #     if st.button("video", key="video_in_editor"+str(_story.getRoleText())+"_"+str(container_to_show_in)):
        #         vid = video.Video()
        #         vid.create_video(_story)
        #         st.write("video created")
        #         session_stories.save()
        #         st.experimental_rerun()
        row_count = int(len(_story.getScenes()) / column_count) + 1
        print("rows: " + str(row_count))
        rows = st.columns(row_count)
        row_colums = []
        for row in rows:
            # with row:
            columns = st.columns(column_count)
            row_colums.append(columns)
        # columns = st.columns(column_count)
        scene_counter = 0
        col_counter = 0
        scenes = _story.getScenes()
        if _story.hasVideo():
            with row_colums[0][col_counter]:
                st.write("## Generated Video")
                vid = _story.getVideo()
                st.video(vid)
            col_counter = col_counter + 1
        for scene in scenes:
            # with columns[col_counter % len(columns)]:
            row_to_use = col_counter // column_count
            col_to_use = col_counter % column_count
            with row_colums[row_to_use][col_to_use]:
                print(f"using row {row_to_use} and col {col_to_use}")
                scene_counter = scene_counter + 1
                col_counter = col_counter + 1
                st.write(f"## Scene {scene_counter}")
                desc = _story.getSceneDescription(scene_counter)
                st.markdown(f"{desc}")
                st.write("### Visual")
                image_prompt = _story.getVisualPrompt(scene_counter)
                st.markdown(f"{image_prompt}")
                image_filepath = _story.getSceneImage(scene_counter)
                if image_filepath != "":
                    st.image(image_filepath)
                else:
                    st.markdown("no image")
                st.markdown("### Voice")
                audio_prompt = _story.getAudioPrompt(scene_counter)
                st.markdown(f"{audio_prompt}")
                audio_path = _story.getSceneAudio(scene_counter)
                if audio_path != "":
                    st.audio(audio_path)
                else:
                    st.write("no audio")
st.set_page_config(layout="wide")
st.title("EyeCue - GenAI for recruiting videos")
#st.markdown("## the smart video creator for Career inspiration")
home, research = st.tabs(["Generate Story", "Research Agend"])
eleven_token = ""
stories = Story.stories()

# ------------- load stories from filesystem ------------------- #
# check if stories exist in session state, if not create
if "stories" not in st.session_state:
    if stories.isSaved():
        stories.load()
        #st.write("loaded stories from filesystem")
    else:
        st.write("no stories found, creating new")
    st.session_state["stories"] = stories
# ------------------- THE SIDEBAR ------------------- #
with st.sidebar:
    eleven_token = st.text_input("elevenlabs token", value=ELEVENLABS_TOKEN)
    # load all files in folder ./audio and display them as audio files to click and play
    d = os.path.dirname(__file__)
    st.markdown(f"## Generated Stories ({st.session_state.stories.getStoryAmount()})")
    session_stories = st.session_state["stories"]
    on = st.toggle('Show Archive')
    if on:
        for element in session_stories.getStories():
            story = element["story"]
            scenes = story.getScenes()
            #print(scenes)
            with st.expander(story.getRoleText()):
                if not story.hasVideo():
                    if st.button("video", key="video" + str(element["id"])):
                        vid = video.Video()
                        vid.create_video(story)
                        st.write("video created")
                        session_stories.save()
                if st.button("edit", key="edit" + str(element["id"]), on_click=show_all_scenes, args=[story]):
                    st.write("editing")
                    # set current story to session state to edit
                    st.session_state["current_story"] = story
                if story.hasVideo():
                    st.markdown("### Video")
                    st.video(f"{story.getVideo()}")
                # iterate all scenes in story
                for scene in story.getScenes():
                    st.write(f"## Scene {scene.get('number', '')}")
                    st.markdown(f"{scene['text']}")
                    #st.write("### Visual")
                    #st.markdown(f"{scene['visualprompt']}")
                    st.image(scene["image"])
                    #st.markdown("### Voice")
                    #st.markdown(f"{scene['voiceover']}")
                    if scene["audio"] != "":
                        st.audio(scene["audio"])
                    else:
                        st.write("no token, no audio")
                # for audio_path in story.getAudioPaths():
                #     if audio_path != "":
                #         st.audio(audio_path)
                #     else:
                #         st.write("no audio")
                # # display generated images
                # for image_path in story.getImagePaths():
                #     st.image(image_path)
    #------------------- The Repository in sidebar ------------------- #
    st.markdown("# MEDIA REPOSITORY")
    with st.expander("## Generated audio files"):
        counter = 0
        for fi, f in enumerate(glob.glob(os.path.join(d, "audio", "*.wav"))):
            name = os.path.splitext(os.path.basename(f))[0]
            counter = counter + 1
            c1, c2 = st.columns([0.7, 0.3])
            with c1:
                st.audio(f, format='audio/wav', start_time=0)
            with c2:
                if st.button("delete", key="delete " + str(counter)):
                    os.remove(f)
                    st.write("deleted")
    # ------------------ Images in sidebar ------------------------ #
    # load all files in folder ./images and display them as images
    with st.expander("## Generated images"):
        counter = 0
        for fi, f in enumerate(glob.glob(os.path.join(d, "images", "*.png"))):
            name = os.path.splitext(os.path.basename(f))[0]
            counter = counter + 1
            # display image
            st.image("./images/" + name + ".png")
            # display delete button
            if st.button("delete", key="delete image " + str(counter)):
                os.remove(f)
                st.write("deleted")
# ------------------ Story generation in main page ------------------------ #
with research:
    research_agend.show(metaphor_token)

with home:
    # search_query = st.text_input("Search Query", "Data Scientist")
    # if st.button("Search Jobs"):
    #     research_agend.search_jobs(SERP_TOKEN, search_query)
    #     st.write("searching jobs for " + search_query)
    with st.form(key="story_form"):
        job_role = st.text_input("Job Role", key="job_role")
        company = st.text_input("Company", value="LabLab.ai")
        max_number_of_scenes = st.number_input("Max number of scenes", min_value=1, max_value=10, value=3)
        employee_feedback = ""
        if st.form_submit_button("Generate Storyboard"):
            with st.status(f"first i will do some research about what people like at {company}") as status:
                contents, summary = research_agend.run_metaphor_search(company, metaphor_token)
                st.write(summary)
                status.update(label="now i will extract the essence of the research")
                storyboard = llm.llm_multimodal(
                    None,
                    f"Generate a 40 second video storyboard for the job role {job_role} at the company "
                    f"{company}. Here are some things that employees like about the company, use this in "
                    f"your storyline: '{summary}'. Now describe each of {max_number_of_scenes} scenes. "
                    "For each scene find very attention-catching hooks and generate the following: "
                    "- description: describe the scene briefly with a great hook "
                    "- visualprompt: a prompt for an image generator, describing very positive, energetic "
                    "persons, clothes, light and perspective. Do not include text or logos. Prefer showing "
                    "happy people. Generate a portrait orientation in 9:16 format "
                    "- voiceover: a very short and compelling one-sentence speaker text in the voice of "
                    "Sir Attenborough (without mentioning his name). "
                    "The video should be very positive and inspire the viewer to take the next career step. "
                    "Start with presenting the role and end with presenting some great jobs. Do not mention "
                    "your name. Write from the perspective of someone outside the company who knows a lot "
                    "about it. Output clean UTF-8 JSON as plain text without markdown code fences, "
                    "containing a scenes array at the top level.")
                status.update(label="now we created a storyboard for you and will start to generate the scenes.")
                print(storyboard)
                # strip markdown code fences, in case the LLM is not compliant
                storyboard = storyboard.replace("```json", "")
                storyboard = storyboard.replace("```", "")
                storyboard = storyboard.replace("json", "")
                #st.write(storyboard)
                #st.write(str(type(storyboard)))
                # ------------------- The current story in main page ------------------- #
                try:
                    parsed_storyboard = json.loads(storyboard)
                    story = Story.story(job_role)
                    story.addEmployeeFeedback(summary)
                    story.addStory(storyboard)
                    scenes = parsed_storyboard["scenes"]
                    scene_counter = 0
                    for scene in scenes:
                        scene_counter = scene_counter + 1
                        image_filepath, prompt, width, height = llm.generate_image(scene["visualprompt"])
                        status.update(label="generated an image for the scene")
                        if True or eleven_token != "":  # we switched to the OpenAI voice, so always generate for now
                            audio_filepath = voice.generate_audio(scene["voiceover"], eleven_token)
                            status.update(label="generated audio for the scene")
                        else:
                            audio_filepath = ""
                        story.addScene(scene["description"], scene["visualprompt"], image_filepath,
                                       scene["voiceover"], audio_filepath, scene_counter)
                        status.update(label="added another scene to story")
                    show_all_scenes(story)
                except Exception as e:
                    st.write("could not parse storyboard")
                    st.write(e)
                else:
                    # only persist the story if parsing and generation succeeded
                    stories = st.session_state["stories"]
                    stories.addStory(story)
                    stories.save()
                    st.session_state["stories"] = stories
                    st.session_state["current_story"] = story
                    st.balloons()
                #show_all_scenes(story)
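    # The prompt above asks the model for plain JSON shaped roughly like this
    # (key names inferred from the dictionary accesses in the loop above; the
    # exact values are up to the model):
    #   {"scenes": [{"description": "...", "visualprompt": "...", "voiceover": "..."}, ...]}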
#show_all_scenes(story)
# if job_role and st.button("Generate Story"):
# prompt = f"Make a 3 sentence emotional elevator pitch to a candidate in the style of Sir Attenborough, that the {job_role} is great and you are going to present some good jobs. Do not mention your name. Also make a prompt to generate a very appealing photography of an impressive person doing this job and name the role. Output both as a JSON with attributes story and impage_prompt."
# text = llm.llm_multimodal(None,prompt)
# #convert text to JSON
# parsed = json.loads(text)
# story_text = parsed["story"]
# story = Story.story(job_role)
# story.addStory(story_text)
# st.markdown(text)
# audio_filepath = voice.generate_audio(story_text,eleven_token)
# if audio_filepath == "":
# st.write("no audio generated, maybe no Eleven_labs token entered?")
# story.addAudio(audio_filepath,story_text)
# image_filepath, prompt, width, height = llm.generate_image(parsed["image_prompt"])
# story.addImage(image_filepath, width, height, parsed["image_prompt"])
# stories = st.session_state["stories"]
# stories.addStory(story)
# stories.save()
# st.session_state["stories"] = stories
# st.session_state["current_story"] = story
# #st.experimental_rerun() #updating ui to reflect new story
# #show_story_on_mainpage(story)
print("calling st.empty")
if "current_story" in st.session_state:
_story = st.session_state["current_story"]
if _story.hasVideo() != True:
if st.button("Generate Video", key="video_in_mainpage_top"+str(_story.getRoleText())):
vid = video.Video()
vid.create_video(_story)
st.write("video created")
session_stories.save()
st.balloons()
#st.experimental_rerun()
placeholder = st.empty()
with placeholder.container():
st.info("### Select a Story in the Archive left or generate a new Story above")
st.session_state["story_area2"] = placeholder
if "current_story" in st.session_state:
print("showing story from session.")
# _story = st.session_state["current_story"]
# if _story.hasVideo() == True:
# if st.button("show Video", key="video_in_modal"+str(_story.getRoleText())):
# with st.Modal():
# st.header("Video for "+_story.getRoleText())
# st.video(f"{_story.getVideo()}")
# st.button("Close Modal")
# else:
# if st.button("video", key="video_in_editor"+str(_story.getRoleText())):
# vid = video.Video()
# vid.create_video(_story)
# st.write("video created")
# session_stories.save()
# #st.experimental_rerun()
show_all_scenes(st.session_state["current_story"])
ClarifaiStreamlitCSS.insert_default_css(st)
#st.markdown("Please select a specific page by adding it to the url")
# cols = st.columns(10)
# d = os.path.dirname(__file__)
# for fi, f in enumerate(glob.glob(os.path.join(d, "pages", "*.py"))):
# name = os.path.splitext(os.path.basename(f))[0]
# ClarifaiStreamlitCSS.buttonlink(cols[fi % len(cols)], name, "/" + name)