In [171]:
from langchain_core.prompts import PipelinePromptTemplate, PromptTemplate
from langchain_openai import ChatOpenAI
import os
import json
import random

In [172]:
# Load the demographic option lists (age ranges, genders, locations, ...) that
# personas are sampled from.
file_path = 'demography.json'
# Pin the encoding: without it, open() falls back to the platform's locale
# encoding, which can mis-decode non-ASCII characters in the JSON file.
with open(file_path, 'r', encoding='utf-8') as file:
    demography = json.load(file)
demography

{'age': [{'category': 'Teens', 'start': 13, 'end': 19},
  {'category': 'Young Adults', 'start': 20, 'end': 35},
  {'category': 'Adults', 'start': 35, 'end': 50},
  {'category': 'Seniors', 'start': 51, 'end': 70}],
 'genders': ['Female', 'Male', 'Non-binary'],
 'ethnicities': ['Hispanic/Latino',
  'Caucasian',
  'Asian',
  'African American',
  'Mixed race',
  'Native American'],
 'locations': ['North America',
  'Europe',
  'Asia',
  'South America',
  'Africa',
  'Oceania'],
 'professionalBackgrounds': ['Blogger',
  'Professional Critic',
  'Film Student',
  'Casual Moviegoer',
  'YouTuber',
  'Industry Professional',
  'IT Professional',
  'Marketing Professional',
  'Teacher',
  'Medical Professional',
  'Literature Teacher',
  'History Teacher'],
 'educations': ['High School',
  "Bachelor's Degree",
  "Master's Degree",
  'Doctorate',
  'Self-taught'],
 'incomeLevels': ['Low Income', 'Middle Income', 'High Income']}

In [173]:
def get_random_persona(demography, seed=None):
    """Yield an endless stream of randomly assembled persona dicts.

    Each persona is built by picking one option, independently and uniformly,
    from every option list in ``demography``.

    Args:
        demography: Mapping with the keys ``age``, ``genders``, ``ethnicities``,
            ``locations``, ``professionalBackgrounds``, ``educations`` and
            ``incomeLevels``, each holding a non-empty sequence of options.
        seed: Optional seed for reproducible output. Any value other than
            ``None`` is honoured, including ``0``. When ``None``, the
            module-level ``random`` generator is used.

    Yields:
        dict: A new persona with the keys ``age``, ``gender``, ``ethnicity``,
        ``location``, ``professionalBackground``, ``education`` and
        ``incomeLevel``.

    Raises:
        KeyError: If ``demography`` lacks one of the expected keys.
        IndexError: If one of the option lists is empty.
    """
    # Compare against None rather than testing truthiness so that seed=0 works.
    # A private Random instance produces the same sequence as seeding the
    # global generator, but keeps this stream independent of other users of
    # the `random` module (and of other generators created by this function).
    rng = random.Random(seed) if seed is not None else random

    # (persona key, demography key) pairs; the order fixes the draw order, so
    # a given seed always produces the same personas.
    fields = (
        ('age', 'age'),
        ('gender', 'genders'),
        ('ethnicity', 'ethnicities'),
        ('location', 'locations'),
        ('professionalBackground', 'professionalBackgrounds'),
        ('education', 'educations'),
        ('incomeLevel', 'incomeLevels'),
    )
    while True:
        yield {
            persona_key: rng.choice(demography[source_key])
            for persona_key, source_key in fields
        }

In [174]:
# Seeded persona stream that later cells draw from; the first persona is
# displayed here as a quick sanity check.
random_persona_generator = get_random_persona(demography, seed=42)
next(random_persona_generator)

{'age': {'category': 'Teens', 'start': 13, 'end': 19},
 'gender': 'Female',
 'ethnicity': 'Native American',
 'location': 'Asia',
 'professionalBackground': 'Casual Moviegoer',
 'education': "Bachelor's Degree",
 'incomeLevel': 'Low Income'}

In [213]:
# Persona preamble: tells the model which demographic profile to adopt.
# Template variables: ageStart, ageEnd, gender, ethnicity, location,
# profession, education, income.
individual_prompt = PromptTemplate.from_template("""
You're an individual with a specific age, gender, or ethnic background from a defined geographic location.

Those parameters are specified within the following block.
It starts with "BEGIN demography" and ends with "END demography".

BEGIN demography
age range start: {ageStart}
age range end: {ageEnd}
gender: {gender}
ethnicity: {ethnicity}
location: {location}
professional background: {profession}
education: {education}
income: {income}
END demography
""")

In [214]:
# Static review prompt with no template variables.
# NOTE(review): not referenced by any other cell visible in this notebook — confirm whether it is still needed.
simple_review_prompt = PromptTemplate.from_template("""
Write a review about Witcher season 2? 
""")

In [215]:
# Content block: describes the movie / TV show being reviewed.
# Template variables: name, type, cast, budget.
project_prompt = PromptTemplate.from_template("""
Below are high-level movie or TV show ideas and some essential parameters.
It starts with "BEGIN content" and ends with "END content".

BEGIN content
name: {name}
type: {type}
cast: {cast}
budget: {budget}
END content
""")

In [242]:
# Call-to-action block: asks for the review itself and fixes the answer format.
# Template variable: name. The doubled braces are escaped so they render as
# literal braces in the final prompt.
# The JSON skeleton separates its members with commas so that the example shown
# to the model is itself well-formed JSON.
cta_prompt = PromptTemplate.from_template("""
Write a simple review as someone who just watched the {name}.
Highlight points that you liked and didn't like.
Provide a rating that you give to this content on a scale from 1 to 5.
If you look forward to watch it again answer with 0 for No or 1 for Yes.
Be honest, your feedback is valuable and will be applied by a studio to improve the content.

Answer in JSON format, use following structure:
{{
    "positives": <your answer>,
    "negatives": <your answer>,
    "rating": <your rating>,
    "lookingForward": <your answer>
}}
""")

In [243]:
# Complete prompt: persona block, then content block, then review instructions.
# NOTE(review): relies on PromptTemplate's `+` operator, which presumably
# concatenates the template texts in this order — confirm against langchain_core.
full_prompt = individual_prompt + project_prompt + cta_prompt

In [244]:
# Chat model used for every review. The API key is read from the
# OPENAI_API_KEY environment variable, and a JSON-object response format is
# requested through model_kwargs.
model = ChatOpenAI(
    model_name="gpt-3.5-turbo-1106",
    temperature=0,
    openai_api_key=os.getenv("OPENAI_API_KEY"),
    model_kwargs={"response_format": {"type": "json_object"}},
)

In [245]:
def generate_demography_prompt_input(persona):
    """Flatten a persona dict into the variables of the demography prompt.

    Args:
        persona: Persona dict with an ``age`` entry (holding ``start`` and
            ``end``) plus ``gender``, ``ethnicity``, ``location``,
            ``professionalBackground``, ``education`` and ``incomeLevel``.

    Returns:
        dict: Values keyed by prompt variable name (``ageStart``, ``ageEnd``,
        ``gender``, ``ethnicity``, ``location``, ``profession``,
        ``education``, ``income``).
    """
    age_range = persona["age"]
    prompt_vars = {"ageStart": age_range["start"], "ageEnd": age_range["end"]}

    # (prompt variable, persona key) pairs for the values copied over as-is.
    direct_fields = (
        ("gender", "gender"),
        ("ethnicity", "ethnicity"),
        ("location", "location"),
        ("profession", "professionalBackground"),
        ("education", "education"),
        ("income", "incomeLevel"),
    )
    for variable, persona_key in direct_fields:
        prompt_vars[variable] = persona[persona_key]
    return prompt_vars

In [246]:
def generate_content_prompt_input(content):
    """Build the variables of the content prompt from a content description.

    Args:
        content: Dict with ``name``, ``type``, ``cast`` (an iterable of
            strings) and ``budget``.

    Returns:
        dict: The same four keys; ``cast`` is joined into one comma-separated
        string, the other values are passed through unchanged.
    """
    prompt_vars = {}
    for field in ("name", "type", "cast", "budget"):
        value = content[field]
        # Only the cast list needs converting; the prompt expects plain text.
        prompt_vars[field] = ",".join(value) if field == "cast" else value
    return prompt_vars

In [247]:
# prompt generator
def generate_prompt_input(persona, content):
    """Merge the persona and content variables into one prompt input dict.

    Persona variables come first; content variables are applied afterwards and
    would win on a key clash.
    """
    prompt_input = dict(generate_demography_prompt_input(persona))
    prompt_input.update(generate_content_prompt_input(content))
    return prompt_input

In [248]:
# # prompt testing
# persona = next(random_persona_generator)
# content = {
#     "name": "The witcher movie",
#     "type": "Movie",
#     "cast": ["Chris Hemsworth", "Natalia Oreiro"],
#     "budget": 4_000_000
# }
# prompt_input = generate_prompt_input(persona, content)
# full_prompt.invoke(prompt_input)
# chain = full_prompt | model 
# chain.invoke(prompt_input)

In [249]:
def generate_reviews(content, how_many):
    """Print ``how_many`` model-written reviews of ``content``.

    Every review uses the next persona drawn from the notebook-level
    ``random_persona_generator`` and is produced by piping ``full_prompt``
    into ``model``.

    Args:
        content: Content description accepted by ``generate_prompt_input``.
        how_many: Number of reviews to request.
    """
    review_chain = full_prompt | model
    for _review_number in range(how_many):
        reviewer = next(random_persona_generator)
        print(review_chain.invoke(generate_prompt_input(reviewer, content)))

In [250]:
# Example project to be reviewed.
content = dict(
    name="The witcher movie",
    type="Movie",
    cast=["Chris Hemsworth", "Natalia Oreiro"],
    budget=4_000_000,
)
# Request a single review for it.
generate_reviews(content, how_many=1)

content='{\n    "positives": "I enjoyed the action scenes and the special effects were impressive. The casting of Chris Hemsworth and Natalia Oreiro was also a highlight."\n    ,\n    "negatives": "However, I felt that the plot was a bit confusing and hard to follow at times. The pacing of the movie also felt a bit off."\n    ,\n    "rating": 3\n    ,\n    "lookingForward": 0\n}' response_metadata={'token_usage': {'completion_tokens': 92, 'prompt_tokens': 301, 'total_tokens': 393}, 'model_name': 'gpt-3.5-turbo-1106', 'system_fingerprint': 'fp_482d920018', 'finish_reason': 'stop', 'logprobs': None} id='run-f1679a6a-c6b8-4681-ba66-8ad397ea6208-0' usage_metadata={'input_tokens': 301, 'output_tokens': 92, 'total_tokens': 393}
