In [1]:
from langchain_core.prompts import PipelinePromptTemplate, PromptTemplate
from langchain_openai import ChatOpenAI
import os
import json
import random
import sqlite3

In [2]:
# Load the demography option lists (age ranges, genders, ethnicities, ...) from a
# JSON file addressed relative to the current working directory.
file_path = 'demography.json'
demography = None
with open(file_path, 'r') as file:
    demography = json.load(file)
# Bare expression so the notebook displays the loaded structure.
demography

{'age': [{'category': 'Teens', 'start': 13, 'end': 19},
  {'category': 'Young Adults', 'start': 20, 'end': 35},
  {'category': 'Adults', 'start': 35, 'end': 50},
  {'category': 'Seniors', 'start': 51, 'end': 70}],
 'genders': ['Female', 'Male', 'Non-binary'],
 'ethnicities': ['Hispanic/Latino',
  'Caucasian',
  'Asian',
  'African American',
  'Mixed race',
  'Native American'],
 'locations': ['North America',
  'Europe',
  'Asia',
  'South America',
  'Africa',
  'Oceania'],
 'professionalBackgrounds': ['Blogger',
  'Professional Critic',
  'Film Student',
  'Casual Moviegoer',
  'YouTuber',
  'Industry Professional',
  'IT Professional',
  'Marketing Professional',
  'Teacher',
  'Medical Professional',
  'Literature Teacher',
  'History Teacher'],
 'educations': ['High School',
  "Bachelor's Degree",
  "Master's Degree",
  'Doctorate',
  'Self-taught'],
 'incomeLevels': ['Low Income', 'Middle Income', 'High Income']}

In [3]:
def get_random_persona(demography, seed=None):
    """Yield an endless stream of randomly assembled personas.

    Every persona is a fresh dict holding one ``random.choice`` pick from each
    demography list.

    Args:
        demography: Mapping with the keys ``age``, ``genders``, ``ethnicities``,
            ``locations``, ``professionalBackgrounds``, ``educations`` and
            ``incomeLevels``; each value must be a non-empty sequence.
        seed: Optional seed for the module-level ``random`` generator. Any value
            other than ``None`` is applied, including ``0``.

    Yields:
        dict: Persona with the keys ``age``, ``gender``, ``ethnicity``,
        ``location``, ``professionalBackground``, ``education`` and
        ``incomeLevel``.

    Note:
        This is a generator, so the seed is applied on the first ``next()``
        call, not when the function is called. Seeding affects the global
        ``random`` state shared with any other user of the module.
    """
    # Compare against None: a plain truthiness test would skip falsy seeds such as 0.
    if seed is not None:
        random.seed(seed)
    while True:
        # Draw order is fixed (age first, income last) so that a given seed
        # always reproduces the same sequence of personas.
        yield {
            'age': random.choice(demography['age']),
            'gender': random.choice(demography['genders']),
            'ethnicity': random.choice(demography['ethnicities']),
            'location': random.choice(demography['locations']),
            'professionalBackground': random.choice(demography['professionalBackgrounds']),
            'education': random.choice(demography['educations']),
            'incomeLevel': random.choice(demography['incomeLevels']),
        }

In [4]:
# Shared persona generator seeded with 42; generate_reviews() draws from this
# same module-level object.
random_persona_generator = get_random_persona(demography, 42)
# Displays one persona. This also consumes the first item of the seeded sequence.
next(random_persona_generator)

{'age': {'category': 'Teens', 'start': 13, 'end': 19},
 'gender': 'Female',
 'ethnicity': 'Native American',
 'location': 'Asia',
 'professionalBackground': 'Casual Moviegoer',
 'education': "Bachelor's Degree",
 'incomeLevel': 'Low Income'}

In [5]:
# Persona section of the prompt. Template variables: ageStart, ageEnd, gender,
# ethnicity, location, profession, education, income.
individual_prompt = PromptTemplate.from_template("""
You're an individual with a specific age, gender, or ethnic background from a defined geographic location.

Those parameters are specified within the following block.
It starts with "BEGIN demography" and ends with "END demography".

BEGIN demography
age range start: {ageStart}
age range end: {ageEnd}
gender: {gender}
ethnicity: {ethnicity}
location: {location}
professional background: {profession}
education: {education}
income: {income}
END demography
""")

In [6]:
# Fixed review request with no template variables.
# NOTE(review): not referenced by any other cell visible in this notebook.
simple_review_prompt = PromptTemplate.from_template("""
Write a review about Witcher season 2? 
""")

In [7]:
# Content section of the prompt, describing the movie or show under review.
# Template variables: name, type, cast, budget.
project_prompt = PromptTemplate.from_template("""
Below are high-level movie or TV show ideas and some essential parameters.
It starts with "BEGIN content" and ends with "END content".

BEGIN content
name: {name}
type: {type}
cast: {cast}
budget: {budget}
END content
""")

In [8]:
# Call-to-action section: asks for the review, a 1-5 rating and a 0/1
# "lookingForward" flag, answered as JSON. Template variable: name.
# The doubled braces ({{ and }}) are escapes that render as literal braces, so
# the JSON skeleton is not parsed as template variables.
cta_prompt = PromptTemplate.from_template("""
Write a simple review as someone who just watched the {name}.
Highlight points that you liked and didn't like.
Provide a rating that you give to this content on a scale from 1 to 5.
If you look forward to watch it again answer with 0 for No or 1 for Yes.
Be honest, your feedback is valuable and will be applied by a studio to improve the content.

Answer in JSON format, use following structure:
{{
    "review": <your answer>,
    "rating": <your rating>,
    "lookingForward": <your answer>
}}
""")

In [9]:
# Combine the persona, content and call-to-action templates, in that order, into
# the single prompt sent to the model. Invoking it requires the variables of all
# three parts.
full_prompt = individual_prompt + project_prompt + cta_prompt

In [10]:
# Chat model shared by every review request.
# The API key comes from the OPENAI_API_KEY environment variable (None when it
# is unset), and the "json_object" response format is passed through to the API
# to match the JSON answer requested by the prompt.
model = ChatOpenAI(
    temperature=0,
    model_name="gpt-3.5-turbo-1106",
    openai_api_key=os.getenv("OPENAI_API_KEY"),
    model_kwargs={"response_format": {"type": "json_object"}},
)

In [11]:
def generate_demography_prompt_input(persona):
    """Flatten a persona into the variables used by the demography prompt.

    Args:
        persona: Mapping with an ``age`` entry (itself a mapping with ``start``
            and ``end``) plus ``gender``, ``ethnicity``, ``location``,
            ``professionalBackground``, ``education`` and ``incomeLevel``.

    Returns:
        dict: New dict with the keys ``ageStart``, ``ageEnd``, ``gender``,
        ``ethnicity``, ``location``, ``profession``, ``education`` and
        ``income``.
    """
    age_range = persona["age"]
    prompt_input = {
        "ageStart": age_range["start"],
        "ageEnd": age_range["end"],
    }
    # (prompt variable, persona key) pairs; some names differ between the two.
    field_pairs = (
        ("gender", "gender"),
        ("ethnicity", "ethnicity"),
        ("location", "location"),
        ("profession", "professionalBackground"),
        ("education", "education"),
        ("income", "incomeLevel"),
    )
    for prompt_key, persona_key in field_pairs:
        prompt_input[prompt_key] = persona[persona_key]
    return prompt_input

In [12]:
def generate_content_prompt_input(content):
    """Build the variables used by the content prompt.

    Args:
        content: Mapping with ``name``, ``type``, ``cast`` (an iterable of
            strings) and ``budget``.

    Returns:
        dict: New dict with the same four keys; ``cast`` is collapsed into a
        single comma-separated string (no space after the comma).
    """
    title = content["name"]
    kind = content["type"]
    cast_line = ",".join(content["cast"])
    budget = content["budget"]
    return dict(name=title, type=kind, cast=cast_line, budget=budget)

In [13]:
# prompt generator
def generate_prompt_input(persona, content):
    """Merge the persona and content variables into one prompt input dict.

    The demography variables are added first and the content variables second,
    so a content key would win if the two ever shared a name.
    """
    prompt_input = {}
    prompt_input.update(generate_demography_prompt_input(persona))
    prompt_input.update(generate_content_prompt_input(content))
    return prompt_input

In [14]:
# # prompt testing
# persona = next(random_persona_generator)
# content = {
#     "name": "The witcher movie",
#     "type": "Movie",
#     "cast": ["Chris Hemsworth", "Natalia Oreiro"],
#     "budget": 4_000_000
# }
# prompt_input = generate_prompt_input(persona, content)
# full_prompt.invoke(prompt_input)
# chain = full_prompt | model 
# chain.invoke(prompt_input)

In [15]:
def generate_reviews(content, how_many):
    """Print ``how_many`` model responses reviewing ``content``.

    Each response uses the next persona from the module-level
    ``random_persona_generator`` and is sent through ``full_prompt | model``.
    The raw response object is printed and nothing is returned.

    Args:
        content: Content description accepted by ``generate_prompt_input``.
        how_many: Number of reviews to request.
    """
    review_chain = full_prompt | model
    for _ in range(how_many):
        # One persona is consumed from the shared generator per review.
        chain_input = generate_prompt_input(next(random_persona_generator), content)
        print(review_chain.invoke(chain_input))

In [16]:
# Content under review. This module-level dict is also what a later cell passes
# to create_simulation_in_db().
content = {
    "name": "The witcher movie",
    "type": "Movie",
    "cast": ["Chris Hemsworth", "Natalia Oreiro"],
    "budget": 4_000_000
}
# Requests and prints a single review; this invokes the chat model.
generate_reviews(content, 1)

content='{\n    "review": "I really enjoyed the action scenes and the special effects in The Witcher movie. The cast did a great job in bringing the characters to life. However, I felt that the plot was a bit confusing and hard to follow at times. Overall, it was an entertaining watch.",\n    "rating": 3,\n    "lookingForward": 1\n}' response_metadata={'token_usage': {'completion_tokens': 77, 'prompt_tokens': 291, 'total_tokens': 368}, 'model_name': 'gpt-3.5-turbo-1106', 'system_fingerprint': 'fp_b953e4de39', 'finish_reason': 'stop', 'logprobs': None} id='run-f57986be-ac47-4636-a95d-ade5911a164c-0' usage_metadata={'input_tokens': 291, 'output_tokens': 77, 'total_tokens': 368}


In [17]:
#!rm data.db

In [19]:
# Open (or create) the SQLite database file in the current working directory.
con = sqlite3.connect('data.db')

cursor = con.cursor()

# All three CREATE statements use IF NOT EXISTS, so re-running this cell leaves
# existing tables and their data untouched.

# simulations: one row per piece of content; cast is stored as a single TEXT value.
cursor.execute('''
CREATE TABLE IF NOT EXISTS simulations (
    id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
    name TEXT,
    type TEXT,
    cast TEXT,
    budget REAL
)
''')

# personas: the columns after id mirror the keys returned by
# generate_demography_prompt_input().
cursor.execute('''
CREATE TABLE IF NOT EXISTS personas (
    id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
    ageStart INTEGER,
    ageEnd INTEGER,
    gender TEXT,
    ethnicity TEXT,
    location TEXT,
    profession TEXT,
    education TEXT,
    income TEXT
)
''')

# reviews: each row points at a simulation; there is no column linking to a persona.
# NOTE(review): SQLite only enforces FOREIGN KEY constraints when
# PRAGMA foreign_keys is enabled on the connection; this cell does not issue it.
cursor.execute('''
CREATE TABLE IF NOT EXISTS reviews (
    id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
    simulation INTEGER NOT NULL,
    source TEXT NOT NULL,
    review TEXT NOT NULL,
    rating REAL NOT NULL,
    
    FOREIGN KEY (simulation) REFERENCES simulations(ID)
)
''')

con.commit()

In [27]:
def create_simulation_in_db(con, content):
    """Insert ``content`` as a new row of the ``simulations`` table.

    Args:
        con: Open ``sqlite3`` connection. It is used as a context manager, so
            the insert is committed on success and rolled back on error.
        content: Mapping with ``name``, ``type``, ``cast`` (an iterable of
            strings) and ``budget``. It is not modified.
    """
    # Work on a copy: the row needs cast as one comma-joined string, while the
    # caller's dict keeps its original list.
    row = dict(content)
    row["cast"] = ",".join(content["cast"])
    # NULL in the id position lets SQLite assign the primary key.
    insert_sql = "INSERT INTO simulations VALUES(NULL, :name, :type, :cast, :budget);"
    with con:
        con.execute(insert_sql, row)

def create_review_in_db(con, simulation, persona, review):
    """Insert one row into the ``reviews`` table.

    Args:
        con: Open ``sqlite3`` connection. It is used as a context manager, so
            the insert is committed on success and rolled back on error.
        simulation: Value stored in the ``simulation`` column when ``review``
            has no ``"simulation"`` key of its own.
            NOTE(review): presumably the id of a ``simulations`` row; confirm
            against the caller.
        persona: Accepted for call compatibility but not stored; the
            ``reviews`` table has no persona column.
        review: Mapping that provides ``source``, ``review`` and ``rating``,
            and optionally ``simulation``. Keys that are not bound by the
            statement (for example ``lookingForward``) are ignored.

    Raises:
        sqlite3.Error: If a required value is missing or a NOT NULL constraint
            is violated.
    """
    # Values in `review` take precedence; the argument only fills the gap.
    params = {"simulation": simulation, **review}
    with con:
        # Naming the columns lets SQLite assign the AUTOINCREMENT id. The
        # original statement also lacked its closing parenthesis.
        con.execute(
            "INSERT INTO reviews (simulation, source, review, rating) "
            "VALUES (:simulation, :source, :review, :rating)",
            params,
        )

def create_persona_in_db(con, persona):
    """Insert ``persona`` as a new row of the ``personas`` table.

    The persona is first flattened with ``generate_demography_prompt_input`` so
    its keys match the table's column names.

    Args:
        con: Open ``sqlite3`` connection. It is used as a context manager, so
            the insert is committed on success and rolled back on error.
        persona: Persona mapping as yielded by ``get_random_persona``.
    """
    data = generate_demography_prompt_input(persona)
    with con:
        # The table has nine columns (id plus eight attributes). Listing the
        # eight attribute columns lets SQLite assign the AUTOINCREMENT id; a
        # bare VALUES list with eight entries is rejected for a column count
        # mismatch.
        con.execute("""
            INSERT INTO personas (ageStart, ageEnd, gender, ethnicity, location, profession, education, income)
            VALUES (:ageStart, :ageEnd, :gender, :ethnicity, :location, :profession, :education, :income)
        """, data)

In [28]:
# Persist the current `content` as a simulation row.
# create_simulation_in_db() already wraps its insert in `with con:`, so this
# outer block adds a second, enclosing use of the same connection context.
with con:
    create_simulation_in_db(con, content)