In [1]:
%env OPENAI_API_KEY="Your api key here"

env: OPENAI_API_KEY=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


In [2]:
from pprint import pprint
import pandas as pd
import nest_asyncio
from model import llm

from langchain_core.documents import Document

nest_asyncio.apply()

# Generate stories from a similar structure.

In [35]:
prompts = ["Generate a description of a person and their background of a person doing person things."]*5
stories = await llm.with_config(
    configurable={"temperature": 2, "model": "gpt-3.5-turbo"}
).abatch(prompts)
stories

ValueError: Configurable key 'temperature' not found in runnable with config keys: {'llm_model_name', 'llm_temperature'}

In [23]:
documents = [Document(text.content) for text in stories]


# Schema Extractions

In [24]:
from chains import create_schema_inference_parser, extract_schema
from outputparser import SchemaInferenceParser

##### Extracting an implicit schema from the documents. The llm will discover the structure.

In [25]:
output_parser_model = await create_schema_inference_parser(documents)
pprint(output_parser_model)

{'fields': [{'description': 'The age of the person',
             'name': 'age',
             'type': 'int'},
            {'description': 'The profession of the person',
             'name': 'profession',
             'type': 'str'},
            {'description': 'A list of hobbies the person enjoys',
             'name': 'hobbies',
             'type': 'List[str]'},
            {'description': 'A list of personal qualities',
             'name': 'qualities',
             'type': 'List[str]'},
            {'description': 'Belief that small acts of kindness can make a big '
                            'difference',
             'name': 'belief_in_kindness',
             'type': 'bool'},
            {'description': "The individual's background including education "
                            'and professional experience.',
             'name': 'background',
             'type': 'str'},
            {'description': "A list of the individual's interests and hobbies.",
             'name': 'i

##### You can give it some guidance to isolate specific information.

In [27]:
output_parser_model_guided = await create_schema_inference_parser(
    documents, "From this data, only extract name, age, and interests"
)

In [28]:
pprint(output_parser_model_guided)

{'fields': [{'description': 'The name of the person',
             'name': 'name',
             'type': 'str'},
            {'description': 'The age of the person',
             'name': 'age',
             'type': 'int'},
            {'description': 'A list of interests of the person',
             'name': 'interests',
             'type': 'List[str]'}],
 'name': 'UserProfile'}


##### This is a good place to edit the schema to your liking.

In [29]:
output_parser_model_guided['fields'].pop(-1)
output_parser_model_guided

{'name': 'UserProfile',
 'fields': [{'name': 'name',
   'type': 'str',
   'description': 'The name of the person'},
  {'name': 'age', 'type': 'int', 'description': 'The age of the person'}]}

# Create parser from model

In [32]:
parser = SchemaInferenceParser.create_infered_json_parser(output_parser_model)

In [33]:
results = await extract_schema(documents, parser)
pprint(results)

[{'age': 35,
  'background': 'grew up in a small town in the Midwest, worked hard to put '
                'herself through college and eventually land her dream job in '
                'the city',
  'belief_in_kindness': None,
  'birthplace': None,
  'career': 'marketing',
  'character_traits': ['kind', 'witty', 'determined'],
  'currentRole': None,
  'current_city': None,
  'education': 'college',
  'education_level': 'college',
  'family_background': 'comes from humble beginnings',
  'family_visits': None,
  'goals': None,
  'hobbies': ['practicing yoga',
              'exploring new restaurants',
              'volunteering at a local animal shelter'],
  'hometown_size': 'small',
  'interests': ['yoga', 'exploring new restaurants', 'volunteering'],
  'occupation': 'marketing executive',
  'origin': None,
  'personal_traits': ['kind heart', 'quick wit', 'determination'],
  'personalityTraits': ['kind', 'witty', 'determined'],
  'profession': 'marketing executive',
  'qualities': ['

In [34]:

df = pd.DataFrame(results)
df.head()


Unnamed: 0,age,profession,hobbies,qualities,belief_in_kindness,background,interests,origin,currentRole,personalityTraits,...,career,family_visits,personal_traits,occupation,character_traits,hometown_size,education_level,volunteer_work,work_ethic,sense_of_gratitude
0,35.0,marketing executive,"[practicing yoga, exploring new restaurants, v...","[kind heart, quick wit, unwavering determination]",,"grew up in a small town in the Midwest, worked...","[yoga, exploring new restaurants, volunteering]",,,"[kind, witty, determined]",...,marketing,,"[kind heart, quick wit, determination]",marketing executive,"[kind, witty, determined]",small,college,local animal shelter,strong work ethic,True
1,35.0,account executive,"[practicing yoga, going for runs along the lak...","[creativity, attention to detail, driven, ambi...",,Born and raised in a small town in the Midwest...,"[yoga, running, cooking, art, brunch with frie...",,respected account executive,"[creative, detail-oriented, driven, ambitious,...",...,Advertising,Regularly travels back home to visit her paren...,"[creative, detail-oriented, driven, ambitious,...",account executive,"[creative, detail-oriented, driven, ambitious,...",small,college graduate,,strong work ethic and sense of determination,
2,35.0,counselor,"[volunteering at a local animal shelter, spend...","[kind-hearted, hardworking, compassionate, res...",True,"Grew up in a small town in the Midwest, raised...","[volunteering, animal welfare, hiking, fitness]",,,"[kind-hearted, hardworking, compassionate, res...",...,Counselor at a local shelter for homeless youth,,"[kind-hearted, hardworking, compassionate, res...",Counselor,"[kind-hearted, hardworking, compassionate, res...",small,College degree,Local animal shelter,Dedicated to making a positive impact on the w...,True
3,28.0,social work,"[volunteering at a local animal shelter, walki...","[empathetic, willingness to help, dedicated, p...",True,Graduated from college with a degree in social...,"[volunteering, outdoors, hiking, exploring nat...",,,"[empathetic, positive, hopeful]",...,working at a local non-profit organization sup...,,"[empathetic, willingness to help, dedicated, p...",social worker,"[compassion, selflessness]",small town,college degree,volunteering at a local animal shelter,,
4,,businesswoman,"[running, hitting the gym, participating in yo...","[warm, welcoming, willing to lend a helping hand]",,"Originally from Brazil, moved to the United St...","[fitness, wellness, healthy recipes]",Brazil,high-ranking position at a prestigious investm...,"[vibrant, charismatic, driven, ambitious, posi...",...,businesswoman in the finance sector,,"[vibrant, charismatic, warm, welcoming, driven...",businesswoman,"[vibrant, charismatic, warm, welcoming, driven...",,university degree,,,


In [5]:
test = llm.with_config(configurable={"temperature": 2, "model": "gpt-3.5-turbo"})

In [8]:
test.config

{'configurable': {'temperature': 2, 'model': 'gpt-3.5-turbo'}}

In [2]:
from outputparser import SchemaInferenceParser, type_map

In [14]:
import json
type_map_str = json.dumps({k: k for k in type_map.keys()})
type_map_str




'{"str": "str", "int": "int", "float": "float", "bool": "bool", "List[str]": "List[str]", "List[int]": "List[int]", "List[float]": "List[float]", "List[bool]": "List[bool]", "List[dict]": "List[dict]"}'

In [19]:
for k in type_map.keys():
    type_map_str = type_map_str.replace(f': "k"', f': -')
type_map_str

'{"str": "str", "int": "int", "float": "float", "bool": "bool", "List[str]": "List[str]", "List[int]": "List[int]", "List[float]": "List[float]", "List[bool]": "List[bool]", "List[dict]": "List[dict]"}'

In [24]:
modified_string = (
    type_map_str.replace('": "', '": ').replace('", ', ', ').replace('"}', "}")
)

In [25]:
modified_string

'{"str": str, "int": int, "float": float, "bool": bool, "List[str]": List[str], "List[int]": List[int], "List[float]": List[float], "List[bool]": List[bool], "List[dict]": List[dict]}'