In [0]:

# Load the section metadatas
import json
import pandas as pd

with open("/Volumes/main/casml/raw_data/book-metadata.json", "r") as f:
    book_metadata = json.load(f)

# Map page-nums to sections
def map_page_to_section(metadata):
    page_map = {}
    
    for section, sec_data in metadata.items():
        sec_start = sec_data.get("page_start")
        sec_end = sec_data.get("page_end")
        subsections = sec_data.get("subsections", {})

        for page in range(sec_start, sec_end + 1):
            page_map[page] = {"section": section}

        for subsec, subsec_data in subsections.items():
            sub_start = subsec_data.get("page_start")
            sub_end = subsec_data.get("page_end")
            for page in range(sub_start, sub_end + 1):
                page_map[page] = {"section": f'{section}/{subsec}'}

    return page_map

page_to_section = map_page_to_section(book_metadata)

# Convert page_to_section dict to a DataFrame
page_section_df = pd.DataFrame([
    {"page_num": page, "section": info["section"]}
    for page, info in page_to_section.items()
])

# Create Spark DataFrame
page_section_sdf = spark.createDataFrame(page_section_df)

# Create a temporary SQL table
page_section_sdf.createOrReplaceTempView("pag_to_section_temp")



In [0]:
%sql
USE main.casml;
-- DROP TABLE knowledge_base;
CREATE TABLE IF NOT EXISTS knowledge_base (
  id BIGINT GENERATED ALWAYS AS IDENTITY,
  page_num INT,
  content STRING,
  section STRING
)
TBLPROPERTIES (delta.enableChangeDataFeed = true);

In [0]:
%sql
INSERT OVERWRITE TABLE knowledge_base (page_num, content)
SELECT
first_value(e.value:bbox[0]:page_id) AS page_num,
string_agg(e.value:content) AS content
FROM (
  SELECT ai_parse_document(content) AS parsed_document, path
  FROM READ_FILES('/Volumes/main/casml/raw_data/book-1-20.pdf', format => 'binaryFile')
) AS f,
LATERAL variant_explode(f.parsed_document:document:elements) AS e
GROUP BY e.value:bbox[0]:page_id::int, f.path
HAVING page_num::int > 7




num_affected_rows,num_inserted_rows
12,12


In [0]:
%sql
MERGE INTO knowledge_base AS kb
USING pag_to_section_temp AS pts
ON kb.page_num = pts.page_num
WHEN MATCHED THEN
  UPDATE SET kb.section = pts.section;

num_affected_rows,num_updated_rows,num_deleted_rows,num_inserted_rows
12,12,0,0


In [0]:
%sql
SELECT * from knowledge_base

id,page_num,content,section
48,13,"2 Prefacestudies and current and emerging research. The text also includes coverage of the DSM-5 in examinations of psychological disorders. Psychology 2e incorporates discussions that reflect the diversity within the discipline, as well as the diversity of cultures and communities across the globe.Coverage and scopeThe first edition of Psychology has been used by thousands of faculty and hundreds of thousands of students since its publication in 2015. OpenStax mined our adopters' extensive and helpful feedback to identify the most significant revision needs while maintaining the organization that many instructors had incorporated into their courses. Specific surveys, pre-revision reviews, and customization analysis, as well as analytical data from OpenStax partners and online learning environments, all aided in planning the revision.The result is a book that thoroughly treats psychology's foundational concepts while adding current and meaningful coverage in specific areas. Psychology 2e retains its manageable scope and contains ample features to draw learners into the discipline.Structurally, the textbook remains similar to the first edition, with no chapter reorganization and very targeted changes at the section level.Chapter 1: Introduction to PsychologyChapter 2: Psychological ResearchChapter 3: BiopsychologyChapter 4: States of ConsciousnessChapter 5: Sensation and PerceptionChapter 6: LearningChapter 7: Thinking and IntelligenceChapter 8: MemoryChapter 9: Lifespan DevelopmentChapter 10: Motivation and EmotionChapter 11: PersonalityChapter 12: Social PsychologyChapter 13: Industrial-Organizational PsychologyChapter 14: Stress, Lifestyle, and HealthChapter 15: Psychological DisordersChapter 16: Therapy and TreatmentChanges to the Second EditionOpenStax only undertakes second editions when significant modifications to the text are necessary. In the case of Psychology 2e, user feedback indicated that we needed to focus on a few key areas, which we have done in the following ways.Content revisions for clarity, accuracy, and currencyThe revision plan varied by chapter based on need. Some chapters were significantly updated for conceptual coverage, research-informed data, and clearer language. In other chapters, the revisions focused mostly on currency of examples and updates to statistics.Over 210 new research references have been added or updated in order to improve the scholarly underpinnings of the material and broaden the perspective for students. Dozens of examples and feature boxes have been changed or added to better explain concepts and/or increase relevance for students.Research replication and validityTo engage students in stronger critical analysis and inform them about research reproducibility, substantial coverage has been added to the research chapter and strategically throughout the textbook whenever keyAccess for free at openstax.org",Introduction to Psychology/History of Psychology
47,11,Access for free at openstax.org,Introduction to Psychology/History of Psychology
46,15,"4 Prefaceand Social Norms,"" and ""Conditioning and OCD.""Art, interactives, and assessments that engageOur art program is designed to enhance students' understanding of psychological concepts through simple, effective graphs, diagrams, and photographs. Psychology 2e also incorporates links to relevant interactive exercises and animations that help bring topics to life. Selected assessment items touch directly on students' lives.Link to Learning features direct students to online interactive exercises and animations that add a fuller context to core content and provide an opportunity for application.Personal Application Questions engage students in topics at a personal level to encourage reflection and promote discussion.Additional ResourcesStudent and Instructor ResourcesWe've compiled additional resources for both students and instructors, including Getting Started Guides, an instructor solution guide, a test bank, and PowerPoint slides. Instructor resources require a verified instructor account, which you can apply for when you log in or create your account on openstax.org. Take advantage of these resources to supplement your OpenStax book.Community HubsOpenStax partners with the Institute for the Study of Knowledge Management in Education (ISKME) to offer Community Hubs on OER Commons—a platform for instructors to share community-created resources that support OpenStax books, free of charge. Through our Community Hubs, instructors can upload their own materials or download resources to use in their own courses, including additional ancillaries, teaching material, multimedia, and relevant course content. We encourage instructors to join the hubs for the subjects most relevant to your teaching and research as an opportunity both to enrich your courses and to engage with other faculty.To reach the Community Hubs, visit www.oercommons.org/hubs/openstax.Technology partnersAs allies in making high-quality learning materials accessible, our technology partners offer optional low-cost tools that are integrated with OpenStax books. To access the technology options for your text, visit your book page on openstax.org.About the authorsSenior contributing authorsRose M. Spielman (Content Lead)Dr. Rose Spielman has been teaching psychology and working as a licensed clinical psychologist for 20 years. Her academic career has included positions at Quinnipiac University, Housatonic Community College, and Goodwin College. As a licensed clinical psychologist, educator, and volunteer director, Rose is able to connect with people from diverse backgrounds and facilitate treatment, advocacy, and education. In her years of work as a teacher, therapist, and administrator, she has helped thousands of students and clients and taught them to advocate for themselves and move their lives forward to become more productive citizens and family members.William J. Jenkins, Mercer University Marilyn D. Lovett, Spelman CollegeAccess for free at openstax.org",Introduction to Psychology/History of Psychology
45,17,"6 PrefaceChristine Selby, Husson University Sally B. Seraphin, Centre College Brian Sexton, Kean University Nancy Simpson, Trident Technical College Jason M. Smith, Federal Bureau of Prisons – FCC Hazelton Robert Stennett, University of Georgia Jennifer Stevenson, Ursinus College Eric Weiser, Curry College Jay L. Wenger, Harrisburg Area Community College Alan Whitehead, Southern Virginia University Valjean Whitlow, American Public University Rachel Wu, University of California, Riverside Alexandra Zelin, University of Tennessee at ChattanoogaAccess for free at openstax.org",Introduction to Psychology/History of Psychology
44,16,"Preface 5Contributing AuthorsMara Arguete, Lincoln University Laura Bryant, Eastern Gateway Community College Barbara Chappell, Walden University Kathryn Dumper, Bainbridge State College Arlene Lacombe, Saint Joseph's University Julie Lazzara, Paradise Valley Community College Tammy McClain, West Liberty University Barbara B. Oswald, Miami University Marion Perlmutter, University of Michigan Mark D. Thomas, Albany State UniversityReviewersPatricia G. Adams, Pitt Community College Daniel Bellack, Trident Technical College Christopher M. Bloom, Providence College Jerimy Blowers, Cayuga Community College Salena Brody, Collin College David A. Caicedo, Borough of Manhattan Community College, CUNY Bettina Casad, University of Missouri–St. Louis Sharon Chacon, Northeast Wisconsin Technical College James Corpening Frank Eyetsemitan, Roger Williams University Tamara Ferguson, Utah State University Kathleen Flannery, Saint Anselm College Johnathan Forbey, Ball State University Laura Gaudet, Chadron State College William Goggin, University of Southern Mississippi Jeffery K. Gray, Charleston Southern University Heather Griffiths, Fayetteville State University Mark Holder, University of British Columbia Rita Houge, Des Moines Area Community College Colette Jacquot, Strayer University John Johanson, Winona State University Andrew Johnson, Park University Shaila Khan, Tougaloo College Cynthia Kreutzer, Georgia State University Perimeter College at Clarkston Campus Carol Laman, Houston Community College Dana C. Leighton, Texas A&M University—Texarkana Thomas Malloy, Rhode Island College Jan Mendoza, Golden West College Christopher Miller, University of Minnesota Lisa Moeller, Beckfield College Amy T. Nusbaum, Heritage University Jody Resko, Queensborough Community College (CUNY) Hugh Riley, Baylor University Juan Salinas, University of Texas at Austin Brittney Schrick, Southern Arkansas University Phoebe Scotland, College of the Rockies",Introduction to Psychology/History of Psychology
43,10,CHAPTER 15 Psychological DisordersIntroduction 537 15.1 What Are Psychological Disorders? 538 15.2 Diagnosing and Classifying Psychological Disorders 542 15.3 Perspectives on Psychological Disorders 545 15.4 Anxiety Disorders 548 15.5 Obsessive-Compulsive and Related Disorders 554 15.6 Posttraumatic Stress Disorder 558 15.7 Mood and Related Disorders 560 15.8 Schizophrenia 570 15.9 Dissociative Disorders 574 15.10 Disorders in Childhood 576 15.11 Personality Disorders 582 Key Terms 589 Summary 591 Review Questions 594 Critical Thinking Questions 597 Personal Application Questions 598CHAPTER 16 Therapy and Treatment 599Introduction 599 16.1 Mental Health Treatment: Past and Present 600 16.2 Types of Treatment 605 16.3 Treatment Modalities 617 16.4 Substance-Related and Addictive Disorders: A Special Case 621 16.5 The Sociocultural Model and Therapy Utilization 623 Key Terms 627 Summary 628 Review Questions 630 Critical Thinking Questions 632 Personal Application Questions 632 References 633 Index 733,Introduction to Psychology/History of Psychology
42,19,"8 1 Introduction to Psychology2002). Nash was the subject of the 2001 movie A Beautiful Mind. Why did these people have these experiences? How does the human brain work? And what is the connection between the brain's internal processes and people's external behaviors? This textbook will introduce you to various ways that the field of psychology has explored these questions.1.1 What Is Psychology?LEARNING OBJECTIVESBy the end of this section, you will be able to:Define psychologyUnderstand the merits of an education in psychologyWhat is creativity? What are prejudice and discrimination? What is consciousness? The field of psychology explores questions like these. Psychology refers to the scientific study of the mind and behavior. Psychologists use the scientific method to acquire knowledge. To apply the scientific method, a researcher with a question about how or why something happens will propose a tentative explanation, called a hypothesis, to explain the phenomenon. A hypothesis should fit into the context of a scientific theory, which is a broad explanation or group of explanations for some aspect of the natural world that is consistently supported by evidence over time. A theory is the best understanding we have of that part of the natural world. The researcher then makes observations or carries out an experiment to test the validity of the hypothesis. Those results are then published or presented at research conferences so that others can replicate or build on the results.Scientists test that which is perceivable and measurable. For example, the hypothesis that a bird sings because it is happy is not a hypothesis that can be tested since we have no way to measure the happiness of a bird. We must ask a different question, perhaps about the brain state of the bird, since this can be measured. However, we can ask individuals about whether they sing because they are happy since they are able to tell us. Thus, psychological science is empirical, based on measurable data.In general, science deals only with matter and energy, that is, those things that can be measured, and it cannot arrive at knowledge about values and morality. This is one reason why our scientific understanding of the mind is so limited, since thoughts, at least as we experience them, are neither matter nor energy. The scientific method is also a form of empiricism. An empirical method for acquiring knowledge is one based on observation, including experimentation, rather than a method based only on forms of logical argument or previous authorities.It was not until the late 1800s that psychology became accepted as its own academic discipline. Before this time, the workings of the mind were considered under the auspices of philosophy. Given that any behavior is, at its roots, biological, some areas of psychology take on aspects of a natural science like biology. No biological organism exists in isolation, and our behavior is influenced by our interactions with others. Therefore, psychology is also a social science.WHY STUDY PSYCHOLOGY?Often, students take their first psychology course because they are interested in helping others and want to learn more about themselves and why they act the way they do. Sometimes, students take a psychology course because it either satisfies a general education requirement or is required for a program of study such as nursing or pre-med. Many of these students develop such an interest in the area that they go on to declare psychology as their major. As a result, psychology is one of the most popular majors on college campuses across the United States (Johnson & Lubin, 2011). A number of well-known individuals were psychology majors. Just a few famous names on this list are Facebook's creator Mark Zuckerberg, television personality and political satirist Jon Stewart, actress Natalie Portman, and filmmaker Wes Craven (Halonen, 2011). About 6 percent of all bachelor degrees granted in the United States are in the discipline of psychology (U.S. Department of Education, 2016).Access for free at openstax.org",Introduction to Psychology/Contemporary Psychology
41,18,"1Introduction to PsychologyFIGURE 1.1 Psychology is the scientific study of mind and behavior. (credit ""background"": modification of work by Nattachai Noogure; credit ""top left"": modification of work by Peter Shanks; credit ""top middle"": modification of work by ""devinfl""/Flickr; credit ""top right"": modification of work by Alejandra Quintero Sinisterra; credit ""bottom left"": modification of work by Gabriel Rocha; credit ""bottom middle-left"": modification of work by Caleb Roenigk; credit ""bottom middle-right"": modification of work by Staffan Scherz; credit ""bottom right"": modification of work by Czech Provincial Reconstruction Team)CHAPTER OUTLINE1.1 What Is Psychology?1.2 History of Psychology1.3 Contemporary Psychology1.4 Careers in PsychologyINTRODUCTION Clive Wearing is an accomplished musician who lost his ability to form new memories when he became sick at the age of 46. While he can remember how to play the piano perfectly, he cannot remember what he ate for breakfast just an hour ago (Sacks, 2007). James Wannerton experiences a taste sensation that is associated with the sound of words. His former girlfriend's name tastes like rhubarb (Mundasad, 2013). John Nash is a brilliant mathematician and Nobel Prize winner. However, while he was a professor at MIT, he would tell people that the New York Times contained coded messages from extraterrestrial beings that were intended for him. He also began to hear voices and became suspicious of the people around him. Soon thereafter, Nash was diagnosed with schizophrenia and admitted to a state-run mental institution (O'Connor & Robertson,",Introduction to Psychology/Contemporary Psychology
40,9,"11.7 Trait Theorists 379 11.8 Cultural Understandings of Personality 384 11.9 Personality Assessment 386 Key Terms 391 Summary 392 Review Questions 394 Critical Thinking Questions 397 Personal Application Questions 397CHAPTER 12 Social Psychology 399Introduction 399 12.1 What Is Social Psychology? 400 12.2 Self-presentation 406 12.3 Attitudes and Persuasion 409 12.4 Conformity, Compliance, and Obedience 415 12.5 Prejudice and Discrimination 422 12.6 Aggression 429 12.7 Prosocial Behavior 432 Key Terms 437 Summary 439 Review Questions 440 Critical Thinking Questions 444 Personal Application Questions 444CHAPTER 13 Industrial-Organizational PsychologyIntroduction 447 13.1 What Is Industrial and Organizational Psychology? 448 13.2 Industrial Psychology: Selecting and Evaluating Employees 456 13.3 Organizational Psychology: The Social Dimension of Work 467 13.4 Human Factors Psychology and Workplace Design 477 Key Terms 480 Summary 481 Review Questions 481 Critical Thinking Questions 483 Personal Application Questions 484CHAPTER 14 Stress, Lifestyle, and HealthIntroduction 485 14.1 What Is Stress? 486 14.2 Stressors 496 14.3 Stress and Illness 502 14.4 Regulation of Stress 514 14.5 The Pursuit of Happiness 521 Key Terms 529 Summary 530 Review Questions 531 Critical Thinking Questions 534 Personal Application Questions 535Access for free at openstax.org",Introduction to Psychology/History of Psychology
39,8,"Critical Thinking Questions 246 Personal Application Questions 246CHAPTER 8Memory 247 Introduction 247 8.1 How Memory Functions 248 8.2 Parts of the Brain Involved with Memory 255 8.3 Problems with Memory 259 8.4 Ways to Enhance Memory 269 Key Terms 273 Summary 274 Review Questions 275 Critical Thinking Questions 276 Personal Application Questions 277CHAPTER 9Lifespan Development 279 Introduction 279 9.1 What Is Lifespan Development? 280 9.2 Lifespan Theories 284 9.3 Stages of Development 292 9.4 Death and Dying 313 Key Terms 315 Summary 316 Review Questions 317 Critical Thinking Questions 319 Personal Application Questions 320CHAPTER 10 Emotion and Motivation 321 Introduction 321 10.1 Motivation 322 10.2 Hunger and Eating 328 10.3 Sexual Behavior 334 10.4 Emotion 342 Key Terms 353 Summary 354 Review Questions 355 Critical Thinking Questions 357 Personal Application Questions 357CHAPTER 11 Personality 359Introduction 359 11.1 What Is Personality? 360 11.2 Freud and the Psychodynamic Perspective 362 11.3 Neo-Freudians: Adler, Erikson, Jung, and Horney 368 11.4 Learning Approaches 373 11.5 Humanistic Approaches 377 11.6 Biological Approaches 378",Introduction to Psychology/What Is Psychology?


In [0]:
%pip install -U -qqqq mlflow>=3.1.1 langchain=0.4.0.dev0 langgraph databricks-langchain pydantic databricks-agents unitycatalog-langchain[databricks] uv databricks-feature-engineering==0.12.1
dbutils.library.restartPython()



[43mNote: you may need to restart the kernel using %restart_python or dbutils.library.restartPython() to use updated packages.[0m


In [0]:
import time

def endpoint_exists(vsc, vs_endpoint_name):
  try:
    return vs_endpoint_name in [e['name'] for e in vsc.list_endpoints().get('endpoints', [])]
  except Exception as e:
    #Temp fix for potential REQUEST_LIMIT_EXCEEDED issue
    if "REQUEST_LIMIT_EXCEEDED" in str(e):
      print("WARN: couldn't get endpoint status due to REQUEST_LIMIT_EXCEEDED error. The demo will consider it exists")
      return True
    else:
      raise e

def wait_for_vs_endpoint_to_be_ready(vsc, vs_endpoint_name):
  for i in range(180):
    try:
      endpoint = vsc.get_endpoint(vs_endpoint_name)
    except Exception as e:
      #Temp fix for potential REQUEST_LIMIT_EXCEEDED issue
      if "REQUEST_LIMIT_EXCEEDED" in str(e):
        print("WARN: couldn't get endpoint status due to REQUEST_LIMIT_EXCEEDED error. Please manually check your endpoint status")
        return
      else:
        raise e
    status = endpoint.get("endpoint_status", endpoint.get("status"))["state"].upper()
    if "ONLINE" in status:
      return endpoint
    elif "PROVISIONING" in status or i <6:
      if i % 20 == 0: 
        print(f"Waiting for endpoint to be ready, this can take a few min... {endpoint}")
      time.sleep(10)
    else:
      raise Exception(f'''Error with the endpoint {vs_endpoint_name}. - this shouldn't happen: {endpoint}.\n Please delete it and re-run the previous cell: vsc.delete_endpoint("{vs_endpoint_name}")''')
  raise Exception(f"Timeout, your endpoint isn't ready yet: {vsc.get_endpoint(vs_endpoint_name)}")

def index_exists(vsc, endpoint_name, index_full_name):
    try:
        vsc.get_index(endpoint_name, index_full_name).describe()
        return True
    except Exception as e:
        if 'RESOURCE_DOES_NOT_EXIST' not in str(e):
            print(f'Unexpected error describing the index. This could be a permission issue.')
            raise e
    return False
  
def wait_for_index_to_be_ready(vsc, vs_endpoint_name, index_name):
  for i in range(180):
    idx = vsc.get_index(vs_endpoint_name, index_name).describe()
    index_status = idx.get('status', idx.get('index_status', {}))
    status = index_status.get('detailed_state', index_status.get('status', 'UNKNOWN')).upper()
    url = index_status.get('index_url', index_status.get('url', 'UNKNOWN'))
    if "ONLINE" in status:
      return
    if "UNKNOWN" in status:
      print(f"Can't get the status - will assume index is ready {idx} - url: {url}")
      return
    elif "PROVISIONING" in status:
      if i % 40 == 0: print(f"Waiting for index to be ready, this can take a few min... {index_status} - pipeline url:{url}")
      time.sleep(10)
    else:
        raise Exception(f'''Error with the index - this shouldn't happen. DLT pipeline might have been killed.\n Please delete it and re-run the previous cell: vsc.delete_index("{index_name}, {vs_endpoint_name}") \nIndex details: {idx}''')
  raise Exception(f"Timeout, your index isn't ready yet: {vsc.get_index(index_name, vs_endpoint_name)}")

In [0]:
VECTOR_SEARCH_ENDPOINT_NAME = "casml_vs_endpoint"

from databricks.vector_search.client import VectorSearchClient
vsc = VectorSearchClient(disable_notice=True)

if not endpoint_exists(vsc, VECTOR_SEARCH_ENDPOINT_NAME):
    endpoints = vsc.list_endpoints()
    if len(endpoints):
        endpoint_names = [ep['name'] for ep in endpoints['endpoints']]
        for name in endpoint_names:
            vsc.delete_endpoint(name)

    vsc.create_endpoint(name=VECTOR_SEARCH_ENDPOINT_NAME, endpoint_type="STANDARD")

wait_for_vs_endpoint_to_be_ready(vsc, VECTOR_SEARCH_ENDPOINT_NAME)
print(f"Endpoint named {VECTOR_SEARCH_ENDPOINT_NAME} is ready.")

Endpoint named casml_vs_endpoint is ready.


In [0]:
from databricks.sdk import WorkspaceClient

#The table we'd like to index
catalog = "main"
dbName = "casml"
source_table_fullname = f"{catalog}.{dbName}.knowledge_base"
# Where we want to store our index
vs_index_fullname = f"{catalog}.{dbName}.knowledge_base_vs_index"

if not index_exists(vsc, VECTOR_SEARCH_ENDPOINT_NAME, vs_index_fullname):
  print(f"Creating index {vs_index_fullname} on endpoint {VECTOR_SEARCH_ENDPOINT_NAME}...")
  vsc.create_delta_sync_index(
    endpoint_name=VECTOR_SEARCH_ENDPOINT_NAME,
    index_name=vs_index_fullname,
    source_table_name=source_table_fullname,
    pipeline_type="TRIGGERED",
    primary_key="id",
    embedding_source_column='content', #The column containing our text
    embedding_model_endpoint_name='databricks-gte-large-en', #The embedding endpoint used to create the embeddings
    columns_to_sync=['page_num', 'section']
  )
  #Let's wait for the index to be ready and all our embeddings to be created and indexed
  wait_for_index_to_be_ready(vsc, VECTOR_SEARCH_ENDPOINT_NAME, vs_index_fullname)
else:
  #Trigger a sync to update our vs content with the new data saved in the tableoug
  wait_for_index_to_be_ready(vsc, VECTOR_SEARCH_ENDPOINT_NAME, vs_index_fullname)
  vsc.get_index(VECTOR_SEARCH_ENDPOINT_NAME, vs_index_fullname).sync()

print(f"index {vs_index_fullname} on table {source_table_fullname} is ready")

index main.casml.knowledge_base_vs_index on table main.casml.knowledge_base is ready


In [0]:
question = "What was the main contribution the 'Chain-of-Thought Prompting Elicits Reasoning in Large Language Models' paper"

results = vsc.get_index(VECTOR_SEARCH_ENDPOINT_NAME, vs_index_fullname).similarity_search(
  query_text=question,
  columns=["id", "content", 'page_num', 'section'],
  num_results=1)
  
docs = results.get('result', {}).get('data_array', [])

[NOTICE] Using a notebook authentication token. Recommended for development only. For improved performance, please use Service Principal based authentication. To disable this message, pass disable_notice=True.


In [0]:
from agent import AGENT 

import ast
import re
import json
import io
import csv

def parse_doc_string(doc_string: str) -> dict | None:
    # Find all occurrences of the document's contents.
    pattern = re.compile(r"metadata=({.*?}), page_content='(.*?)'")
    matches = pattern.findall(doc_string)

    # Iterate through the matches and parse each one
    parsed_list = []
    for metadata_str, page_content_str in matches:
        try:
            # Safely parse the metadata string into a dictionary
            metadata = ast.literal_eval(metadata_str)
            
            # Decode the page content to correctly handle escape sequences like \n
            page_content = page_content_str.encode().decode('unicode_escape')
            
            # Append the structured dictionary to our list
            parsed_list.append({
                'metadata': metadata,
                'page_content': page_content
            })
        except (ValueError, SyntaxError) as e:
            print(f"Skipping a malformed document part due to: {e}")

    # Print the final list of dictionaries with nice formatting
    return parsed_list

def parse_agent_output(answer):
    if len(answer.output) < 3:
        return None
    for i in range(len(answer.output)):
        print(answer.output[i])

    results = {}
    parsed_list = parse_doc_string(answer.output[1].output if len(answer.output) > 1 else '')
    context = ""

    for doc_page_dict in parsed_list:
        context += doc_page_dict["page_content"]
        context += "\n\n"

    final_answer_idx = -1
    for i in range(2, len(answer.output)):
        if answer.output[i].type == "message":
            final_answer_idx = i

    if final_answer_idx != -1:
        content = answer.output[final_answer_idx].content[0]
        if "text" in content.keys():
            results['answer'] = content["text"]
        else:
            results['answer'] = ""
    else:
        results['answer'] = ""

    results['references'] = {"sections": [doc_page_dict["metadata"]["section"] for doc_page_dict in parsed_list],
                                "pages": [int(doc_page_dict["metadata"]["page_num"]) for doc_page_dict in parsed_list]}
    
    return results

def parsed_results_to_csv(data):
    fieldnames = data[0].keys()

    with open("/Volumes/main/casml/raw_data/output.csv", "w", newline='', encoding="utf-8") as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(data)

# Read all queries
with open("/Volumes/main/casml/raw_data/queries.json", "r") as f:
    queries = json.load(f)

results = []
for i, query in enumerate(queries):
    print(f'Query {i}')
    query_id = query['query_id']
    result = AGENT.predict({"input":[{"role": "user", "content": query['question']}]})
    parsed_result = parse_agent_output(result)

    if parsed_result:
        parsed_result['query_id'] = query_id
        results.append(parsed_result)

if len(results) > 0:
    parsed_results_to_csv(results)
else:
    raise Exception("No results obtained")




AGENT llm_endpoint_name:  databricks-meta-llama-3-1-8b-instruct
Model Config:  <mlflow.models.model_config.ModelConfig object at 0xffdf4fd5d460>
Query 0
Query 1
[NOTICE] Using a notebook authentication token. Recommended for development only. For improved performance, please use Service Principal based authentication. To disable this message, pass disable_notice=True.
type='function_call' id='run--ef98fd4b-702a-4b32-88e7-654dda4653e5' call_id='call_8b9305cb-a2f7-4087-bc84-b93f265291aa' name='main__casml__knowledge_base_vs_index' arguments='{"query": "basic parts of a neuron", "filters": null}'
type='function_call_output' call_id='call_8b9305cb-a2f7-4087-bc84-b93f265291aa' output='[Document(metadata={\'section\': \'Introduction to Psychology/What Is Psychology?\', \'page_num\': 8.0, \'id\': 27.0}, page_content=\'Critical Thinking Questions 246\\nPersonal Application Questions 246CHAPTER 8Memory 247\\nIntroduction 247\\n8.1 How Memory Functions 248\\n8.2 Parts of the Brain Involved with 

[Trace(trace_id=tr-1a8a06f8ec326dc147106bf4e0882cb4), Trace(trace_id=tr-a5021129d1536cbf5c798c266c979efa), Trace(trace_id=tr-a8d0ace6fc3c08f5fd295c6d092a5230), Trace(trace_id=tr-6230a8079717b916ba92cf425e68709d), Trace(trace_id=tr-f738deaa12e8b8ee031c02e876885e93), Trace(trace_id=tr-f52bcdf21382e2f3bd2b90be56a30369), Trace(trace_id=tr-4306cb4cfae3020c62dc3ab791b80556), Trace(trace_id=tr-cabb539cdf1a5730be3aaecf192699e0), Trace(trace_id=tr-cf70499479d518c886f4bba9cef0aebd), Trace(trace_id=tr-a479798ec285e133dd408d804d9d502c)]

[0;31m---------------------------------------------------------------------------[0m
[0;31mAttributeError[0m                            Traceback (most recent call last)
File [0;32m<command-8945825502578450>, line 86[0m
[1;32m     84[0m query_id [38;5;241m=[39m query[[38;5;124m'[39m[38;5;124mquery_id[39m[38;5;124m'[39m]
[1;32m     85[0m result [38;5;241m=[39m AGENT[38;5;241m.[39mpredict({[38;5;124m"[39m[38;5;124minput[39m[38;5;124m"[39m:[{[38;5;124m"[39m[38;5;124mrole[39m[38;5;124m"[39m: [38;5;124m"[39m[38;5;124muser[39m[38;5;124m"[39m, [38;5;124m"[39m[38;5;124mcontent[39m[38;5;124m"[39m: query[[38;5;124m'[39m[38;5;124mquestion[39m[38;5;124m'[39m]}]})
[0;32m---> 86[0m parsed_result [38;5;241m=[39m parse_agent_output(result)
[1;32m     88[0m [38;5;28;01mif[39;00m parsed_result:
[1;32m     89[0m     parsed_result[[38;5;124m'[39m[38;5;124mquery_id[39m[38;5;124m'[39m] [38;5;241m=[39m query_id

File [0;32m<command-894582

In [0]:
%pip show langchain
%pip show databricks-langchain

