In [1]:
import pandas as pd
import warnings

warnings.filterwarnings(action="ignore", message="unclosed", category=ImportWarning)
warnings.filterwarnings("ignore", category=DeprecationWarning) 
pd.set_option('display.max_colwidth', 0)

In [2]:
import openai
from helpers import get_env

# Read the two "azure-openai" settings through the project helper; the result
# is unpacked into the API key and the resource endpoint.
API_KEY, RESOURCE_ENDPOINT = get_env("azure-openai")

# Configure the openai module-level client for an Azure-hosted resource.
openai.api_type = "azure"
openai.api_version = "2022-12-01"
openai.api_base = RESOURCE_ENDPOINT
openai.api_key = API_KEY

In [3]:
import os
# Directory holding the raw PDF files, relative to the current working directory.
data_dir = os.path.join(os.curdir, './../data/raw/pdfs')
# Sorted directory listing, skipping any entry whose name contains 'DS_Store'.
pdf_files = sorted(
    filter(lambda name: 'DS_Store' not in name, os.listdir(data_dir))
)
# Last expression: number of files found, shown as the cell output.
len(pdf_files)

147

### Set up Redis

In [4]:
from database import get_redis_connection
from redis import Redis
from redis.commands.search.query import Query
from redis.commands.search.field import (
    TextField,
    VectorField,
    NumericField
)
from redis.commands.search.indexDefinition import (
    IndexDefinition,
    IndexType
)

# Obtain the Redis client from the project's database helper; later cells
# reuse this `redis_client` for every index and query call.
redis_client = get_redis_connection()
# ping() is the cell's last expression, so its result is displayed as the cell
# output and serves as a quick connectivity check.
redis_client.ping()

True

In [5]:
# Search index configuration and schema

from config import VECTOR_FIELD_NAME

# Index identity
INDEX_NAME = "t1-index"        # name of the search index
PREFIX = "testdocs"            # prefix for the document keys

# Vector settings
VECTOR_DIM = 1536              # length of the vectors
DISTANCE_METRIC = "COSINE"     # distance metric for the vectors (ex. COSINE, IP, L2)
# NOTE(review): this assignment replaces the value imported from config above -
# confirm the two agree, or keep a single source for the field name.
VECTOR_FIELD_NAME = 'content_vector'

# RediSearch fields, one per stored attribute.
# Add any additional metadata you want to capture here.
filename = TextField("filename")
text_chunk = TextField("text_chunk")
file_chunk_index = NumericField("file_chunk_index")

# Vector field backed by an HNSW index.
hnsw_attributes = {
    "TYPE": "FLOAT32",
    "DIM": VECTOR_DIM,
    "DISTANCE_METRIC": DISTANCE_METRIC,
}
text_embedding = VectorField(VECTOR_FIELD_NAME, "HNSW", hnsw_attributes)

# Everything that goes into the index definition, in schema order.
fields = [
    filename,
    text_chunk,
    file_chunk_index,
    text_embedding,
]

In [6]:
# Create the search index only when it does not exist yet.
# Optional step to drop the index first if it already exists:
#redis_client.ft(INDEX_NAME).dropindex()
from redis.exceptions import ResponseError

try:
    # info() succeeds only when the index is known to the server.
    redis_client.ft(INDEX_NAME).info()
    print("Index already exists")
except ResponseError as e:
    # Only an error reply from the server is treated as "index missing".
    # Connection failures, timeouts and the like now propagate instead of
    # being mistaken for a missing index.
    print(e)
    # Create RediSearch Index
    print('Not there yet. Creating')
    redis_client.ft(INDEX_NAME).create_index(
        fields = fields,
        definition = IndexDefinition(prefix=[PREFIX], index_type=IndexType.HASH)
    )

Index already exists


# Ingestion

Ingestion is handled in the `embeddings.py` file.

In [7]:
# Confirm that documents were inserted by reading the index's document count.
index_info = redis_client.ft(INDEX_NAME).info()
index_info['num_docs']

'859'

# Query

In [8]:
from database import get_redis_results

In [10]:
%%time

# Example natural-language question to run against the index.
f1_query='what is Boeing Global Services'

# Run the question through the project's search helper against INDEX_NAME.
# The displayed output shows a table with id, result and certainty columns.
result_df = get_redis_results(redis_client,f1_query,index_name=INDEX_NAME)
# Display only the first two rows.
result_df.head(2)

CPU times: user 15.2 ms, sys: 3.29 ms, total: 18.5 ms
Wall time: 622 ms


Unnamed: 0,id,result,certainty
0,0,"Engineering, Modifications Maintenance pro- vides technical fleet solutions for Boeing Global Services’ commercial and government customers around the world to convert, sustain and upgrade any type of aircraft flown today, with efficient custom and standard services in engineering and touch-labor. The diverse team has a formal presence in numerous countries and performs field service work in most markets. Seamless support is possible thanks to Fleet Care engineering services—supporting full fleet operations—in addition to multiple global field service teams, heavy airframe modification facilities, customer service and maintenance, repair, overhaul (MRO) operations centers, and 247 technical support for spare parts. Boeing Global Services’ supply chain operates one of the industry’s most comprehensive parts and services networks. Using supplier manage- ment expertise and advanced analytics capa- bilities, we support all aspects of aircraft fleet, parts and service, including airplane-on-ground support. We also deliver customer-focused solutions such as component repair or landing gear overhaul and exchange. Whether it’s for aircraft in or out of production, we understand the material and logistics support for aircraft, notably spare parts, retrofit kits, distribution and life-cycle management, as well as perfor- mance-based logistics agreements such as the C-17 Globemaster III Integrated Sustainment Program. Global Services has distribution centers and component repair facilities in nine countries, supporting customers worldwide. Training Professional Services is a leader in commercial and defense aviation training and capabilities, including software development, engineering and technical services, training center support and pilot provisioning. Combined with our global infrastructure and research facil- ities, we offer custom simulators, training equip- ment and tailored software. 
Advances in these technologies better connect pilots in the sky with simulators on the ground, reducing aircraft usage and extending fleet life. “Professional services” describes our intention to look at the ecosystem of aviation and provide a complete solution, including services such as infrastructure andor program development and facilities management. 1010289bo119-130. indd 127 127 3118 7:28 PM 01-Mar-18 1010289bo119-130T 1010289bo amoore C M Y K 8.3750 X 10.",0.161366820335
1,1,"Our services expertise, the global reach of our busi- ness and our strong customer partnerships have us well positioned to compete and win for the long-term in areas such as spare parts, maintenance, modifica- tions and upgrades, training. And, increasingly, digital solutions, which generated approximately $1 billion in revenue in 2017. Early key wins include: An F-15 sustainment contract for Qatar. An order for seven 737-800 Boeing Converted Freighters for GECAS. A 787 landing gear exchange agreement with All Nippon Airways. A P-8I training agreement with India. Accelerating Our Pace of Progress In 2017, driven in part by a global business environ- ment that is growing more competitive by the day, we promoted a productive sense of urgency at all levels of the company to accelerate the pace of our innovation. Succeeding in rapidly changing global markets requires that we think and do things differently. It demands change, a willingness to embrace it and the agility to both drive and respond to external forces. The com- petition is not standing still, and neither are we. Over the past year, we made several strategic invest- ments to better position ourselves for future markets and growth. We stood up Boeing Global Services to expand our share of the aerospace services market. 1010289botxt. indd 7 7 3518 4:38 PM 1010289botxtp2,3,7,8,9T 8.3750 X 10.8750 We continued to develop new commercial, defense and space products and services such as the 787-10 and 777X, the KC-46 tanker and T-X trainer aircraft, and the CST-100 Starliner and Space Launch System. We took steps to strengthen our capabilities in areas such as data analytics, avionics, actuation and additive manufacturing—high-value work that benefits from our internal expertise and global scale. We launched our Boeing HorizonX innovation cell and formed partnerships with several companies whose technology can change the competitive landscape. 
We acquired Aurora Flight Sciences to bolster our autonomous-flight portfolio.",0.171360313892
