In [1]:
import html
import os
import pickle

import chromadb
from sentence_transformers import SentenceTransformer

# Directory that holds the pickled embeddings, document names, and texts.
# NOTE(review): this is a raw string whose backslashes are also doubled, so the
# value contains literal double separators. It is kept byte-identical because
# the recorded run used it; consider a configurable, non-absolute data
# directory instead.
load_path = r'C:\\Users\\MCOB PHD 14\\Dropbox\\Charlies Dissertation\\Beige Books\\chunks\\saved_data'

# Number of records sent to ChromaDB per add() call. Chroma rejects a single
# call above its maximum batch size, so the corpus is inserted in slices.
ADD_BATCH_SIZE = 5000


def _load_pickle(filename):
    """Return the object unpickled from `filename` inside `load_path`."""
    # NOTE(security): pickle.load can execute arbitrary code while loading.
    # Only point this at files you produced yourself.
    with open(os.path.join(load_path, filename), 'rb') as f:
        return pickle.load(f)


# Load the embeddings, document names, and texts
embeddings = _load_pickle('embeddings.pkl')
document_names = _load_pickle('document_names.pkl')
texts = _load_pickle('texts.pkl')

print("Data loaded from disk successfully.")

# The three sequences are zipped together positionally below, so a length
# mismatch would silently mislabel documents (or fail deep inside Chroma).
if not (len(embeddings) == len(document_names) == len(texts)):
    raise ValueError(
        "Loaded data is misaligned: "
        f"{len(embeddings)} embeddings, "
        f"{len(document_names)} document names, "
        f"{len(texts)} texts"
    )

# Initialize the ChromaDB client
client = chromadb.Client()

# Access or create the collection
collection = client.get_or_create_collection("beige_books_embeddings")

# Generate one positional ID per document
document_ids = [f"doc_{i}" for i in range(len(document_names))]

# Add the loaded data to the ChromaDB collection, one batch at a time
for start in range(0, len(document_ids), ADD_BATCH_SIZE):
    stop = start + ADD_BATCH_SIZE
    collection.add(
        ids=document_ids[start:stop],
        documents=texts[start:stop],
        embeddings=embeddings[start:stop],
        metadatas=[{"filename": name} for name in document_names[start:stop]],
    )

print("Data added to the ChromaDB collection successfully.")

# Initialize the sentence-transformer model for generating query embeddings
model = SentenceTransformer('all-MiniLM-L6-v2')


Data loaded from disk successfully.
Data added to the ChromaDB collection successfully.




In [None]:
# For the display below.
# `display` and `HTML` come from the public IPython.display module; importing
# `display` from IPython.core.display is deprecated and fails on newer IPython
# releases.
from IPython.display import display, HTML


In [58]:
# Define your search query and the knobs a reader may want to tune
query = "pessimistic economic outlook"
N_RESULTS = 25        # Number of similar documents to retrieve
SNIPPET_CHARS = 2000  # Maximum characters of each document to show

query_embedding = model.encode([query])

# Perform the search
results = collection.query(
    query_embeddings=query_embedding,
    n_results=N_RESULTS,
)

# Only one query was submitted, so its hits are at index 0 of each result list.
# The "Score" shown below is read from results['distances'].
hits = list(zip(
    results['metadatas'][0],
    results['distances'][0],
    results['documents'][0],
))

# Print the search results
for rank, (metadata, distance, document) in enumerate(hits, start=1):
    print(f"Result {rank}:")
    print("Document:", metadata['filename'])
    print("Score:", distance)
    print("Snippet:", document[:SNIPPET_CHARS])
    print()

# Build an HTML table. Filenames and document text are escaped so that
# characters such as '<', '>' and '&' in the corpus are shown literally
# instead of being interpreted as markup.
table_rows = [
    "<tr>"
    f"<td>{rank}</td>"
    f"<td>{html.escape(str(metadata['filename']))}</td>"
    f"<td>{distance:.4f}</td>"
    f"<td>{html.escape(document[:SNIPPET_CHARS])}</td>"
    "</tr>"
    for rank, (metadata, distance, document) in enumerate(hits, start=1)
]
html_content = (
    "<table border='1'>"
    "<tr><th>Rank</th><th>Document</th><th>Score</th><th>Snippet</th></tr>"
    + "".join(table_rows)
    + "</table>"
)

# Display in Jupyter Notebook
display(HTML(html_content))



Result 1:
Document: 1972_ri (1)_chunk_3.txt
Score: 0.8303863406181335
Snippet: Optimism concerning the economic outlook remains high among District businessmen and bankers . More than two-thirds of the backing respondents expect an improvement in business activity in their areas in the immediate future .

Result 2:
Document: 2005_bo (5)_chunk_6.txt
Score: 0.887150228023529
Snippet: pace of growth . The economy is viewed as somewhat uncertain ; it is said to be currently steady -- not robust , not deteriorating -- but with more downside than upside risks .

Result 3:
Document: 1979_ns (4)_chunk_1.txt
Score: 0.9183801412582397
Snippet: August 7 , 1979 This month 's REDBOOK reports carry a clear message : a general softening in activity began in the second . quarter and . no early reversal is in sight . The decline has centered in housing and . consumption spending , but attempts to prevent buildups of inventories , and dimming prospects for capital spending are also factors . Despite wea

Rank,Document,Score,Snippet
1,1972_ri (1)_chunk_3.txt,0.8304,Optimism concerning the economic outlook remains high among District businessmen and bankers . More than two-thirds of the backing respondents expect an improvement in business activity in their areas in the immediate future .
2,2005_bo (5)_chunk_6.txt,0.8872,"pace of growth . The economy is viewed as somewhat uncertain ; it is said to be currently steady -- not robust , not deteriorating -- but with more downside than upside risks ."
3,1979_ns (4)_chunk_1.txt,0.9184,"August 7 , 1979 This month 's REDBOOK reports carry a clear message : a general softening in activity began in the second . quarter and . no early reversal is in sight . The decline has centered in housing and . consumption spending , but attempts to prevent buildups of inventories , and dimming prospects for capital spending are also factors . Despite weaknesses , total activity is still at or near record levels and job markets remain surprisingly strong . Motor fuel supplies have improved , but sectors affected by shortages are not returning to normal . Inflation continues unabated . Credit is tight , but generally available . Agriculture is prosperous , but transport problems are hampering movements of grain . Pessimism is widespread . as reflected in consumer attitudes , surveys of executives , and opinions of directors . The main causes of pessimism are continued inflation and . concerns associated with the long-tern availability of energy . Philadelphia 's survey of manufacturers shows a decline in activity after `` three months of no growth . '' Richmond 's survey reveals declines in shipments , new orders , and . order backlogs . > Most districts reported that gasoline and diesel shortages had eased significantly in the past month , because of both increased supplies and conservation induced by sharply higher prices and concern over availability . The main direct adverse impact of fuel shortages has been on low-mileage vehicles and tourism . A number of ' districts , notably Atlanta and"
4,1981_ny (6)_chunk_5.txt,0.9229,"of interest rates is excessive relative to business conditions and underlying inflation . There will be a correction as soon as the money aggregates return to roughly target levels . Wojnilower : The current business situation appears flat and slightly up . Businessmen , except in the housing and auto industries , are virtually unanimous in their optimism about the short and long run business outlook . Financial markets are more than ever preoccupied with day to day swings in security prices ."
5,1978_bo (6)_chunk_2.txt,0.9435,"through the first week of July . The tourist industry in northern New England is faring very well . Retailers are somewhat apprehensive about the future course of the economy and some feel that inventories may be a bit high ; but this concern is not based on actual experience . Both a major department store chain and a large utility report that uncollectible bills and bad debts , which increase as the economy weakens , are low and falling . The region 's economists are , for the most part , considerably more pessimistic about the economic outlook than are most businessmen . At a recent gathering of business economists nine out of twelve assigned a probability in excess of 50 percent to the prospect of a recession in late 1978 or early 1979 . Consumer debt , the rate of inflation and the Federal Reserve 's response were the basis for their gloomy forecasts . Professors Eckstein and Samuelson were available for comment this month . Both respondents agree that there is a very real possibility of a `` credit crunch '' developing in the second half of this year , although neither believes that the present level of interest rates will necessarily lead to this outcome . They also agreed that while an overly ambitious attempt to slow the rate of money growth could easily produce a recession , the economy 's prospects in the absence of this policy are for slower but still acceptable real growth in the months immediately ahead ."
6,1970_ph (8)_chunk_2.txt,0.9461,"the year . The expectations of area economists for the second half of 1970 parallel those of manufacturing executives . The consensus view of business and bank economists in the Third District , according to a recent poll , is that the current downturn in business activity is bottoming out and that by fall the economy will be expanding once more . Prices , say regional economists , will continue their upward climb , but at a modestly reduced rate as the year progresses . Comments on the current scene by members of the board of directors are varied . Two members , a banker and a utilities executive , indicate that sentiment among their associates has become rather gloomy in recent weeks . The gloom stems from the view that the current dip in the economy will develop into an all-out recession . Another cause of this gloom is the feeling that inflation can not be stopped without a serious recession . Two industrialists on the board , however , do not share this pessimistic sentiment . One indicated that he saw some further slowing in the pace of business activity , but believed that a recession would be avoided largely because of a pickup in consumer spending . The other industrialist expected no further declines in the volume of business . He indicated , though , that he was less certain about the economy and more confused now than at anytime in his experience . Further , this industrialist is skeptical about the ability"
7,1977_bo (3)_chunk_2.txt,0.9521,"as a whole to be fairly strong . According to area bankers commercial loan demand continues to be quite strong . Deposit inflows are healthy although NOW account activity makes it difficult to determine exactly what is happening . A director who runs an employment service reports that demand for workers is strong but that he has difficulty getting unskilled workers who have basic skills . No respondents reported any problems in obtaining materials nor were any anticipating difficulties with bottlenecks . Several respondents indicated that the pace of price increases had moderated significantly . Finally several directors expressed concern over the amount of uncertainty in the economy because of the energy package and the forthcoming tax reform proposals . Professors Eckstein , Houthakker , and Samuelson were contacted this month . Samuelson notes that a large majority of professional forecasters anticipate real growth of between 3 1/2 and 4 1/2 percent over the six-quarter period ending in 1978 . The exceptions , along with most nonprofessional estimates , are lower and are presumably based on the expectation that the lack of economic vigor will ignite inventory decumulation . The anticipated economic climate is reminiscent of 1974 , when several quarters of sluggish growth culminated in a collapse in demand . All forecasts fall well below the 5 percent real growth target that Samuelson finds economically and politically desirable . Samuelson and Eckstein both eschewed the Shadow FOMC 's recommendation of a one-shot $ 4 billion drop in Ml to return to the target path ."
8,1971_ph (12)_chunk_3.txt,0.9775,"which remain outstanding . The businesses to whom these commitments have been made are still looking around for permanent financing , however . Apparently they are shopping for funds with more favorable terms . This shopping around may mean that some people in the construction industry believe that further declines in interest rates are in the wind . Although there appears to be less pessimism in general now than a few weeks ago in the Third District , most businessmen still express a deep concern about inflation and unemployment . Nonetheless , a feeling seems to be emerging in the business community that the '' worst is behind us and somehow things will work out for the best . ''"
9,1974_ph (3)_chunk_3.txt,0.9777,"Virtually no new business is being accepted , and well established customers are required to show that their loans are intended for `` productive '' rather than speculative uses . The lending restrictions have not been directed at any one class of business borrowers . Finally , inflation shows no sign of abating . The glimmers of hope reported last month have all but disappeared from the results of our latest outlook survey . The businessmen anticipating higher prices for both the goods they buy and the ones they sell outnumber those expecting prices to hold steady by more than two to one . Only a handful of respondents expect any reduction in prices by next March . It looks like inflation psychology is well entrenched and will not soon be dislodged ."
10,1973_ri (12)_chunk_3.txt,0.9794,bankers in the District remain optimistic about the general economic outlook . More than 60 percent of the banking respondents expect an improvement in business activity in their areas in the immediate future . Optimism about the business outlook among textile producers seems especially high .
