In [None]:
!pip install langchain
!pip install openai
!pip install tiktoken
!pip install faiss-gpu
!pip install langchain_experimental
!pip install "langchain[docarray]"

In [100]:
from langchain.chat_models import ChatOpenAI
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain
from langchain.document_loaders.csv_loader import CSVLoader
import tiktoken
import pandas as pd

In [None]:
import os
from getpass import getpass

# Read the key with getpass instead of input() so the secret is not echoed
# into the notebook's saved cell output.
api_key = getpass("Please enter your OpenAI API key: ").strip()

# An empty key cannot authenticate; fail here instead of in a later cell.
if not api_key:
    raise ValueError("No OpenAI API key was entered.")

# Publish the key through the environment; OpenAIEmbeddings() and ChatOpenAI()
# are created later in this notebook without an explicit key argument.
os.environ["OPENAI_API_KEY"] = api_key

# Confirmation message only; the key itself is never printed.
print("OPENAI_API_KEY has been set!")


In [103]:
# Read the source news CSV (path relative to the working directory) into a DataFrame.
input_csv_path = './news.csv'
df = pd.read_csv(filepath_or_buffer=input_csv_path)

# Define a function to trim the text to at most `max_words` words (300 by default)
def trim_text(text, max_words=300):
    """Return ``text`` cut down to its first ``max_words`` words.

    Words are whatever ``str.split()`` yields, so runs of whitespace are
    collapsed to single spaces in the result.

    Parameters
    ----------
    text : str or missing value
        The text to shorten. Any non-string value (for example the float NaN
        that pandas uses for an empty CSV cell) is treated as empty text
        instead of raising ``AttributeError``.
    max_words : int, optional
        Maximum number of words to keep.

    Returns
    -------
    str
        The trimmed text, or ``''`` when ``text`` is not a string.
    """
    if not isinstance(text, str):
        # Missing cells arrive as NaN/None, which have no .split().
        return ''
    return ' '.join(text.split()[:max_words])

# Shorten every entry of the "Body" column with trim_text (default word limit).
df['Body'] = df['Body'].map(trim_text)

# File name for the trimmed copy of the dataset.
output_csv_path = 'output.csv'

# Write the whole trimmed DataFrame to that file, leaving out the index column.
df.to_csv(path_or_buf=output_csv_path, index=False)

In [112]:
# Path of the trimmed CSV (the same file name the trimming cell writes).
csv_file_path = './output.csv'

# Loader configuration: "Link" is the source column, and the listed columns
# are handed to the loader as metadata columns.
csv_loader = CSVLoader(
    file_path=csv_file_path,
    source_column="Link",
    encoding="utf-8",
    metadata_columns=["ID", "Title", "Description", "Keywords", "Theme"],
)

# Splitter configured with a 1000-character chunk size and a 200-character overlap.
# NOTE(review): CharacterTextSplitter splits on its separator (presumably the
# default blank-line separator here), so a chunk may end up longer than
# chunk_size when a document contains no such separator — confirm.
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=200)

# Load the CSV into documents and split them in one step.
data = text_splitter.split_documents(csv_loader.load())

In [113]:
# Build a FAISS vector store from the split documents, embedding them with
# OpenAIEmbeddings.
embeddings = OpenAIEmbeddings()
vectorstore = FAISS.from_documents(documents=data, embedding=embeddings)

In [114]:
# Assemble the conversational retrieval chain.
# Buffer memory stored under the 'chat_history' key, returned as message objects.
memory = ConversationBufferMemory(return_messages=True, memory_key='chat_history')

# Chat model used to generate answers.
llm = ChatOpenAI(model_name="gpt-4", temperature=0.7)

# Wire the model, the FAISS-backed retriever and the memory together.
conversation_chain = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=vectorstore.as_retriever(),
    chain_type="stuff",
    memory=memory,
)

In [115]:
# Ask the chain about the Morgan State University event.
query = "What happened at Morgan State University in 2023?"
# The chain is called with the question under the "question" key.
result = conversation_chain({"question": query})
# The reply text is read from the "answer" entry of the result.
answer = result["answer"]
# Bare expression so the notebook displays the answer.
answer

'In 2023, an attack occurred at Morgan State University in Baltimore during a crowded homecoming event. There were two shooters involved in the incident, which injured five people, including four students. The victims, four men and one woman aged between 18 and 22, were taken to the hospital with non-life-threatening injuries. As of a few days after the incident, four of the five victims had been released from the hospital, while one remained in stable condition. The Baltimore Police Department released video and photos of four individuals seen near the scene of the shooting, asking the public for help in identifying them. The shooting is believed to have resulted from a dispute between two small groups. The police are continuing to investigate the incident.'

In [116]:
# Ask the chain about Pat Fitzgerald.
# NOTE(review): conversation_chain is built with a ConversationBufferMemory, so
# questions asked earlier in the session may influence this answer — confirm.
query = "Who is Pat Fitzgerald and what happened to him in 2023?"
result = conversation_chain({"question": query})
# The reply text is read from the "answer" entry of the result.
answer = result["answer"]
# Bare expression so the notebook displays the answer.
answer

'Pat Fitzgerald is a former Northwestern University football coach. In 2023, he was involved in a series of events that started with accusations of hazing within the Wildcats football program. Northwestern University initially suspended him for two weeks without pay following an independent investigation into these allegations. However, despite Fitzgerald denying any knowledge of the hazing, the university decided to fire him shortly after his suspension. Fitzgerald is now suing Northwestern University and its president, Michael Schill, for at least $130 million, claiming wrongful termination among other charges. He asserts that he had an oral agreement with the university that he would not face further discipline after the suspension. The lawsuit also seeks damages for defamation and intentional infliction of emotional distress.'

In [109]:
# Ask the chain about Covid-19 vaccine news; "last year" in the question is
# relative, so the reply depends on what the retrieved articles cover.
# NOTE(review): this cell's saved execution count (109) is lower than the
# counts of the cells before it, so the stored output was produced out of
# order — re-run the notebook top to bottom to confirm the result.
query = "What are the Covid-19 vaccines breakthroughs last year?"
result = conversation_chain({"question": query})
# The reply text is read from the "answer" entry of the result.
answer = result["answer"]
# Bare expression so the notebook displays the answer.
answer

'Last year saw significant breakthroughs in Covid-19 vaccines. The US Food and Drug Administration gave approval to updated Covid-19 vaccines from Moderna and Pfizer/BioNTech that were effective against the then dominant strain in the United States, EG.5. These updated vaccines were set to become available in mid-September. Additionally, Katalin Karikó and Drew Weissman received the Nobel Prize in physiology or medicine for their work on mRNA vaccines, which have been a crucial tool in curtailing the spread of Covid-19. Their work laid the foundation for Pfizer/BioNTech and Moderna to use a new approach to produce vaccines using messenger RNA or mRNA. This revolutionary technology can potentially be harnessed to develop vaccines against other diseases like malaria, RSV, and HIV and offers a new approach to infectious disease like cancer, with the prospect of personalized vaccines.'

In [110]:
# Ask the chain about UK inflation and request the article link in the reply.
query = "What happened to UK inflation in July 2023? Please provide relevant link to the news."
result = conversation_chain({"question": query})
# The reply text is read from the "answer" entry of the result.
answer = result["answer"]
# Bare expression so the notebook displays the answer.
answer

'In July 2023, the UK inflation rate eased to a 17-month low, with consumer prices rising 6.8% compared with a year ago. This is due to the falls in the price of gas and electricity after an energy price cap was lowered at the end of June. However, the prices of services, such as haircuts, hotels, and restaurants, increased at a faster pace than the previous month. For more details, refer to this article: [UK inflation eases to 17-month low](https://edition.cnn.com/2023/08/16/economy/uk-inflation-slowdown/index.html).'

In [117]:
# Ask the chain about the US labor market and request the article link in the reply.
query = "Please tell me about the US labor market in 2023 and provide the relevant link for me."
result = conversation_chain({"question": query})
# The reply text is read from the "answer" entry of the result.
answer = result["answer"]
# Bare expression so the notebook displays the answer.
answer

'In 2023, the US labor market showed strong performance with the economy adding 253,000 jobs in April, which was more than economists expected. The unemployment rate fell to 3.4%, matching a 53-year low. The labor force participation rate held steady at 62.6%. Some of the largest job increases were seen in the sectors of private education and health services, professional and business services, and leisure and hospitality. Despite high prices, rising interest rates and banking uncertainty, the labor market continued to grow. For more details, you can refer to this article: [CNN Link](https://edition.cnn.com/2023/05/05/economy/april-jobs-report-takeaways/index.html).'