In [None]:
!pip install BeautifulSoup4 requests IPython openai langchain selenium faiss-cpu IPython

In [None]:
!apt-get update
!apt-get install -y firefox
!apt-get install -y wget
!wget https://github.com/mozilla/geckodriver/releases/download/v0.33.0/geckodriver-v0.33.0-linux64.tar.gz
!tar -xvf geckodriver-v0.33.0-linux64.tar.gz
!chmod +x geckodriver
!mv geckodriver /usr/local/bin

In [None]:
# `os` is imported here because this cell runs before the main import cell;
# without it a fresh-kernel "Run All" fails with NameError.
import os

# Only fall back to the placeholder when no key is already exported, so a real
# key supplied through the environment is never overwritten.
# Replace the placeholder locally; do not commit a real key to the notebook.
os.environ.setdefault('OPENAI_API_KEY', 'your_api_key_here')

In [None]:
import os
import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.firefox.options import Options
from selenium.webdriver.firefox.service import Service

def fetch_headlines(driver, url, css_class):
    """Load ``url`` in ``driver`` and return the stripped text of matching elements.

    Args:
        driver: A Selenium WebDriver instance used to fetch the page.
        url: Address of the page to load.
        css_class: Value passed to BeautifulSoup's ``class_`` filter. A value
            containing a space is matched against the exact ``class`` attribute
            string, so it is sensitive to class order in the page markup.

    Returns:
        A list of strings, one per matching element (possibly empty).
    """
    import warnings

    driver.get(url)
    soup = BeautifulSoup(driver.page_source, 'html.parser')
    titles = [element.get_text(strip=True) for element in soup.find_all(class_=css_class)]
    if not titles:
        # Best-effort scrape: keep going, but make a silent miss visible
        # (e.g. the site changed its markup or content had not loaded yet).
        warnings.warn(f'No elements with class {css_class!r} found at {url}')
    return titles


# Set up a headless Firefox WebDriver
firefox_options = Options()
firefox_options.add_argument('-headless')
driver = webdriver.Firefox(options=firefox_options)

try:
    # Scrape headline text from each homepage
    cnn_titles = fetch_headlines(driver, 'https://www.cnn.com', 'cd__headline-text vid-left-enabled')
    fox_titles = fetch_headlines(driver, 'https://www.foxnews.com', 'title')
finally:
    # Always close the browser, even if a page load or parse raised,
    # so no headless Firefox process is left running.
    driver.quit()

# Label each source with a neutral name and combine into one text blob
cnn_headlines = 'NewsSource1 Headlines:\n\n' + '\n\n'.join(cnn_titles) + '\n\n\n\n'
fox_headlines = 'NewsCorp2 Headlines:\n\n' + '\n\n'.join(fox_titles)
headlines = cnn_headlines + fox_headlines

In [None]:
from langchain.chains.question_answering import load_qa_chain
from langchain.chains import AnalyzeDocumentChain
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI

# Split the combined headlines into chunks of at most 1000 characters, no overlap
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
texts = text_splitter.split_text(headlines)

# Embed the chunks with OpenAI and index them in an in-memory FAISS store
embeddings = OpenAIEmbeddings(openai_api_key=os.environ['OPENAI_API_KEY'])
vectorstore = FAISS.from_texts(texts, embeddings)

# Wire the chat model and the FAISS retriever into a retrieval QA chain
chat_model = ChatOpenAI(model='gpt-3.5-turbo')
headline_retriever = vectorstore.as_retriever()
qa = RetrievalQA.from_chain_type(
    llm=chat_model,
    chain_type="stuff",
    retriever=headline_retriever,
)

In [None]:
# Questions put to the model; the sources are referred to by the neutral
# labels used when the headlines were combined.
prompt = '\n'.join([
    '',
    'Compare the headlines of NewsSource1 and NewsCorp2.',
    'What do they each say about Trump?',
    'Which is more critical of Trump and why is this?',
    'Which party does the more critical group most likely represent?',
    "What are that audiences' beliefs and values?",
    '',
])

# Run the retrieval QA chain on the prompt and keep its answer
response = qa.run(prompt)

In [None]:
from IPython.display import display, Markdown

# Render the model's answer as formatted Markdown instead of raw text
answer_markdown = Markdown(response)
display(answer_markdown)

NewsSource1 headlines focus entirely on Trump, with several stories detailing his current legal issues and controversies. They suggest negative developments for the former president with phrases like "terrible idea," "problem," and "grave national drama."

In contrast, NewsCorp2 has only one headline directly about Trump, which simply mentions his arraignment. The other headlines cover a range of topics, including human interest stories, crime, and business news.

Overall, NewsSource1 is more critical of Trump. This is because they have multiple articles focused on his legal issues and use negative language in their headlines. NewsCorp2, on the other hand, only has one article about Trump, which uses neutral language.

Without more information about the respective audiences and their values and beliefs, it is difficult to determine which party each news source most likely represents. However, it is possible that NewsSource1's audience might be more likely to have liberal or Democratic leanings, given the critical focus on Trump and use of language like "Whistleblower says this Trump move would be a 'terrible idea'" and "Majority of Americans approve of Trump indictment." NewsCorp2's audience might be more neutral or conservative, given the range of topics covered and the lack of any clear bias in their coverage.