In [2]:
!pip install PyPDF2
import PyPDF2
import requests

# Function to extract text from PDF
def extract_text_from_pdf(pdf_file):
    """Return the concatenated text of every page in a PDF file.

    Args:
        pdf_file: Path of the PDF file to open (opened in binary mode).

    Returns:
        A single string holding each page's extracted text in page order,
        with a newline appended after every page. An empty string is
        returned for a document with no pages.
    """
    with open(pdf_file, 'rb') as file:
        reader = PyPDF2.PdfReader(file)
        # `or ""` keeps a page that yields no text (None or empty) from
        # breaking the concatenation; join avoids repeated string copies.
        return "".join(
            (page.extract_text() or "") + "\n" for page in reader.pages
        )

# Function to analyze sentiment using ICL, CoT, and simulated RAG with ChatGPT API
def analyze_sentiment_with_cot_and_rag(text, api_key, timeout=60):
    """Ask the OpenAI chat completions API for a reasoned sentiment score.

    The prompt combines three parts: few-shot examples (ICL), a fixed block
    of "retrieved" audit-report phrases (simulated RAG), and a
    chain-of-thought instruction that embeds ``text``.

    Args:
        text: The passage whose sentiment should be analyzed.
        api_key: Bearer token sent in the Authorization header.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The content of the first returned choice (reasoning plus score) as
        a string, or None when the request fails, the status code is not
        200, or the response body does not have the expected structure.
    """
    headers = {
        'Authorization': f'Bearer {api_key}',
        'Content-Type': 'application/json',
    }

    # ICL: Provide few-shot examples of sentiment analysis as context
    icl_examples = """
    Example 1: "The company performed exceptionally well this quarter." -> Sentiment Score: 0.9 (very positive)
    Example 2: "The audit revealed significant risks and operational inefficiencies." -> Sentiment Score: -0.7 (negative)
    Example 3: "The company met its compliance requirements, but there are still areas for improvement." -> Sentiment Score: 0.2 (neutral)
    Now, analyze the sentiment of the following sentence.
    """

    cot_prompt = f"""
    Based on the given examples and retrieved information, break down the sentiment analysis into reasoning steps.
    Then provide a sentiment score between -1 (very negative) to 1 (very positive).
    \nSentence: "{text}"
    """

    # Simulating RAG: Retrieve information from external sources (in this case, a mock knowledge base)
    rag_information = """
    Retrieved external knowledge:
    Positive phrases in audit reports include: "effective controls," "positive cash flow," "in compliance."
    Negative phrases in audit reports include: "significant risks," "material weakness," "operational inefficiencies."
    Neutral phrases in audit reports include: "met compliance requirements," "satisfactory performance," "adequate financial control."
    Use this information to assist in the sentiment analysis.
    """

    data = {
        "model": "gpt-3.5-turbo",  # Using GPT-3.5-turbo for cost-efficiency
        "messages": [
            {"role": "user", "content": f"{icl_examples}\n{rag_information}\n{cot_prompt}"}
        ],
        "max_tokens": 150  # Limit response size
    }

    try:
        # Without a timeout, requests waits indefinitely on a stalled connection.
        response = requests.post(
            'https://api.openai.com/v1/chat/completions',
            headers=headers,
            json=data,
            timeout=timeout,
        )
    except requests.RequestException:
        return None  # Connection/timeout failures are reported like any API error

    if response.status_code != 200:
        return None  # If there's an API error, return None

    try:
        # json() raises ValueError on a non-JSON body; the lookups raise
        # KeyError/IndexError/TypeError when the structure is unexpected.
        return response.json()['choices'][0]['message']['content']
    except (ValueError, KeyError, IndexError, TypeError):
        return None  # If the response isn't valid, return None

# Main function
def main():
    """Extract the PDF text, split it into chunks, and print each chunk's analysis.

    Chunks for which the analysis returns None are skipped without output.
    """
    import os  # Local import: only this function needs the environment lookup

    pdf_file_path = "/content/Do news headlines matter in the cryptocurrency market .pdf"  # Update with your file path
    # Prefer the OPENAI_API_KEY environment variable so the secret does not
    # have to be written into the source; otherwise use the placeholder below.
    api_key = os.environ.get("OPENAI_API_KEY", "Api key")  # Update with your API key

    pdf_text = extract_text_from_pdf(pdf_file_path)

    # Chunk the text into smaller pieces to avoid exceeding the token limit.
    # Note: chunk_size counts characters of the extracted text, not tokens.
    chunk_size = 2000  # Adjust as needed based on the model's token limit
    text_chunks = [pdf_text[i:i + chunk_size] for i in range(0, len(pdf_text), chunk_size)]

    for chunk in text_chunks:
        result = analyze_sentiment_with_cot_and_rag(chunk, api_key)
        if result is not None:
            print(result)  # Print reasoning and sentiment for each chunk

# Entry point: call main() only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()


1. The sentence discusses a research paper on the influence of news headlines on investor sentiment in the cryptocurrency market.
2. The study uses Natural Language Processing (NLP) to analyze sentiment from cryptocurrency-specific news headlines.
3. The findings suggest that positive sentiment in news headlines leads to herding behavior and price increases in the cryptocurrency market.
4. The study also mentions that different cryptocurrencies respond differently to sentiment, especially young, small, and volatile ones.

Overall, the sentiment in this sentence can be considered positive as it indicates a significant impact of news sentiment on cryptocurrency returns, especially when sentiment is optimistic. Therefore, the sentiment score can be considered around 0.7 (positive).
Reasoning steps:
- The sentence discusses using cryptocurrency-specific news headlines to build a news sentiment indicator.
- It provides information about the growth and popularity of cryptocurrency since the 