In [1]:
import json

def process_tweet_file(filename):
    """Read a JSON-lines tweet file and return its tweets as numbered strings.

    Every non-blank line of the file is parsed as a JSON object whose
    ``'text'`` entry is a sequence of string tokens. The tokens are joined
    with single spaces and prefixed with a 1-based running number, e.g.
    ``"1. $ aapl up"``.

    Args:
        filename: Path of the file to read (decoded as UTF-8).

    Returns:
        A list of numbered tweet strings in file order; empty if the file
        contains no tweets.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        KeyError: If a parsed object has no ``'text'`` entry.
    """
    processed_tweets = []

    # Explicit encoding so the result does not depend on the platform default.
    with open(filename, 'r', encoding='utf-8') as f:
        for line in f:
            # Blank lines (e.g. a trailing empty line) are not tweets; feeding
            # them to json.loads would raise, so skip them.
            if not line.strip():
                continue
            tweet = json.loads(line)
            # Number by tweets kept so far so skipped lines leave no gaps.
            number = len(processed_tweets) + 1
            processed_tweets.append(f"{number}. " + ' '.join(tweet['text']))

    return processed_tweets

# Prompt template shared by the cells below. The single `{}` placeholder in the
# <Tweets> section is filled with the numbered tweet list via `template.format(...)`.
template = """<Task>
Analyze the provided tweets to determine their likely impact on the future stock price of a given company. Your answer must be one of the following: "[Positive]", "[Neutral]", or "[Negative]".

<Solving Process>
1. Identify the Target Stock: Extract the stock symbol from the tweets.
2. Tweet-by-Tweet Analysis:
(a) Separate the Factors: For each relevant tweet, identify Positive and Negative factors.
(b) Assess Sentiment: Evaluate how each relevant tweet might influence investor sentiment.
3. Overall Sentiment Summary: Combine your analyses to conclude the overall sentiment.

<Tweets>
{}

<Output Requirement>
You must do this: Conclude with a single line that states the overall sentiment. Use one of the following tags exactly:
[Positive]
[Neutral]
[Negative]
"""

# Load the tweets of one sample file (AAPL, 2014-01-02) as numbered strings
filename = 'tweet/preprocessed/AAPL/2014-01-02'
tweets = process_tweet_file(filename)

# Put one tweet per line, fill the template's placeholder and print the prompt
formatted_tweets = '\n'.join(tweets)
print(template.format(formatted_tweets)) 

<Task>
Analyze the provided tweets to determine their likely impact on the future stock price of a given company. Your answer must be one of the following: "[Positive]", "[Neutral]", or "[Negative]".

<Solving Process>
1. Identify the Target Stock: Extract the stock symbol from the tweets.
2. Tweet-by-Tweet Analysis:
(a) Separate the Factors: For each relevant tweet, identify Positive and Negative factors.
(b) Assess Sentiment: Evaluate how each relevant tweet might influence investor sentiment.
3. Overall Sentiment Summary: Combine your analyses to conclude the overall sentiment.

<Tweets>
1. $ aapl - wall st . kicks off new year on lower note -> URL stock stocks stockaction
2. rt AT_USER here's how apple could be making a huge push into healthcare --> URL $ aapl
3. rt AT_USER our top 3 trade ideas for 2014 and two $ 1,000 futures wins already in the bank $ gld $ uso $ aapl - - URL
4. AT_USER $ aapl having breathing difficulties
5. emylers : cheeky's board ! : $ aapl up to date compan

In [2]:
import ollama

# List the models known to Ollama; the notebook displays the returned value,
# which is used here to check the model name passed to `chat` later on.
ollama.list()

ListResponse(models=[Model(model='deepseek-r1:1.5b', modified_at=datetime.datetime(2025, 3, 13, 16, 50, 32, 808806, tzinfo=TzInfo(-04:00)), digest='a42b25d8c10a841bd24724309898ae851466696a7d7f3a0a408b895538ccbc96', size=1117322599, details=ModelDetails(parent_model='', format='gguf', family='qwen2', families=['qwen2'], parameter_size='1.8B', quantization_level='Q4_K_M')), Model(model='gemma2:latest', modified_at=datetime.datetime(2024, 11, 22, 9, 38, 21, 629393, tzinfo=TzInfo(-05:00)), digest='ff02c3702f322b9e075e9568332d96c0a7028002f1a5a056e0a6784320a4db0b', size=5443152417, details=ModelDetails(parent_model='', format='gguf', family='gemma2', families=['gemma2'], parameter_size='9.2B', quantization_level='Q4_0'))])

In [3]:
from ollama import chat
from ollama import ChatResponse

def get_ai_analysis(formatted_prompt, model='deepseek-r1:1.5b'):
    """
    Send a prompt to an Ollama chat model and return the text of its reply.

    Args:
        formatted_prompt: Complete prompt text; sent as a single user message.
        model: Name of the Ollama model to query. Defaults to
            'deepseek-r1:1.5b', the model this notebook was written against.

    Returns:
        The content of the model's reply message, or the literal string
        "[Error]" if the call raised any exception. The error is printed
        rather than re-raised, so callers must check for that sentinel.
    """
    try:
        response: ChatResponse = chat(
            model=model,
            messages=[{
                'role': 'user',
                'content': formatted_prompt
            }]
        )
        return response.message.content
    except Exception as e:
        # Deliberately best-effort: report the failure and hand back a
        # sentinel so a long batch run is not aborted by one failed call.
        print(f"Error using model: {str(e)}")
        return "[Error]"

# Build the prompt from the tweets that were joined into `formatted_tweets` above
final_prompt = template.format(formatted_tweets)

# Send the prompt to the model and print whatever get_ai_analysis returns
ai_response = get_ai_analysis(final_prompt)
print("AI Analysis Result:")
print(ai_response) 

AI Analysis Result:
<think>
Alright, so I have to figure out whether these tweets are positive, negative, or neutral in terms of their impact on a stock's future price for a given company, like Apple (AAPL). Let me go through each tweet one by one and analyze them.

Starting with the first tweet: It says Wall Street kicks off a new year on "lower note." That might suggest some bad news, but it's vague. The word "kicks off" could be positive or negative depending on context. Maybe not enough info to decide yet.

The second tweet mentions Apple making a huge push into healthcare. I'm not sure if this is good or bad for the stock. Healthcare being a separate industry might affect Apple's performance, but it's speculative without more information.

Third tweet talks about trade ideas and gains for 2014 with $1,000 wins. Positive trades could indicate success, but if they're in the bank, maybe the overall market is down, which might not directly impact Apple.

Fourth tweet refers to somethi

批量处理&保存

In [5]:
import os
import csv
from datetime import datetime, timedelta
import re


def extract_sentiment_and_analysis(ai_response):
    """Pull the sentiment tag and the reasoning text out of a model reply.

    Args:
        ai_response: Full text returned by the model.

    Returns:
        A ``(sentiment, analysis)`` tuple:

        * ``sentiment`` is the word inside the *last* "[Positive]",
          "[Negative]" or "[Neutral]" tag found anywhere in the reply, or
          "Unknown" when the reply contains no such tag.
        * ``analysis`` is the whitespace-stripped content of the *first*
          ``<think>...</think>`` span, or "No analysis provided" when the
          reply contains no complete span.
    """
    # The last tag wins: earlier tags may just be the model quoting the options.
    tags = re.findall(r'\[(Positive|Negative|Neutral)\]', ai_response)
    sentiment = tags[-1] if tags else "Unknown"

    # DOTALL lets the reasoning span run across multiple lines.
    think_span = re.search(r'<think>(.*?)</think>', ai_response, re.DOTALL)
    analysis = think_span.group(1).strip() if think_span else "No analysis provided"

    return sentiment, analysis


def process_date_range(start_date, end_date, symbol, max_attempts=5):
    """Run the sentiment prompt for every day in a date range and save the results.

    For each calendar day from ``start_date`` to ``end_date`` (both inclusive)
    the tweet file ``tweet/preprocessed/<symbol>/<YYYY-MM-DD>`` is looked up.
    If it exists, its tweets are inserted into ``template``, sent to the model
    via ``get_ai_analysis`` and the reply is parsed with
    ``extract_sentiment_and_analysis``. The model is queried again while no
    sentiment tag or no analysis text can be extracted, up to ``max_attempts``
    calls per day.

    Two CSV files are written to the current working directory:

    * ``raw_sentiment_analysis_<symbol>_<start>_<end>.csv`` with columns
      ``Date, Raw_Analysis`` -- one row per model call, including retries.
    * ``preprocessed_sentiment_analysis_<symbol>_<start>_<end>.csv`` with
      columns ``Date, Sentiment, Analysis`` -- exactly one row per day. The
      sentiment is "Missing" when the tweet file does not exist, "Error" when
      parsing the reply raised, and otherwise the last extracted value
      (possibly "Unknown" if every attempt failed).

    Args:
        start_date: First day to process (``datetime``).
        end_date: Last day to process (``datetime``), inclusive.
        symbol: Ticker symbol used in the input path and output file names.
        max_attempts: Maximum number of model calls per day before giving up.
            Must be at least 1.

    Raises:
        ValueError: If ``max_attempts`` is smaller than 1.
    """
    if max_attempts < 1:
        raise ValueError("max_attempts must be at least 1")

    date_span = f'{start_date.strftime("%Y%m%d")}_{end_date.strftime("%Y%m%d")}'
    # CSV holding every raw model reply
    raw_results_file = f'raw_sentiment_analysis_{symbol}_{date_span}.csv'
    # CSV holding the extracted sentiment / analysis per day
    processed_results_file = f'preprocessed_sentiment_analysis_{symbol}_{date_span}.csv'

    with open(raw_results_file, 'w', newline='', encoding='utf-8') as raw_csvfile, \
            open(processed_results_file, 'w', newline='', encoding='utf-8') as processed_csvfile:
        raw_writer = csv.writer(raw_csvfile)
        raw_writer.writerow(['Date', 'Raw_Analysis'])

        processed_writer = csv.writer(processed_csvfile)
        processed_writer.writerow(['Date', 'Sentiment', 'Analysis'])

        current_date = start_date
        while current_date <= end_date:
            date_str = current_date.strftime('%Y-%m-%d')
            filename = f'tweet/preprocessed/{symbol}/{date_str}'

            if os.path.exists(filename):
                # Build the prompt for this day's tweets
                tweets_content = process_tweet_file(filename)
                formatted_tweets = '\n'.join(tweets_content)
                formatted_content = template.format(formatted_tweets)

                sentiment, analysis = "Unknown", "No analysis provided"
                extraction_failed = False

                # Bounded retry: an unreachable model or a reply format that
                # never matches must not stall the whole batch forever.
                for attempt in range(1, max_attempts + 1):
                    ai_analysis = get_ai_analysis(formatted_content)

                    # Persist the raw reply first so nothing is lost if parsing fails
                    raw_writer.writerow([date_str, ai_analysis])

                    try:
                        sentiment, analysis = extract_sentiment_and_analysis(ai_analysis)
                    except Exception as e:
                        print(f"Error processing {date_str}: {str(e)}")
                        processed_writer.writerow([date_str, "Error", f"Error extracting data: {str(e)}"])
                        extraction_failed = True
                        break

                    if sentiment != "Unknown" and analysis != "No analysis provided":
                        break

                    if attempt < max_attempts:
                        print(f"Failed to extract valid sentiment and analysis for {date_str}. Retrying...")
                    else:
                        print(f"Failed to extract valid sentiment and analysis for {date_str}. "
                              f"Giving up after {max_attempts} attempts.")

                # The error row was already written above; do not add a second
                # row for the same date.
                if not extraction_failed:
                    processed_writer.writerow([date_str, sentiment, analysis])
                    print(f"Finished processing: {date_str}, Sentiment: {sentiment}")
            else:
                print(f"Not exist: {date_str}")
                processed_writer.writerow([date_str, "Missing", "File not found"])

            current_date += timedelta(days=1)

    print(f"Raw results saved to: {raw_results_file}")
    print(f"Processed results saved to: {processed_results_file}")


# Date range to process; process_date_range treats both ends as inclusive
start_date = datetime(2014, 1, 1)
# end_date = datetime(2015, 12, 31)  # alternative end date, currently disabled
end_date = datetime(2014, 1, 5)
symbol = 'AAPL'

process_date_range(start_date, end_date, symbol)

<think>
Alright, so I'm trying to figure out how to analyze these tweets about Apple's stock price impact on their future stock price. The user provided some tweets and wants me to determine if each is positive, neutral, or negative, then give an overall sentiment.

First, I need to identify the target stock symbol from the tweets. Looking through them, I see a few mentions of Apple with the symbol "aapl". That's the one I should focus on for my analysis.

Next, I'll go tweet by tweet and break down each one into factors and sentiment.

1. The first tweet is an AT_USER summary of yesterday's webcast about traders hedging strategies. It doesn't specify anything new or negative, just an update from a trader. So it's neutral because it's neither positive nor negative in any significant way.

2. Same as the first one—it's another traders' webcast summary with no specific trends or issues. So again, this is neutral.

3. Third tweet says "itv will boost apple URL $ aapl apple". The word "URL