In [2]:
import json

def process_tweet_file(filename):
    """Read a JSON-lines tweet file and return numbered, space-joined tweets.

    Each non-blank line of the file is parsed as a JSON object whose
    'text' entry is a sequence of string tokens. The tokens are joined
    with single spaces and prefixed with a 1-based running number
    ("1. ...", "2. ...").

    Args:
        filename: Path of the file to read. It is opened as UTF-8 so the
            result does not depend on the platform's default encoding.

    Returns:
        A list of strings, one per tweet, in file order. Empty if the file
        contains no tweets.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        KeyError: If a record has no 'text' entry.
    """
    processed_tweets = []

    with open(filename, 'r', encoding='utf-8') as f:
        for line in f:
            line = line.strip()
            # Blank lines (e.g. a trailing newline at the end of the file)
            # are not tweets; skipping them avoids a JSONDecodeError and
            # keeps the numbering contiguous.
            if not line:
                continue
            tweet = json.loads(line)
            number = len(processed_tweets) + 1
            # Concatenate all tokens in the text array with spaces
            processed_tweets.append(f"{number}. " + ' '.join(tweet['text']))

    return processed_tweets

# Prompt template for the sentiment task. The single `{}` placeholder in the
# <Tweets> section is filled with the numbered tweets (one per line) via
# `template.format(...)`. Because str.format is used, any literal `{` or `}`
# added to this text must be doubled (`{{` / `}}`).
template = """<Task>
Analyze the provided tweets to determine their likely impact on the future stock price of a given company. Your answer must be one of the following: "[Positive]", "[Neutral]", or "[Negative]".

<Solving Process>
1. Identify the Target Stock: Extract the stock symbol from the tweets.
2. Tweet-by-Tweet Analysis:
(a) Separate the Factors: For each relevant tweet, identify Positive and Negative factors.
(b) Assess Sentiment: Evaluate how each relevant tweet might influence investor sentiment.
3. Overall Sentiment Summary: Combine your analyses to conclude the overall sentiment.

<Tweets>
{}

<Output Requirement>
You must do this: Conclude with a single line that states the overall sentiment. Use one of the following tags exactly:
[Positive]
[Neutral]
[Negative]
"""

# Build a sample prompt from one day of AAPL tweets and display it.
filename = 'tweet/preprocessed/AAPL/2014-01-02'
tweets = process_tweet_file(filename)
formatted_tweets = '\n'.join(tweets)  # one numbered tweet per line
print(template.format(formatted_tweets))

<Task>
Analyze the provided tweets to determine their likely impact on the future stock price of a given company. Your answer must be one of the following: "[Positive]", "[Neutral]", or "[Negative]".

<Solving Process>
1. Identify the Target Stock: Extract the stock symbol from the tweets.
2. Tweet-by-Tweet Analysis:
(a) Separate the Factors: For each relevant tweet, identify Positive and Negative factors.
(b) Assess Sentiment: Evaluate how each relevant tweet might influence investor sentiment.
3. Overall Sentiment Summary: Combine your analyses to conclude the overall sentiment.

<Tweets>
1. $ aapl - wall st . kicks off new year on lower note -> URL stock stocks stockaction
2. rt AT_USER here's how apple could be making a huge push into healthcare --> URL $ aapl
3. rt AT_USER our top 3 trade ideas for 2014 and two $ 1,000 futures wins already in the bank $ gld $ uso $ aapl - - URL
4. AT_USER $ aapl having breathing difficulties
5. emylers : cheeky's board ! : $ aapl up to date compan

In [1]:
import ollama

# Ask the Ollama client which models are available. As the last expression
# of the cell, the returned ListResponse is shown as the cell output.
ollama.list()

ListResponse(models=[Model(model='deepseek-r1:1.5b', modified_at=datetime.datetime(2025, 3, 13, 16, 50, 32, 808806, tzinfo=TzInfo(-04:00)), digest='a42b25d8c10a841bd24724309898ae851466696a7d7f3a0a408b895538ccbc96', size=1117322599, details=ModelDetails(parent_model='', format='gguf', family='qwen2', families=['qwen2'], parameter_size='1.8B', quantization_level='Q4_K_M')), Model(model='gemma2:latest', modified_at=datetime.datetime(2024, 11, 22, 9, 38, 21, 629393, tzinfo=TzInfo(-05:00)), digest='ff02c3702f322b9e075e9568332d96c0a7028002f1a5a056e0a6784320a4db0b', size=5443152417, details=ModelDetails(parent_model='', format='gguf', family='gemma2', families=['gemma2'], parameter_size='9.2B', quantization_level='Q4_0'))])

In [3]:
from ollama import chat
from ollama import ChatResponse

def get_ai_analysis(formatted_prompt, model='deepseek-r1:1.5b'):
    """
    Send a prompt to an Ollama chat model and return the reply text.

    Args:
        formatted_prompt: Complete prompt text; sent as a single 'user'
            message.
        model: Name of the Ollama model to query. Defaults to
            'deepseek-r1:1.5b', the model this notebook was written against.

    Returns:
        The content of the model's reply message. If the request raises any
        exception, the error is printed and the literal string "[Error]" is
        returned instead, so callers must be prepared for a reply that
        contains neither a sentiment tag nor a <think> block.
    """
    try:
        response: ChatResponse = chat(
            model=model,
            messages=[{
                'role': 'user',
                'content': formatted_prompt
            }]
        )
        return response.message.content
    except Exception as e:
        # Deliberately best-effort: a failed request is reported and turned
        # into a sentinel value rather than stopping the caller.
        print(f"Error using model: {str(e)}")
        return "[Error]"

# Render the prompt for the tweets loaded earlier and send it to the model.
final_prompt = template.format(formatted_tweets)
ai_response = get_ai_analysis(final_prompt)

# Heading and reply on separate lines.
print("AI Analysis Result:", ai_response, sep="\n")

AI Analysis Result:
<think>
Okay, let's try to figure out how to analyze the impact of these tweets on the stock price of Apple. 

First, I'll go through each tweet and see if they mention the company or its stocks. Some tweets are about other companies like companies that might affect Apple's performance.

1. The first tweet talks about Apple kicking off a new year with lower stock news.
2. Other tweets discuss apple moving into healthcare, which could impact their share price indirectly.
3. A couple of tweets mention Xiating and Green Energy, which might not directly relate to Apple, but they are interesting in their own right.
4. One tweet says the company is having breathing difficulties, which seems like a concern about their health or performance.
5. Another mentions big tuna's stocks moving, so I guess that would affect Apple as well.

Looking at these, some tweets point towards positive developments for Apple, especially around earnings and stock movements. There are also some 

批量处理&保存

In [11]:
import os
import csv
from datetime import datetime, timedelta
import re


def extract_sentiment_and_analysis(ai_response):
    """Split a model reply into its final sentiment tag and its reasoning.

    Args:
        ai_response: Raw reply text from the model. ``None`` or an empty
            string is treated as an empty reply.

    Returns:
        A ``(sentiment, analysis)`` tuple of strings.

        * ``sentiment`` is the last of ``[Positive]``, ``[Negative]`` or
          ``[Neutral]`` found in the reply, without the brackets, or
          ``'Unknown'`` when the reply contains none of them.
        * ``analysis`` is the stripped content of the first
          ``<think>...</think>`` block. When there is no complete block
          (a model that does not emit one, a truncated reply, or an error
          sentinel) the whole stripped reply is returned instead.
    """
    text = ai_response or ''

    # Extract the final sentiment: the last tag wins, because the reasoning
    # may mention tags before the concluding line states the answer.
    tags = re.findall(r'\[(Positive|Negative|Neutral)\]', text)
    sentiment = tags[-1] if tags else 'Unknown'

    # Extract the analysis text; fall back to the full reply rather than
    # raising IndexError when no closed <think> block is present.
    think_blocks = re.findall(r'<think>(.*?)</think>', text, re.DOTALL)
    analysis = think_blocks[0].strip() if think_blocks else text.strip()

    return sentiment, analysis


def process_date_range(start_date, end_date, symbol, data_dir='tweet/preprocessed'):
    """Run the sentiment prompt for every day in a date range and save a CSV.

    For each calendar day from ``start_date`` to ``end_date`` (both
    inclusive) the file ``{data_dir}/{symbol}/{YYYY-MM-DD}`` is looked up.
    If it exists, its tweets are formatted into the prompt template, sent to
    the model, and one row ``[date, sentiment, analysis]`` is written. Days
    without a file are reported and skipped.

    The results go to
    ``sentiment_analysis_{symbol}_{start:%Y%m%d}_{end:%Y%m%d}.csv`` in the
    current working directory; an existing file of that name is overwritten.

    Args:
        start_date: First day to process (datetime).
        end_date: Last day to process (datetime). If earlier than
            ``start_date`` only the header row is written.
        symbol: Ticker symbol; used as the sub-directory name and in the
            output file name.
        data_dir: Directory holding one sub-directory per symbol. Defaults
            to 'tweet/preprocessed'.

    Returns:
        None. Progress is printed, one line per day.
    """
    # Create the results CSV file
    results_file = f'sentiment_analysis_{symbol}_{start_date.strftime("%Y%m%d")}_{end_date.strftime("%Y%m%d")}.csv'

    with open(results_file, 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(['Date', 'Sentiment', 'Analysis'])

        current_date = start_date
        while current_date <= end_date:
            date_str = current_date.strftime('%Y-%m-%d')
            filename = f'{data_dir}/{symbol}/{date_str}'

            # isfile (not exists): a directory at this path cannot be read
            # as a tweet file and is treated like a missing day.
            if os.path.isfile(filename):
                # Build the prompt for this day and query the model
                tweets_content = process_tweet_file(filename)
                formatted_tweets = '\n'.join(tweets_content)
                formatted_content = template.format(formatted_tweets)
                ai_analysis = get_ai_analysis(formatted_content)

                # Extract the sentiment and the analysis. A reply without the
                # expected tags (e.g. the "[Error]" sentinel) must not abort
                # the rest of the batch, so it is recorded as an 'Error' row
                # with the raw reply kept for inspection.
                try:
                    sentiment, analysis = extract_sentiment_and_analysis(
                        ai_analysis)
                    status = "Finished"
                except (IndexError, TypeError):
                    sentiment, analysis = 'Error', ai_analysis
                    status = "Failed to parse"

                # Write the row and flush so that completed days survive an
                # interruption of a long run.
                writer.writerow([date_str, sentiment, analysis])
                csvfile.flush()
                print(f"{status}: {date_str}")
            else:
                print(f"Not exist: {date_str}")

            current_date += timedelta(days=1)


# Batch settings: ticker and inclusive date range.
# The end date is currently 2014-01-02; a commented-out alternative in the
# original cell was datetime(2015, 12, 31).
symbol = 'ABB'
start_date = datetime(2014, 1, 1)
end_date = datetime(2014, 1, 2)

process_date_range(start_date, end_date, symbol)

Not exist: 2014-01-01
Not exist: 2014-01-02
