In [26]:
import ollama

# 풀할 모델 리스트
models_to_pull = [
    "llama3.1:8b",
    "gemma2:27b",
    "mistral:7b",
    "llama3.2:3b"
]

# 모델 풀하기
for model_name in models_to_pull:
    print(f"Pulling model: {model_name}")
    ollama.pull(model_name)

print("Selected models have been pulled successfully.")

{'models': [{'name': 'llama3.1:8b',
   'model': 'llama3.1:8b',
   'modified_at': '2024-11-06T15:34:28.326803983+09:00',
   'size': 4661230766,
   'digest': '42182419e9508c30c4b1fe55015f06b65f4ca4b9e28a744be55008d21998a093',
   'details': {'parent_model': '',
    'format': 'gguf',
    'family': 'llama',
    'families': ['llama'],
    'parameter_size': '8.0B',
    'quantization_level': 'Q4_0'}},
  {'name': 'gemma2:27b',
   'model': 'gemma2:27b',
   'modified_at': '2024-11-05T19:58:30.214524333+09:00',
   'size': 15628387458,
   'digest': '53261bc9c192c1cb5fcc898dd3aa15da093f5ab6f08e17e48cf838bb1c58abfe',
   'details': {'parent_model': '',
    'format': 'gguf',
    'family': 'gemma2',
    'families': ['gemma2'],
    'parameter_size': '27.2B',
    'quantization_level': 'Q4_0'}},
  {'name': 'mistral:7b',
   'model': 'mistral:7b',
   'modified_at': '2024-11-05T19:34:44.773585762+09:00',
   'size': 4113301824,
   'digest': 'f974a74358d62a017b37c6f424fcdf2744ca02926c4f952513ddf474b2fa5091',
  

In [1]:
import pandas as pd
import os
import re
import ollama

# 폴더 경로 설정
data_folder = 'Data'
output_folder = 'Sentiment'
os.makedirs(output_folder, exist_ok=True)

# CSV 파일 경로 설정
input_csv = os.path.join(data_folder, 'news_data.csv')

# 모델 이름 리스트 자동으로 가져오기
models_data = ollama.list()
models = [model['name'] for model in models_data['models']]  # 모델 이름 추출

def extract_sentiment(text):
    # Define the keywords
    keywords = ["positive", "negative", "neutral"]
    # Convert text to lowercase and find the earliest occurrence of any keyword
    text = str(text).lower()
    indices = {keyword: text.find(keyword) for keyword in keywords}
    # Find the keyword with the smallest index that is not -1
    earliest = min((index for index in indices.values() if index != -1), default=-1)
    for keyword, index in indices.items():
        if index == earliest:
            return keyword
    # Default to 'neutral' if no keyword is found
    return "neutral"

# 각 모델에 대해 sentiment 분석 수행
for model_name in models:
    # 모델별 CSV 파일 경로 설정
    sanitized_model_name = re.sub(r'[^a-zA-Z0-9_-]', '_', model_name)
    output_csv = os.path.join(output_folder, f'{sanitized_model_name}_NOCoT.csv')
    
    # Load or create the CSV data
    if os.path.exists(output_csv):
        news_data = pd.read_csv(output_csv)
    else:
        news_data = pd.read_csv(input_csv)
        news_data['Sentiment Analysis'] = None  # Initialize with None for new column

    # Loop through each article and ask for sentiment analysis only if 'Sentiment Analysis' is empty
    for index, row in news_data.iterrows():
        if pd.isna(row['Sentiment Analysis']):  # Check if the sentiment analysis is missing
            # Define the system message to set the role
            system_message = {
                'role': 'system',
                'content': "You are a stock analyst specializing in assessing sentiment in financial news articles."
            }

            # Define the question based on the article title and provide the response format
            question = f"Based on the article titled '{row['Title']}' determine if the tone is positive, negative, or neutral toward {row['Symbol']}.\n" \
           "Please respond in the following format, and omit any reasoning:\n" \
           "Sentiment: [positive/negative/neutral]"

            # Send the question to the model
            response = ollama.chat(model=model_name, messages=[
                system_message,
                {
                    'role': 'user',
                    'content': question,
                },
            ])

            # Extract the response content
            sentiment_response = response['message']['content'] if 'message' in response and 'content' in response['message'] else "No response"

            # Update the sentiment analysis directly in the DataFrame
            news_data.at[index, 'Sentiment Analysis'] = sentiment_response

            # Save the DataFrame back to CSV after each response
            news_data.to_csv(output_csv, index=False, encoding='utf-8')

    # Apply the sentiment extraction function to create the Predict column
    news_data['Predict'] = news_data['Sentiment Analysis'].apply(extract_sentiment)

    # Save the updated DataFrame with the Predict column back to the CSV file
    news_data.to_csv(output_csv, index=False, encoding='utf-8')

print("모든 모델의 감정 분석이 완료되었습니다.")


모든 모델의 감정 분석이 완료되었습니다.


In [2]:
import pandas as pd
import os
import re
import ollama

# 폴더 경로 설정
data_folder = 'Data'
output_folder = 'Sentiment'
os.makedirs(output_folder, exist_ok=True)

# CSV 파일 경로 설정
input_csv = os.path.join(data_folder, 'news_data.csv')

# 모델 이름 리스트 자동으로 가져오기
models_data = ollama.list()
models = [model['name'] for model in models_data['models']]  # 모델 이름 추출

# 감정 추출 함수
def extract_sentiment(text):
    # Define the keywords
    keywords = ["positive", "negative", "neutral"]
    # Convert text to lowercase and find the earliest occurrence of any keyword
    text = str(text).lower()
    indices = {keyword: text.find(keyword) for keyword in keywords}
    # Find the keyword with the smallest index that is not -1
    earliest = min((index for index in indices.values() if index != -1), default=-1)
    for keyword, index in indices.items():
        if index == earliest:
            return keyword
    # Default to 'neutral' if no keyword is found
    return "neutral"

# 각 모델에 대해 sentiment 분석 수행
for model_name in models:
    # 모델별 CSV 파일 경로 설정
    sanitized_model_name = re.sub(r'[^a-zA-Z0-9_-]', '_', model_name)
    output_csv = os.path.join(output_folder, f'{sanitized_model_name}_CoT.csv')
    
    # Load or create the CSV data
    if os.path.exists(output_csv):
        news_data = pd.read_csv(output_csv)
    else:
        news_data = pd.read_csv(input_csv)
        news_data['Sentiment Analysis'] = None  # Initialize with None for new column

    # Loop through each article and ask for sentiment analysis only if 'Sentiment Analysis' is empty
    for index, row in news_data.iterrows():
        if pd.isna(row['Sentiment Analysis']):  # Check if the sentiment analysis is missing
            # Define the system message to set the role
            system_message = {
                'role': 'system',
                'content': "You are a stock analyst specializing in assessing sentiment in financial news articles."
            }

            # Define the question based on the article title and provide the response format
            question = f"Based on the article titled '{row['Title']}' determine if the tone is positive, negative, or neutral toward {row['Symbol']}.\n" \
                       "Please respond in the following format:\n" \
                       "Sentiment: [positive/negative/neutral]\n" \
                       "Reason: [Brief explanation based on the article]"

            # Send the question to the model
            response = ollama.chat(model=model_name, messages=[
                system_message,
                {
                    'role': 'user',
                    'content': question,
                },
            ])

            # Extract the response content
            sentiment_response = response['message']['content'] if 'message' in response and 'content' in response['message'] else "No response"

            # Update the sentiment analysis directly in the DataFrame
            news_data.at[index, 'Sentiment Analysis'] = sentiment_response

            # Save the DataFrame back to CSV after each response
            news_data.to_csv(output_csv, index=False, encoding='utf-8')

    # Apply the sentiment extraction function to create the Predict column
    news_data['Predict'] = news_data['Sentiment Analysis'].apply(extract_sentiment)

    # Save the updated DataFrame with the Predict column back to the CSV file
    news_data.to_csv(output_csv, index=False, encoding='utf-8')

print("모든 모델의 감정 분석이 완료되었습니다.")

모든 모델의 감정 분석이 완료되었습니다.


In [3]:
import pandas as pd
import os
import re
from collections import Counter
import ollama

# 폴더 경로 설정
data_folder = 'Data'
output_folder = 'Sentiment'
os.makedirs(output_folder, exist_ok=True)

# CSV 파일 경로 설정
input_csv = os.path.join(data_folder, 'news_data.csv')

# 모델 이름 리스트 자동으로 가져오기
models_data = ollama.list()
models = [model['name'] for model in models_data['models']]  # 모델 이름 추출

# 감정 추출 함수
def extract_sentiment(text):
    # Define the keywords
    keywords = ["positive", "negative", "neutral"]
    # Convert text to lowercase and find the earliest occurrence of any keyword
    text = str(text).lower()
    indices = {keyword: text.find(keyword) for keyword in keywords}
    # Find the keyword with the smallest index that is not -1
    earliest = min((index for index in indices.values() if index != -1), default=-1)
    for keyword, index in indices.items():
        if index == earliest:
            return keyword
    # Default to 'neutral' if no keyword is found
    return "neutral"

# 각 모델에 대해 sentiment 분석 수행
for model_name in models:
    sanitized_model_name = re.sub(r'[^a-zA-Z0-9_-]', '_', model_name)
    output_csv = os.path.join(output_folder, f'{sanitized_model_name}_Bootstrap.csv')
    
    if os.path.exists(output_csv):
        news_data = pd.read_csv(output_csv)
    else:
        news_data = pd.read_csv(input_csv)
        news_data['Sentiment Analysis'] = None

    for index, row in news_data.iterrows():
        if pd.isna(row['Sentiment Analysis']):  # Check if the sentiment analysis is missing
            responses = []
            for _ in range(5):  # 5번 반복하여 모델에 요청
                system_message = {
                    'role': 'system',
                    'content': "You are a stock analyst specializing in assessing sentiment in financial news articles."
                }
                question = f"Based on the article titled '{row['Title']}' determine if the tone is positive, negative, or neutral toward {row['Symbol']}.\n" \
                           "Please respond in the following format, and omit any reasoning:\n" \
                           "Sentiment: [positive/negative/neutral]"

                response = ollama.chat(model=model_name, messages=[
                    system_message,
                    {
                        'role': 'user',
                        'content': question,
                    },
                ])
                
                sentiment_response = response['message']['content'] if 'message' in response and 'content' in response['message'] else "neutral"
                responses.append(extract_sentiment(sentiment_response))

            # 가장 많이 등장한 응답이 3번 이상일 경우 그 응답을 사용
            response_counts = Counter(responses)
            most_common_response, count = response_counts.most_common(1)[0]
            if count >= 3:
                final_sentiment = most_common_response
            else:
                final_sentiment = "neutral"

            news_data.at[index, 'Sentiment Analysis'] = final_sentiment

            # CSV에 저장
            news_data.to_csv(output_csv, index=False, encoding='utf-8')

    news_data['Predict'] = news_data['Sentiment Analysis'].apply(extract_sentiment)
    news_data.to_csv(output_csv, index=False, encoding='utf-8')

print("모든 모델의 감정 분석이 완료되었습니다.")


모든 모델의 감정 분석이 완료되었습니다.


In [4]:
import pandas as pd
import os
from transformers import pipeline

# 폴더 경로 설정
data_folder = 'Data'
output_folder = 'Sentiment'
os.makedirs(output_folder, exist_ok=True)

# CSV 파일 경로 설정
input_csv = os.path.join(data_folder, 'news_data.csv')

# BERT 기반 감정 분석 파이프라인 초기화
sentiment_pipeline = pipeline("text-classification", model="nlptown/bert-base-multilingual-uncased-sentiment")

# 감정 레이블을 Positive/Negative/Neutral로 매핑
def map_label_to_sentiment(label):
    label_mapping = {
        "1 star": "negative",
        "2 stars": "negative",
        "3 stars": "neutral",
        "4 stars": "positive",
        "5 stars": "positive"
    }
    return label_mapping.get(label, "neutral")

# 감정 분석 수행
output_csv = os.path.join(output_folder, 'BERT.csv')

if os.path.exists(output_csv):
    news_data = pd.read_csv(output_csv)
else:
    news_data = pd.read_csv(input_csv)
    news_data['Predict'] = None  # 'Predict' 열로 변경

for index, row in news_data.iterrows():
    if pd.isna(row['Predict']):  # 감정 분석 결과가 없는 경우
        text_to_analyze = row['Title']  # Title만 분석
        response_label = sentiment_pipeline(text_to_analyze)[0]['label']  # 감정 레이블 ('1 star', '2 stars', etc.)
        sentiment = map_label_to_sentiment(response_label)

        # 감정 분석 결과 저장
        news_data.at[index, 'Predict'] = sentiment  # 'Predict' 열에 저장

        # CSV에 저장
        news_data.to_csv(output_csv, index=False, encoding='utf-8')

print("모든 데이터에 대해 감정 분석이 완료되었습니다.")


Hardware accelerator e.g. GPU is available in the environment, but no `device` argument is passed to the `Pipeline` object. Model will be on CPU.


모든 데이터에 대해 감정 분석이 완료되었습니다.


In [5]:
import pandas as pd
import os
from transformers import AutoModelForSequenceClassification, AutoTokenizer, pipeline

# 폴더 경로 설정
data_folder = 'Data'
output_folder = 'Sentiment'
os.makedirs(output_folder, exist_ok=True)

# CSV 파일 경로 설정
input_csv = os.path.join(data_folder, 'news_data.csv')

# FinBERT 모델 및 토크나이저 초기화
finbert_model_name = "yiyanghkust/finbert-tone"  # FinBERT 모델 이름
tokenizer = AutoTokenizer.from_pretrained(finbert_model_name)
model = AutoModelForSequenceClassification.from_pretrained(finbert_model_name)
sentiment_pipeline = pipeline("text-classification", model=model, tokenizer=tokenizer)

# 감정 분석 수행
output_csv = os.path.join(output_folder, 'FinBERT.csv')

if os.path.exists(output_csv):
    news_data = pd.read_csv(output_csv)
else:
    news_data = pd.read_csv(input_csv)
    news_data['Predict'] = None  # 'Predict' 열로 변경

for index, row in news_data.iterrows():
    if pd.isna(row['Predict']):  # 감정 분석 결과가 없는 경우
        text_to_analyze = row['Title']  # Title만 분석
        response = sentiment_pipeline(text_to_analyze)[0]['label'].lower()  # 'positive', 'negative', 'neutral'

        # 감정 분석 결과 저장
        news_data.at[index, 'Predict'] = response

        # CSV에 저장
        news_data.to_csv(output_csv, index=False, encoding='utf-8')

print("모든 데이터에 대해 감정 분석이 완료되었습니다.")


Hardware accelerator e.g. GPU is available in the environment, but no `device` argument is passed to the `Pipeline` object. Model will be on CPU.


모든 데이터에 대해 감정 분석이 완료되었습니다.


In [6]:
import pandas as pd
import os
from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline

# 폴더 경로 설정
data_folder = 'Data'
output_folder = 'Sentiment'
os.makedirs(output_folder, exist_ok=True)

# CSV 파일 경로 설정
input_csv = os.path.join(data_folder, 'news_data.csv')

# RoBERTa-Finance 모델 초기화
roberta_model_name = "soleimanian/financial-roberta-large-sentiment"  # RoBERTa-Finance 모델 이름
tokenizer = AutoTokenizer.from_pretrained(roberta_model_name)
model = AutoModelForSequenceClassification.from_pretrained(roberta_model_name)
sentiment_pipeline = pipeline("text-classification", model=model, tokenizer=tokenizer)

# 감정 분석 수행
output_csv = os.path.join(output_folder, 'RoBERTaFinance.csv')

if os.path.exists(output_csv):
    news_data = pd.read_csv(output_csv)
else:
    news_data = pd.read_csv(input_csv)
    news_data['Predict'] = None  # 'Predict' 열로 변경

for index, row in news_data.iterrows():
    if pd.isna(row['Predict']):  # 감정 분석 결과가 없는 경우
        text_to_analyze = row['Title']  # Title만 분석
        response = sentiment_pipeline(text_to_analyze)[0]['label'].lower()  # 'positive', 'negative', 'neutral'

        # 감정 분석 결과 저장
        news_data.at[index, 'Predict'] = response

        # CSV에 저장
        news_data.to_csv(output_csv, index=False, encoding='utf-8')

print("모든 데이터에 대해 감정 분석이 완료되었습니다.")


Hardware accelerator e.g. GPU is available in the environment, but no `device` argument is passed to the `Pipeline` object. Model will be on CPU.


모든 데이터에 대해 감정 분석이 완료되었습니다.


In [7]:
import pandas as pd
import os
import re
import ollama

# 폴더 경로 설정
data_folder = 'Data'
output_folder = 'Sentiment'
os.makedirs(output_folder, exist_ok=True)

# CSV 파일 경로 설정
input_csv = os.path.join(data_folder, 'news_data.csv')

# 참조할 모델별 CSV 파일 로드
reference_models = ["BERT", "FinBERT", "RoBERTaFinance"]
reference_data = {}

for ref_model in reference_models:
    ref_path = os.path.join(output_folder, f"{ref_model}.csv")
    if os.path.exists(ref_path):
        reference_data[ref_model] = pd.read_csv(ref_path)

def extract_sentiment(text):
    # Define the keywords
    keywords = ["positive", "negative", "neutral"]
    # Convert text to lowercase and find the earliest occurrence of any keyword
    text = str(text).lower()
    indices = {keyword: text.find(keyword) for keyword in keywords}
    # Find the keyword with the smallest index that is not -1
    earliest = min((index for index in indices.values() if index != -1), default=-1)
    for keyword, index in indices.items():
        if index == earliest:
            return keyword
    # Default to 'neutral' if no keyword is found
    return "neutral"

# 각 모델에 대해 sentiment 분석 수행
models_data = ollama.list()
models = [model['name'] for model in models_data['models']]  # 모델 이름 추출

for model_name in models:
    sanitized_model_name = re.sub(r'[^a-zA-Z0-9_-]', '_', model_name)
    
    for ref_model, ref_df in reference_data.items():
        # 모델별, 참조모델별 CSV 파일 경로 설정
        output_csv = os.path.join(output_folder, f'{sanitized_model_name}_{ref_model}-ICL.csv')
        
        # Load or create the CSV data
        if os.path.exists(output_csv):
            news_data = pd.read_csv(output_csv)
        else:
            news_data = pd.read_csv(input_csv)
            news_data['Sentiment Analysis'] = None  # Initialize with None for new column

        # Loop through each article and ask for sentiment analysis only if 'Sentiment Analysis' is empty
        for index, row in news_data.iterrows():
            if pd.isna(row['Sentiment Analysis']):  # Check if the sentiment analysis is missing
                # 참조 모델의 Predict 값 가져오기
                ref_predict = None
                ref_row = ref_df[ref_df['Title'] == row['Title']]
                if not ref_row.empty:
                    ref_predict = ref_row.iloc[0]['Predict']

                # Define the system message to set the role
                system_message = {
                    'role': 'system',
                    'content': "You are a stock analyst specializing in assessing sentiment in financial news articles."
                }

                # Define the question based on the article title and reference model prediction
                question = f"Based on the article titled '{row['Title']}', determine if the tone is positive, negative, or neutral toward {row['Symbol']}.\n" \
                           f"The sentiment prediction from {ref_model} is '{ref_predict}'.\n" \
                           "Please respond in the following format, and omit any reasoning:\n" \
                           "If your sentiment differs from the reference prediction, provide a brief reason why:\n" \
                           "Sentiment: [positive/negative/neutral]\nReason: [Provide reason only if your sentiment differs from the reference]"

                # Send the question to the model
                response = ollama.chat(model=model_name, messages=[
                    system_message,
                    {
                        'role': 'user',
                        'content': question,
                    },
                ])

                # Extract the response content
                sentiment_response = response['message']['content'] if 'message' in response and 'content' in response['message'] else "No response"

                # Update the sentiment analysis directly in the DataFrame
                news_data.at[index, 'Sentiment Analysis'] = sentiment_response

                # Save the DataFrame back to CSV after each response
                news_data.to_csv(output_csv, index=False, encoding='utf-8')

        # Apply the sentiment extraction function to create the Predict column
        news_data['Predict'] = news_data['Sentiment Analysis'].apply(extract_sentiment)

        # Save the updated DataFrame with the Predict column back to the CSV file
        news_data.to_csv(output_csv, index=False, encoding='utf-8')

print("모든 모델의 감정 분석이 완료되었습니다.")


모든 모델의 감정 분석이 완료되었습니다.


In [8]:
import pandas as pd
import os
import re
import ollama
from collections import Counter

# 폴더 경로 설정
data_folder = 'Data'
output_folder = 'Sentiment'
os.makedirs(output_folder, exist_ok=True)

# CSV 파일 경로 설정
input_csv = os.path.join(data_folder, 'news_data.csv')

# 참조할 모델별 CSV 파일 로드
reference_models = ["BERT", "FinBERT", "RoBERTaFinance"]
reference_data = {}

for ref_model in reference_models:
    ref_path = os.path.join(output_folder, f"{ref_model}.csv")
    if os.path.exists(ref_path):
        reference_data[ref_model] = pd.read_csv(ref_path)

def extract_sentiment(text):
    # Define the keywords
    keywords = ["positive", "negative", "neutral"]
    # Convert text to lowercase and find the earliest occurrence of any keyword
    text = str(text).lower()
    indices = {keyword: text.find(keyword) for keyword in keywords}
    # Find the keyword with the smallest index that is not -1
    earliest = min((index for index in indices.values() if index != -1), default=-1)
    for keyword, index in indices.items():
        if index == earliest:
            return keyword
    # Default to 'neutral' if no keyword is found
    return "neutral"

# 각 모델에 대해 sentiment 분석 수행
models_data = ollama.list()
models = [model['name'] for model in models_data['models']]  # 모델 이름 추출

for model_name in models:
    sanitized_model_name = re.sub(r'[^a-zA-Z0-9_-]', '_', model_name)
    
    for ref_model, ref_df in reference_data.items():
        # 모델별, 참조모델별 CSV 파일 경로 설정
        output_csv = os.path.join(output_folder, f'{sanitized_model_name}_{ref_model}-BOOTICL.csv')
        
        # Load or create the CSV data
        if os.path.exists(output_csv):
            news_data = pd.read_csv(output_csv)
        else:
            news_data = pd.read_csv(input_csv)
            news_data['Sentiment Analysis'] = None  # Initialize with None for new column

        # Loop through each article and ask for sentiment analysis only if 'Sentiment Analysis' is empty
        for index, row in news_data.iterrows():
            if pd.isna(row['Sentiment Analysis']):  # Check if the sentiment analysis is missing
                sentiments = []
                for _ in range(5):  # Repeat 5 times
                    # 참조 모델의 Predict 값 가져오기
                    ref_predict = None
                    ref_row = ref_df[ref_df['Title'] == row['Title']]
                    if not ref_row.empty:
                        ref_predict = ref_row.iloc[0]['Predict']

                    # Define the system message to set the role
                    system_message = {
                        'role': 'system',
                        'content': "You are a stock analyst specializing in assessing sentiment in financial news articles."
                    }

                    # Define the question based on the article title and reference model prediction
                    question = f"Based on the article titled '{row['Title']}', determine if the tone is positive, negative, or neutral toward {row['Symbol']}.\n" \
                               f"The sentiment prediction from {ref_model} is '{ref_predict}'.\n" \
                               "Please respond in the following format, and omit any reasoning:\n" \
                               "Sentiment: [positive/negative/neutral]"

                    # Send the question to the model
                    response = ollama.chat(model=model_name, messages=[
                        system_message,
                        {
                            'role': 'user',
                            'content': question,
                        },
                    ])

                    # Extract the response content
                    sentiment_response = response['message']['content'] if 'message' in response and 'content' in response['message'] else "neutral"
                    sentiment = extract_sentiment(sentiment_response)
                    sentiments.append(sentiment)

                # Determine the majority sentiment or default to 'neutral'
                sentiment_count = Counter(sentiments)
                majority_sentiment, count = sentiment_count.most_common(1)[0]
                if count >= 3:  # Majority rule
                    final_sentiment = majority_sentiment
                else:
                    final_sentiment = "neutral"

                # Update the sentiment analysis directly in the DataFrame
                news_data.at[index, 'Sentiment Analysis'] = final_sentiment

                # Save the DataFrame back to CSV after each response
                news_data.to_csv(output_csv, index=False, encoding='utf-8')

        # Save the updated DataFrame with the Predict column back to the CSV file
        news_data['Predict'] = news_data['Sentiment Analysis']
        news_data.to_csv(output_csv, index=False, encoding='utf-8')

print("모든 모델의 감정 분석이 완료되었습니다.")


모든 모델의 감정 분석이 완료되었습니다.
