In [67]:
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.decomposition import LatentDirichletAllocation
from bs4 import BeautifulSoup
from transformers import pipeline
import csv

# Load the news export and give its columns the names the rest of the script
# relies on (the assignment requires the file to have exactly five columns).
column_names = ["Issuer", "Seinet ID", "Content", "Date", "Attachments"]
data = pd.read_csv('news_data.csv')
data.columns = column_names

# Every later step works on the attachment text, so rows without one are dropped.
data = data.dropna(subset=['Attachments'])

def preprocess_text(text: str) -> str:
    """Strip HTML markup from *text* and return normalized, lowercase plain text.

    Args:
        text: Raw attachment content, possibly containing HTML.

    Returns:
        The visible text, lowercased, with every run of whitespace collapsed
        to a single space and no leading or trailing whitespace.
    """
    soup = BeautifulSoup(text, "html.parser")
    # Without a separator, text from adjacent elements is concatenated
    # ("<p>good</p><p>news</p>" -> "goodnews"), which produces bogus tokens
    # for the vectorizer. The trade-off is that a word split by inline tags
    # ("<b>H</b>ello") is separated as well.
    visible_text = soup.get_text(separator=" ")
    # split()/join collapses the whitespace the separator and the original
    # markup layout leave behind, and trims both ends.
    return " ".join(visible_text.split()).lower()

# Plain-text version of every attachment, stored alongside the raw content.
cleaned_docs = data['Attachments'].map(preprocess_text)
data['Cleaned Attachments'] = cleaned_docs

# Bag-of-words counts limited to 1000 features, with English stop words removed.
vectorizer = CountVectorizer(max_features=1000, stop_words='english')
text_matrix = vectorizer.fit_transform(cleaned_docs)

# Three-topic LDA; the fixed random_state keeps the fit reproducible.
lda_model = LatentDirichletAllocation(random_state=42, n_components=3)
lda_topics = lda_model.fit_transform(text_matrix)

# Label each document with the index of its highest-weighted topic.
data['Topic'] = lda_topics.argmax(axis=1)

# Sentiment classifier: DistilBERT checkpoint fine-tuned on SST-2.
sentiment_model = pipeline(
    task='sentiment-analysis',
    model='distilbert-base-uncased-finetuned-sst-2-english',
)

def get_sentiment(text):
    """Return the sentiment label ``sentiment_model`` predicts for *text*.

    Args:
        text: The document text to classify.

    Returns:
        The ``'label'`` field of the pipeline's first prediction (for the
        SST-2 checkpoint configured in this file, ``'POSITIVE'`` or
        ``'NEGATIVE'``).
    """
    # The model's limit is 512 *tokens*, not characters. Slicing the string
    # to 512 characters throws away most of the usable context and still
    # cannot guarantee the tokenized input fits once special tokens are
    # added, so truncation is delegated to the tokenizer instead.
    result = sentiment_model(text, truncation=True, max_length=512)
    return result[0]['label']

# Classify the HTML-stripped text rather than the raw attachment, so markup
# (tags, attributes, entities) neither uses up the model's input budget nor
# skews the prediction.
data['Sentiment'] = data['Cleaned Attachments'].apply(get_sentiment)

# Translation from classifier label to trading action.
label_to_action = {'POSITIVE': 'buy', 'NEGATIVE': 'sell'}

# Per-document recommendation, kept on `data` for row-level inspection.
data['Recommendation'] = data['Sentiment'].map(label_to_action)

# Majority vote per issuer. Series.mode() returns its values sorted, so an
# exact tie resolves to 'NEGATIVE', i.e. 'sell'.
issuer_sentiment = (
    data.groupby('Issuer')['Sentiment']
    .agg(lambda labels: labels.mode()[0])
    .map(label_to_action)
)

# Attach the issuer-level result to every document row. Both sides carry a
# 'Sentiment' column, so the suffix rule names the incoming one
# 'Sentiment_Recommendation'.
data = data.merge(issuer_sentiment, on='Issuer', suffixes=('', '_Recommendation'))

# Export the issuer-level recommendation, one row per issuer. Selecting the
# per-document 'Recommendation' column here would ignore the aggregation
# above and emit repeated, possibly contradictory rows for the same issuer.
data_final = issuer_sentiment.rename('Recommendation').reset_index()
data_final = data_final[['Issuer', 'Recommendation']]

with open("sentimental_analysis.csv", mode="w", newline="", encoding="utf-8") as csvfile:
    csv_writer = csv.writer(csvfile)
    csv_writer.writerow(["Issuer", "Recommendation"])
    csv_writer.writerows(data_final.itertuples(index=False, name=None))

print("Data successfully written to 'sentimental_analysis.csv'")


Device set to use cpu


Data successfully written to 'sentimental_analysis.csv'
