In [8]:
import random
from datetime import datetime
import pandas as pd
# Load the price table; later cells read its 'date', 'ticker', 'Open',
# 'Close', 'Volume' and 'RSI' columns.
stock_data = pd.read_csv(filepath_or_buffer="stock_data_5_years.csv")

In [9]:
# Function to generate sentiment based on price movement and RSI
def generate_sentiment(close, open_price, rsi, rsi_upper=70, rsi_lower=30, rng=None):
    """Label one trading day with a sentiment and a randomly picked emotion.

    Rules, checked in order:
      * "bullish" when ``close > open_price`` and ``rsi < rsi_upper``
      * "bearish" when ``close < open_price`` and ``rsi > rsi_lower``
      * "neutral" otherwise (flat day, RSI outside the bound for the
        direction of the move, or NaN inputs, since comparisons against
        NaN are always false)

    Parameters
    ----------
    close, open_price : numeric
        Closing and opening price for the day.
    rsi : numeric
        RSI value for the day.
    rsi_upper : numeric, default 70
        Exclusive upper RSI bound for a "bullish" label.
        NOTE(review): presumably the conventional overbought level; confirm.
    rsi_lower : numeric, default 30
        Exclusive lower RSI bound for a "bearish" label.
        NOTE(review): presumably the conventional oversold level; confirm.
    rng : object with a ``choice(seq)`` method, optional
        Source of randomness for the emotion, e.g. ``random.Random(seed)``.
        When omitted, the module-level ``random`` state is used, which is
        exactly what the function did before this parameter existed.

    Returns
    -------
    tuple[str, str]
        ``(sentiment, emotion)``.
    """
    # Fall back to the global generator so existing three-argument callers
    # see unchanged behaviour.
    chooser = random if rng is None else rng

    if close > open_price and rsi < rsi_upper:
        return "bullish", chooser.choice(["optimistic", "excited", "confident"])
    elif close < open_price and rsi > rsi_lower:
        return "bearish", chooser.choice(["worried", "anxious", "disappointed"])
    else:
        return "neutral", chooser.choice(["calm", "uncertain", "indifferent"])

In [10]:
# Build one synthetic, tweet-like record per row of stock_data
textual_data = []

for index, row in stock_data.iterrows():
    # Keep only the part before the first space, then round-trip it through
    # datetime so the stored value is a validated YYYY-MM-DD string
    day_part = row['date'].split(' ')[0]
    date = datetime.strptime(day_part, '%Y-%m-%d').strftime('%Y-%m-%d')

    ticker = row['ticker']
    open_price, close_price = row['Open'], row['Close']
    volume = row['Volume']

    # A missing RSI falls back to 50, which is treated as neutral
    rsi = 50 if pd.isnull(row['RSI']) else row['RSI']

    sentiment, emotion = generate_sentiment(close_price, open_price, rsi)

    # Choose the sentence template that matches the sentiment label
    if sentiment == "bullish":
        text = f"Strong day for {ticker}! Closed higher at {close_price:.2f}, volume surging at {volume}. Feeling {emotion}!"
    elif sentiment == "bearish":
        text = f"{ticker} struggled today, dropping to {close_price:.2f}. Investors are {emotion} amid heavy trading volume of {volume}."
    else:
        text = f"{ticker} had a steady day, closing at {close_price:.2f}. Market seems {emotion} with moderate activity."

    # "processed" is simply the lower-cased form of the generated text
    record = dict(
        id=index,
        date=date,
        ticker=ticker,
        emo_label=emotion,
        senti_label=sentiment,
        original=text,
        processed=text.lower(),
    )
    textual_data.append(record)

In [11]:
# Turn the list of record dicts into a table (one row per record)
textual_df = pd.DataFrame(data=textual_data)

# Preview the first five generated rows
textual_df.head(n=5)

Unnamed: 0,id,date,ticker,emo_label,senti_label,original,processed
0,0,2020-01-17,AAPL,optimistic,bullish,"Strong day for AAPL! Closed higher at 77.25, v...","strong day for aapl! closed higher at 77.25, v..."
1,1,2020-01-21,AAPL,anxious,bearish,"AAPL struggled today, dropping to 76.73. Inves...","aapl struggled today, dropping to 76.73. inves..."
2,2,2020-01-22,AAPL,disappointed,bearish,"AAPL struggled today, dropping to 77.00. Inves...","aapl struggled today, dropping to 77.00. inves..."
3,3,2020-01-23,AAPL,confident,bullish,"Strong day for AAPL! Closed higher at 77.37, v...","strong day for aapl! closed higher at 77.37, v..."
4,4,2020-01-24,AAPL,disappointed,bearish,"AAPL struggled today, dropping to 77.15. Inves...","aapl struggled today, dropping to 77.15. inves..."


In [12]:
# Write the generated texts to a UTF-8 CSV file without the row index.
# Note: despite its name, refined_textual_df holds the output path (a str),
# not a DataFrame.
refined_textual_df = "refined_textual_data.csv"
textual_df.to_csv(path_or_buf=refined_textual_df, encoding='utf-8', index=False)
print(f"DataFrame 'textual_df' successfully saved to {refined_textual_df}")

DataFrame 'textual_df' successfully saved to refined_textual_data.csv
