In [1]:
# Import necessary libraries and set up logging
import pandas as pd
import ast
import random
import logging
from langchain import PromptTemplate
from langchain_aws import ChatBedrock
from langchain_core.output_parsers import StrOutputParser
from sklearn.metrics import accuracy_score

# Configure the root logger once for the whole notebook: INFO and above,
# each record prefixed with its timestamp and level name.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)

In [2]:
# Load the dataset and define a function to parse the sentiment column
# Location of the CSV to load, relative to the notebook's working directory.
dataset_path = "cryptonews.csv"
logging.info(f"Loading dataset from {dataset_path}")
# Column labels are supplied explicitly instead of being read from the file.
# NOTE(review): with `names=` and no `header=` argument pandas treats the first
# line of the file as data - confirm the CSV really has no header row.
df = pd.read_csv(
    dataset_path,
    names=[
        "timestamp",
        "sentiment",
        "source",
        "category",
        "text",
        "title",
        "url",
    ],
)

def parse_sentiment(sentiment_str):
    """Return the label stored under ``'class'`` in a serialized sentiment dict.

    Args:
        sentiment_str: Text holding a Python-literal dict, for example
            ``"{'class': 'positive'}"``.

    Returns:
        The value stored under the ``'class'`` key, or ``None`` when the input
        cannot be evaluated as a literal, does not evaluate to a dict, or has
        no ``'class'`` key.
    """
    try:
        parsed = ast.literal_eval(sentiment_str)
    except (ValueError, SyntaxError, TypeError):
        # Malformed text, or a non-string cell such as a missing value.
        return None
    # A valid literal that is not a mapping (number, list, ...) cannot carry a
    # 'class' entry; treat it as unparseable instead of raising TypeError.
    if not isinstance(parsed, dict):
        return None
    return parsed.get('class')

logging.info("Parsing sentiment column")
# Swap each raw sentiment cell for its parsed label, then discard the rows for
# which parse_sentiment produced no label.
df = (
    df
    .assign(sentiment=df['sentiment'].map(parse_sentiment))
    .dropna(subset=['sentiment'])
)

2024-08-30 15:14:18,545 - INFO - Loading dataset from cryptonews.csv
2024-08-30 15:14:18,872 - INFO - Parsing sentiment column


In [3]:
# Combine title and text, drop unnecessary columns, and select samples
logging.info("Combining title and text columns into content")
# Plain concatenation yields NaN as soon as either part is missing, and the
# str() conversion further down would turn that into the literal text "nan".
# Rows with both parts present keep exactly the same content as before; rows
# with a missing part fall back to whichever part exists (or an empty string).
content_full = df['title'] + " " + df['text']
content_fallback = df['title'].fillna('') + df['text'].fillna('')
df['content'] = content_full.fillna(content_fallback)
# errors='ignore' keeps the cell re-runnable once the columns are already gone.
df = df.drop(columns=['timestamp', 'source', 'category', 'url'], errors='ignore')

logging.info("Selecting 2500 random positive and negative samples")
# Fixed seeds make the per-class draws repeatable across kernel restarts.
positive_samples = df[df['sentiment'] == 'positive'].sample(n=2500, random_state=42)
negative_samples = df[df['sentiment'] == 'negative'].sample(n=2500, random_state=42)

logging.info("Combining and shuffling the dataset")
# The shuffle is seeded as well; without it the combined order changed on
# every run even though the draws above were deterministic.
combined_df = (
    pd.concat([positive_samples, negative_samples])
    .sample(frac=1, random_state=42)
    .reset_index(drop=True)
)
sentiments = combined_df['sentiment'].tolist()
# Every entry is forced to str so downstream consumers always receive text.
texts = [str(text) for text in combined_df['content'].tolist()]

2024-08-30 15:14:24,337 - INFO - Combining title and text columns into content
2024-08-30 15:14:24,384 - INFO - Selecting 2500 random positive and negative samples
2024-08-30 15:14:24,411 - INFO - Combining and shuffling the dataset


In [10]:
# Set up sentiment classification chain using LangChain with Bedrock model
logging.info("Setting up sentiment classification chain using LangChain with Bedrock model")
model_id = "anthropic.claude-3-haiku-20240307-v1:0"
llm = ChatBedrock(model_id=model_id)

# Few-shot sentiment prompt.
# - The instruction comes first and asks for a one-word answer, because the
#   reply is later interpreted by looking for the words "positive"/"negative";
#   an unconstrained explanation can easily mention both.
# - The text to classify comes last, after the examples, so it cannot be
#   confused with them.
# NOTE(review): several examples below appear verbatim among the texts that are
# evaluated later in the notebook; consider excluding them from the evaluation
# sample so the reported accuracy is not inflated.
sentiment_template = PromptTemplate(
    template="""Classify the sentiment of the text given after the examples as Positive or Negative.
Respond with exactly one word: Positive or Negative. Do not add any explanation.

Examples:
Positive: "I love this product! It works great and exceeds my expectations."
Negative: "This is the worst experience I've ever had. Completely dissatisfied."
Positive: "Digital Asset Inflows Reach $1.76 Billion, Marking 10 Weeks of Growth: CoinShares Ethereum saw substantial inflows marking a five-week positive trend and reaching $134 million."
Negative: "Crypto Vesting Platform Team Finance Loses $14.5M in Bug Exploit The platform is willing to offer a bounty for the hackers to refund the stolen assets."
Negative: "Mango Markets Protocol Suffers $100 Million Exploit The Mango Markets protocol has experienced an exploit that allowed a person or group to get away with around $100m worth of crypto."
Positive: "Kraken Enters NFT Game with Waitlist for New Multi-Chain Marketplace Kraken has so far not confirmed when the new marketplace will be launched."
Negative: "Ireland's Central Bank Governor Labels Unbacked Cryptos as Ponzi Schemes Unbacked cryptocurrencies are more like a 'Ponzi scheme' than an investment, argued Central Bank of Ireland Governor Gabriel Makhlouf - but adding that 'crypto is not going away very soon.'"
Positive: "Best Crypto to Buy Today 12th January MEMAG, AVAX, FGHT, NEAR, CCHG, FLOW, RIA, AXS, D2T With the cryptocurrency market up in anticipation of strong US inflation data, we pick the best crypto to buy today."
Negative: "Coinbase Slapped With Another Lawsuit for Crashing During Market Volatility Coinbase’s latest lawsuit alleges that it failed to uphold customer account access when it mattered most."
Positive: "Cathie Wood's Ark 21Shares, VanEck Kick off Spot Ethereum ETF Race in U.S ARK 21Share and investment manager VanEck have filed for the first-ever Ether (ETH) spot Exchange-Traded Fund (ETF) in the United States. Read on to learn more."

Text to classify: "{text}"
Sentiment:""",
    input_variables=["text"]
)

# prompt -> model -> plain string
parser = StrOutputParser()
sentiment_chain = sentiment_template | llm | parser

def classify_sentiment(text):
    """Classify ``text`` as ``"positive"`` or ``"negative"`` via ``sentiment_chain``.

    The chain's reply is free-form text, so the label is taken to be whichever
    of the two words occurs first in it (case-insensitive). Checking only for
    "positive" anywhere in the reply would misread answers such as
    "Negative. Nothing positive is reported."

    Args:
        text: The passage to classify.

    Returns:
        ``"positive"`` or ``"negative"``. When the reply mentions neither word
        the function logs a warning and returns ``"negative"``, which keeps the
        two-label return contract.
    """
    result = sentiment_chain.invoke({"text": text})
    reply = result.lower()
    # Position of the first mention of each label; labels that are absent
    # (find() == -1) are left out.
    first_mention = {
        label: reply.find(label)
        for label in ("positive", "negative")
        if label in reply
    }
    if not first_mention:
        logging.warning("No sentiment label found in model reply %r; defaulting to negative", result)
        return "negative"
    # NOTE: a negated first mention ("not positive") is still read as that label.
    return min(first_mention, key=first_mention.get)

2024-08-30 15:32:06,863 - INFO - Setting up sentiment classification chain using LangChain with Bedrock model
2024-08-30 15:32:06,894 - INFO - Found credentials in shared credentials file: ~/.aws/credentials


In [11]:
# Classify samples and calculate accuracy
# Number of rows taken from each class sample for this evaluation run.
n_per_class = 20
logging.info(f"Classifying {n_per_class} examples from each sentiment class and calculating accuracy")
positive_eval_texts = positive_samples['content'].tolist()[:n_per_class]
negative_eval_texts = negative_samples['content'].tolist()[:n_per_class]
sample_texts = positive_eval_texts + negative_eval_texts
# Labels are sized from the slices actually taken, so they stay aligned with
# sample_texts even if a class holds fewer than n_per_class rows.
sample_sentiments = (
    ['positive'] * len(positive_eval_texts)
    + ['negative'] * len(negative_eval_texts)
)

# One model call per text; each prediction is logged as it arrives.
predicted_sentiments = []
for i, text in enumerate(sample_texts):
    sentiment = classify_sentiment(text)
    logging.info(f"{i+1}/{len(sample_texts)} - Text: {text}\nPredicted Sentiment: {sentiment}")
    predicted_sentiments.append(sentiment)

accuracy = accuracy_score(sample_sentiments, predicted_sentiments)
logging.info(f"Accuracy: {accuracy * 100:.2f}%")

2024-08-30 15:32:15,400 - INFO - Classifying 20 examples from each sentiment class and calculating accuracy
2024-08-30 15:32:20,484 - INFO - 1/40 - Text: Crypto Vesting Platform Team Finance Loses $14.5M in Bug Exploit The platform is willing to offer a bounty for the hackers to refund the stolen assets.\xa0
Predicted Sentiment: positive
2024-08-30 15:32:23,785 - INFO - 2/40 - Text: Digital Asset Inflows Reach $1.76 Billion, Marking 10 Weeks of Growth: CoinShares Ethereum saw substantial inflows marking a five-week positive trend and reaching $134 million.
Predicted Sentiment: positive
2024-08-30 15:32:25,511 - INFO - 3/40 - Text: Futures Funding Rates Normalize After Crypto Surge Forced Traders to Pay High Fees to Maintain Long Positions Funding rates on major token futures have started to return to normal levels after recent rally forced traders to pay unusually high fees to sustain their long positions.
Predicted Sentiment: positive
2024-08-30 15:32:29,296 - INFO - 4/40 - Text: Mang

In [12]:
# Define reverse sentiment chain and functions for adjective extraction and haiku generation
# Prompt and chain that ask the model to rewrite a text with the opposite
# sentiment: prompt -> model -> plain string.
reverse_template = PromptTemplate(
    input_variables=["text"],
    template="Reverse the sentiment of this text: {text}",
)
reverse_chain = reverse_template | llm | parser

def reverse_sentiment(text):
    """Send ``text`` through ``reverse_chain`` and return the chain's output.

    Args:
        text: Passage substituted into the "reverse the sentiment" prompt.

    Returns:
        Whatever ``reverse_chain.invoke`` returns for that passage.
    """
    chain_input = {"text": text}
    reversed_text = reverse_chain.invoke(chain_input)
    return reversed_text

logging.info("Setting up adjective extraction chain using LangChain with Bedrock model")
# Prompt and chain that ask the model to list the adjectives found in a text:
# prompt -> model -> plain string.
adjective_template = PromptTemplate(
    input_variables=["text"],
    template="Extract adjectives from this text: {text}",
)
adjective_chain = adjective_template | llm | parser

def extract_adjectives(text):
    """Ask ``adjective_chain`` for the adjectives in ``text`` and return them as a list.

    The reply is scanned line by line and every list item is collected,
    whether numbered (``"1. word"``, ``"1) word"``) or bulleted (``"- word"``,
    ``"* word"``, ``"• word"``). Lines that are not list items, such as an
    introductory header, are ignored. No line is skipped by position, so a
    reply that starts directly with the first item keeps that item.

    Args:
        text: Passage substituted into the adjective-extraction prompt.

    Returns:
        List of the item texts in the order they appear; empty when the reply
        contains no list items.
    """
    result = adjective_chain.invoke({"text": text})
    adjectives = []
    for line in result.splitlines():
        # Split "<marker> <rest>"; lines without a space cannot be list items.
        marker, separator, rest = line.strip().partition(" ")
        if not separator:
            continue
        is_numbered = marker[:-1].isdigit() and marker[-1] in ".)"
        is_bulleted = marker in ("-", "*", "\u2022")
        item = rest.strip()
        if (is_numbered or is_bulleted) and item:
            adjectives.append(item)
    logging.info(f"Extracted adjectives: {adjectives}")
    return adjectives

def generate_haiku(adjectives, theme):
    """Fill a fixed three-line verse template with a theme and three adjectives.

    Args:
        adjectives: Sequence of words; only the first three are used.
        theme: Word placed at the start of the first and third lines.

    Returns:
        The three-line verse joined with newlines, or the message
        "Not enough adjectives to generate a haiku." (also logged as a
        warning) when fewer than three adjectives are supplied.
    """
    enough_words = len(adjectives) >= 3
    if enough_words:
        return f"{theme} are {adjectives[0]},\n{adjectives[1]} and {adjectives[2]},\n{theme} in the sky."
    # Fewer than three adjectives: report it and return the message itself.
    logging.warning("Not enough adjectives to generate a haiku.")
    return "Not enough adjectives to generate a haiku."

def process_review(review, theme):
    """Turn a review into a themed verse built from its sentiment-reversed text.

    The review is passed to ``reverse_sentiment``, the result to
    ``extract_adjectives``, and the resulting words plus ``theme`` to
    ``generate_haiku``.

    Args:
        review: The review text to start from.
        theme: Subject word handed to ``generate_haiku``.

    Returns:
        Whatever ``generate_haiku`` returns for the extracted adjectives.
    """
    logging.info(f"Processing review for theme: {theme}")
    return generate_haiku(extract_adjectives(reverse_sentiment(review)), theme)

2024-08-30 15:35:22,864 - INFO - Setting up adjective extraction chain using LangChain with Bedrock model


In [13]:
# Example usage
review = "I watch this amazing movie every single night, the green actor is dreamy and I am in love with the great story."
logging.info("Original Review: " + review)
# Reverse the review once and reuse the result. Calling reverse_sentiment a
# second time would issue another model request whose output may differ, so
# the logged adjectives would not belong to the logged reversed text.
reversed_review = reverse_sentiment(review)
logging.info("Reversed Sentiment Review: " + reversed_review)
logging.info("Extracted Adjectives: " + str(extract_adjectives(reversed_review)))
# process_review runs the full reverse -> extract -> verse pipeline itself.
logging.info("Haiku about birds: " + process_review(review, "birds"))
logging.info("Haiku about octopuses: " + process_review(review, "octopuses"))

2024-08-30 15:35:28,043 - INFO - Original Review: I watch this amazing movie every single night, the green actor is dreamy and I am in love with the great story.
2024-08-30 15:35:28,544 - INFO - Reversed Sentiment Review: I despise this abysmal movie every single night, the green actor is repulsive and I loathe the dreadful story.
2024-08-30 15:35:29,991 - INFO - Extracted adjectives: ['amazing', 'single', 'green', 'dreamy', 'great']
2024-08-30 15:35:29,991 - INFO - Extracted Adjectives: ['amazing', 'single', 'green', 'dreamy', 'great']
2024-08-30 15:35:29,991 - INFO - Processing review for theme: birds
2024-08-30 15:35:31,184 - INFO - Extracted adjectives: ['amazing', 'single', 'green', 'dreamy', 'great']
2024-08-30 15:35:31,184 - INFO - Haiku about birds: birds are amazing,
single and green,
birds in the sky.
2024-08-30 15:35:31,199 - INFO - Processing review for theme: octopuses
2024-08-30 15:35:32,629 - INFO - Extracted adjectives: ['terrible', 'annoying', 'dreadful']
2024-08-30 15