In [2]:
import pandas as pd
from sentence_transformers import SentenceTransformer, util
import numpy as np
from tqdm import tqdm

In [13]:
# Load a pretrained sentence transformer model
model = SentenceTransformer('paraphrase-MiniLM-L6-v2')

# Function to find the common idea between two sentences
def find_common_idea(sentences):
    # Encode the sentences
    embeddings = model.encode(sentences, convert_to_tensor=True)
    
    # Compute cosine similarities
    cosine_similarities = util.pytorch_cos_sim(embeddings, embeddings)
    
    for i in range(len(sentences)):
        cosine_similarities[i][i] = -1
    
    # Find the most similar pair of sentences
    most_similar_pair = np.unravel_index(cosine_similarities.argmax(), cosine_similarities.shape)
    # Return the sentences with the highest similarity score
    return sentences[most_similar_pair[0]], sentences[most_similar_pair[1]], cosine_similarities[most_similar_pair].item()

# Example sentences
sentences = [
    "I live in the countryside of Mexico.",
    "I do not like Americans.",
    "I reside countryside of Germany",
]

# Find the common idea
sentence1, sentence2, similarity = find_common_idea(sentences)

print(f"Sentence 1: {sentence1}")
print(f"Sentence 2: {sentence2}")
print(f"Similarity Score: {similarity}")

Sentence 1: I live in the countryside of Mexico.
Sentence 2: I reside countryside of Germany
Similarity Score: 0.5935172438621521


In [44]:
# Install necessary libraries
%pip install -r requirements.txt
# Imports
from transformers import pipeline
import pandas as pd


You should consider upgrading via the '/usr/local/bin/python3.9 -m pip install --upgrade pip' command.[0m[33m
[0mNote: you may need to restart the kernel to use updated packages.


In [35]:

# Load the sentiment analysis model
sentiment_model = pipeline("text-classification", model="cardiffnlp/twitter-roberta-base-sentiment-latest")

def classify_sentiments(messages):
    # Get sentiment predictions
    results = sentiment_model(messages)
    print(results)
    # Process the results to get labels
    classified_results = []
    for result in results:
        label = result['label']
        if 'positive' in label:
            classified_results.append('POSITIVE')
        elif 'negative' in label:
            classified_results.append('NEGATIVE')
        else:
            classified_results.append('NEUTRAL')
    
    return classified_results

# Example comments
comments = [
    "very bad",
    "it is to my liking",
    "I dont care about the product, but i also dont hate it",
]

# Classify the sentiments
classified_comments = classify_sentiments(comments)

# Display the results
for comment, sentiment in zip(comments, classified_comments):
    print(f"Comment: {comment}\nSentiment: {sentiment}\n")

Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[{'label': 'negative', 'score': 0.7818949222564697}, {'label': 'positive', 'score': 0.6411069631576538}, {'label': 'neutral', 'score': 0.4318191111087799}]
Comment: very bad
Sentiment: NEGATIVE

Comment: it is to my liking
Sentiment: POSITIVE

Comment: I dont care about the product, but i also dont hate it
Sentiment: NEUTRAL



In [52]:
text = "summarize: The best Lenovo laptop for most people is the Lenovo Slim Pro 7. It offers plenty of power and battery life inside a compact and durable all-metal chassis for a reasonable price. It costs $1,200 when it’s not on sale but can usually be found with a $300 discount at Best Buy, dropping the price to an attractive $900. It’s rare to find this mix of performance, portability and build quality for less than $1,000. The 14.5-inch, 2.5K display is large enough to work on for long stretches without feeling too cramped, and the 3.5-inch weight is light for regular travel. The combination of an AMD Ryzen 7 CPU and RTX 3050 graphics provides ample performance for most users, including content creators and media editors."
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
summarizer(text, max_length=15, min_length=10, do_sample=True)

KeyboardInterrupt: 

In [48]:
from bertopic import BERTopic
from sklearn.datasets import fetch_20newsgroups

ModuleNotFoundError: No module named 'bertopic'

In [None]:
# fetch an example dataset from sklearn
docs = fetch_20newsgroups(subset='train')['data']

model = BERTopic(language="english")
topics, probs = model.fit_transform(docs)
print(topics)