In [None]:
import ast

import pandas as pd
from transformers import T5Tokenizer, T5ForConditionalGeneration

In [None]:
# Load the news dataset. Later cells read its 'company' and 'sentences' columns.
data = pd.read_csv(filepath_or_buffer="news_sentences.csv")

In [None]:
def split_into_sentences(s):
    """Turn a stringified Python list of sentences into a real list of strings.

    The input is expected to look like the ``repr`` of a list, e.g.
    ``"['First sentence.', 'Second one.']"``.

    Parameters
    ----------
    s : str or list or tuple or other
        Raw cell value. Lists/tuples are returned as a new list, which keeps
        the function safe to apply a second time to an already-parsed column.
        Any other non-string value (``None``, ``NaN``, ...) yields ``[]``.

    Returns
    -------
    list of str
        The individual sentences, in their original order. An empty or blank
        input, or the literal ``"[]"``, yields an empty list.
    """
    # Already parsed: hand back a fresh list instead of failing on .split().
    if isinstance(s, (list, tuple)):
        return list(s)

    # Missing values (None / NaN floats) carry no sentences.
    if not isinstance(s, str):
        return []

    text = s.strip()
    if not text:
        return []

    # Preferred path: parse the list literal properly. This copes with
    # elements written with double quotes (repr does that for strings that
    # contain an apostrophe), with escape sequences, and with "[]".
    try:
        parsed = ast.literal_eval(text)
    except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
        parsed = None

    if isinstance(parsed, (list, tuple)):
        return [str(item) for item in parsed]

    # Fallback for text that is not a valid literal (e.g. unescaped quotes
    # inside a sentence): keep the original bracket-stripping split.
    if text.startswith("[") and text.endswith("]"):
        return text[2:-2].split("', '")

    # Not list-shaped at all: treat the whole text as a single sentence.
    return [text]

# Replace each raw string in the 'sentences' column with the list of sentences
# returned by split_into_sentences. The column is overwritten in place, so the
# original string form is no longer available in `data` after this line.
data['sentences'] = data['sentences'].apply(split_into_sentences)

In [None]:
# Peek at the first row: the values at column positions 0 and 3
# (presumably the company name and its sentence list -- confirm column order).
# A single 2-D .iloc lookup is used instead of data.iloc[0][0]: integer keys on
# the label-indexed row Series rely on a positional fallback that pandas 2.1+
# deprecates.
company, sentences = data.iloc[0, 0], data.iloc[0, 3]

In [None]:
# One constant for the checkpoint so the tokenizer and the model are always
# loaded from the same pretrained weights.
MODEL_NAME = "google/flan-t5-base"

tokenizer = T5Tokenizer.from_pretrained(MODEL_NAME)
model = T5ForConditionalGeneration.from_pretrained(MODEL_NAME)

In [None]:
# Work on the first 100 rows only. Take an explicit copy: a new column is
# assigned to this frame later, and writing into a slice of `data` triggers
# pandas' SettingWithCopyWarning.
first_100 = data.head(100).copy()

In [None]:
def generate_answer(row):
    """Ask the model, sentence by sentence, where the company's stock is headed.

    Parameters
    ----------
    row : mapping
        Must provide ``row['company']`` (interpolated into the prompt) and
        ``row['sentences']`` (an iterable of sentence strings).

    Returns
    -------
    list of str
        One decoded answer per sentence, in input order, with special tokens
        removed. Empty when the row has no sentences.

    Notes
    -----
    Uses the notebook-level ``tokenizer`` and ``model`` objects.
    """
    answers = []

    for sentence in row['sentences']:
        # Build the prompt for this single sentence.
        input_text = f"Based on this sentence can you tell me if the stock of {row['company']} is going to go up, down or no information : {sentence}"

        # Encode the prompt as a batch of one.
        input_ids = tokenizer(input_text, return_tensors="pt").input_ids

        # Generate with the model's default generation settings.
        outputs = model.generate(input_ids)

        # skip_special_tokens=True strips special-token markers (for T5 these
        # are e.g. <pad> and </s>) so only the answer text is kept and
        # identical answers are counted together downstream.
        answers.append(tokenizer.decode(outputs[0], skip_special_tokens=True))

    return answers

# Run generate_answer on every row (axis=1 passes each row to the function) and
# store the per-row list of answers in a new 'T5_answer' column.
# NOTE(review): generate_answer issues one model.generate call per sentence, so
# this is likely the slowest cell in the notebook.
first_100['T5_answer'] = first_100.apply(generate_answer, axis=1)

In [None]:
# Show each row's sentences next to the answers generated for them.
print(first_100.loc[:, ["sentences", "T5_answer"]])

In [None]:
# Flatten the per-row answer lists: one Series entry per individual answer.
flat_series = first_100["T5_answer"].explode()

# Tally how often each distinct answer occurs (most frequent first).
counts = flat_series.value_counts()

# Display the tally.
print(counts)