In [None]:
!pip install transformers
!pip install datsets transformers[sentencepiece]
!pip install sentencepiece
!pip install --quiet pytorch-lightning
!pip install --quiet torchtext
!pip install --upgrade accelerate

Collecting transformers
  Downloading transformers-4.30.2-py3-none-any.whl (7.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.2/7.2 MB[0m [31m48.3 MB/s[0m eta [36m0:00:00[0m
Collecting huggingface-hub<1.0,>=0.14.1 (from transformers)
  Downloading huggingface_hub-0.15.1-py3-none-any.whl (236 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m236.8/236.8 kB[0m [31m19.5 MB/s[0m eta [36m0:00:00[0m
Collecting tokenizers!=0.11.3,<0.14,>=0.11.1 (from transformers)
  Downloading tokenizers-0.13.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.8/7.8 MB[0m [31m95.6 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting safetensors>=0.3.1 (from transformers)
  Downloading safetensors-0.3.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m53.2 MB/s[0m eta [36m0:00:0

In [None]:
from transformers import (
    AdamW,
    T5ForConditionalGeneration,
    T5Tokenizer,
    get_linear_schedule_with_warmup
)
import torch

# Directories holding the fine-tuned T5 topic model and its tokenizer
# (presumably on a mounted Google Drive — the mount itself is not shown here).
topic_trained_model_path = '/content/drive/MyDrive/fyp/model'
topic_trained_tokenizer = '/content/drive/MyDrive/fyp/tokenizer'

# Restore the model weights first, then the matching tokenizer.
topic_model = T5ForConditionalGeneration.from_pretrained(topic_trained_model_path)
topic_tokenizer = T5Tokenizer.from_pretrained(topic_trained_tokenizer)

# Prefer CUDA when torch can see a GPU; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print("device ", device)

# Move the model to the selected device; inputs are moved to the same device
# inside topic_modeling().
topic_model = topic_model.to(device)

def topic_modeling(text):
    """Generate a topic string for ``text`` with the fine-tuned T5 model.

    The text is tokenized (truncated to at most 512 tokens), decoded with
    5-beam search limited to 72 output tokens, and the single returned beam
    is converted back to a string. The resulting topic is printed and
    returned.

    Args:
        text: The raw input text to extract a topic from.

    Returns:
        The portion of the decoded output that follows the first ``':'``
        (leading whitespace is preserved). When the decoded output contains
        no ``':'`` — or generation yields nothing — the decoded string is
        returned unchanged instead of raising ``IndexError``.
    """
    # truncation=True is what makes max_length effective; without it the
    # tokenizer does not cut inputs longer than 512 tokens.
    encoding = topic_tokenizer.encode_plus(
        text,
        max_length=512,
        truncation=True,
        padding=True,
        return_tensors="pt",
    )
    input_ids = encoding["input_ids"].to(device)
    attention_mask = encoding["attention_mask"].to(device)

    beam_outputs = topic_model.generate(
        input_ids=input_ids,
        attention_mask=attention_mask,
        max_length=72,
        early_stopping=True,
        num_beams=5,
        num_return_sequences=1,
    )

    # Only one sequence is requested, so this loop runs at most once; the
    # empty-string default covers the case where nothing is returned.
    decoded = ""
    for beam_output in beam_outputs:
        decoded = topic_tokenizer.decode(
            beam_output,
            skip_special_tokens=True,
            clean_up_tokenization_spaces=True,
        )

    # Keep everything after the first ':' (a topic may itself contain ':'),
    # and fall back to the full text when there is no separator at all.
    _, separator, remainder = decoded.partition(':')
    topic = remainder if separator else decoded

    print(topic)
    return topic

device  cpu


In [None]:
!pip install Keras-Preprocessing


Collecting Keras-Preprocessing
  Downloading Keras_Preprocessing-1.1.2-py2.py3-none-any.whl (42 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m42.6/42.6 kB[0m [31m2.6 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: Keras-Preprocessing
Successfully installed Keras-Preprocessing-1.1.2


In [None]:
from tensorflow.keras.models import load_model
from keras_preprocessing.sequence import pad_sequences
import pickle
# Every tokenized text is padded/truncated to this many tokens before it is
# handed to the sentiment model.
SEQUENCE_LENGTH = 300

# Locations of the trained Keras model and its pickled tokenizer.
sentiment_trained_model_path = '/content/drive/MyDrive/fyp/final/model.h5'
sentiment_trained_tokenizer_path = '/content/drive/MyDrive/fyp/final/tokenizer.pkl'

# Restore the trained model from its HDF5 file.
sentiment_model = load_model(sentiment_trained_model_path)

# Restore the tokenizer object. Unpickling can execute arbitrary code, so this
# file must only ever come from a trusted source.
with open(sentiment_trained_tokenizer_path, mode='rb') as tokenizer_file:
    sentiment_tokenizer = pickle.load(tokenizer_file)

def sentiment_score_prediction(text):
    """Return the sentiment model's score for a single piece of text.

    The text is converted to a token-id sequence with ``sentiment_tokenizer``,
    padded to ``SEQUENCE_LENGTH`` and passed through ``sentiment_model``.

    Args:
        text: The text to score. ``None`` and NaN (what pandas yields for an
            empty CSV cell) are treated as an empty string; any other
            non-string value is converted with ``str()``.

    Returns:
        The first element of the model's prediction for this one sample.
    """
    if not isinstance(text, str):
        # `text != text` is true only for NaN; a missing value must not reach
        # the tokenizer, which expects strings.
        text = "" if text is None or text != text else str(text)

    x_test = pad_sequences(
        sentiment_tokenizer.texts_to_sequences([text]),
        maxlen=SEQUENCE_LENGTH,
    )
    # verbose=0 silences the per-call progress bar, which otherwise floods the
    # cell output when this function is called once per dataset row.
    score = sentiment_model.predict([x_test], verbose=0)[0]
    return score[0]

In [None]:
# Sample post used to exercise the full pipeline: a topic is extracted first,
# and the sentiment model then scores that extracted topic (not the raw post).
text = "A provision of the U.S. debt ceiling bill that streamlined the federal approval process for the $6.6 billion Mountain Valley Pipeline and limited court reviews of challenges to the project violates the U.S. Constitution’s separation of powers doctrine, opponents of the pipeline have claimed.Today, after months and years being held up the government just released this: 'Mountain Valley is authorized by the Commission to proceed with all construction activities at all locations in accordance with federal authorizations, notwithstanding any request for rehearing of this order.' Mountain Valley Pipeline LLC - a joint project between Equitrans, NextEra Energy Inc., Consolidated Edison Inc., AltaGas Ltd and RGC Resources Inc. - has said the pipeline is over 94% complete.VERY BULLISH!"
sentiment_score = sentiment_score_prediction(
    topic_modeling(text)
)



 Mountain Valley Pipeline LLC - a joint project between Equitrans, NextEra Energy Inc., Consolidated Edison Inc., AltaGas Ltd. - has said the pipeline is over 94% complete.


In [None]:
# Show the score computed for the extracted topic of the sample post.
print(sentiment_score)

0.9064671


In [None]:
# Score a short post directly, without the topic-extraction step.
sentiment_score = sentiment_score_prediction(
    "$ETRN Equitrans has amazing value right now"
)



In [None]:
# Show the score for the short post.
# NOTE(review): the recorded output here (0.9064671) is identical to the score
# recorded for the earlier, different input — confirm the tokenizer's
# vocabulary actually covers these texts and that this output is not stale.
print(sentiment_score)

0.9064671


In [None]:
import pandas as pd

# Score every row of the BTC dataset and export the result as a CSV.
input_file = '/content/drive/MyDrive/fyp/dataset/extracted_btc.csv'
output_file = 'output_csv_file_btc.csv'

# Step 1: Read the CSV file
data = pd.read_csv(input_file)

# Step 2: Score the 'text' column row by row. Iterating the column directly
# avoids materialising a full row Series per iteration, as iterrows() does.
sentiment_scores = [
    sentiment_score_prediction(body_text) for body_text in data['text']
]

# Step 3: Add the sentiment scores as a new column in the DataFrame
data['sentiment_score'] = sentiment_scores

# Step 4: Save the updated DataFrame to a new CSV file (written once; the
# previous version wrote the identical file twice)
data.to_csv(output_file, index=False)

# Step 5: Download the CSV file. The import stays next to its use so the CSV
# is still written when this runs outside Colab.
from google.colab import files
files.download(output_file)




<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
import pandas as pd

# Score the 'text' column of the extracted dataset and export the result.
input_file = '/content/drive/MyDrive/fyp/dataset/extracted.csv'
output_file = 'output_csv_file.csv'

# Step 1: Read the CSV file
data = pd.read_csv(input_file)

# Step 2: Score the 'text' column row by row. Iterating the column directly
# avoids materialising a full row Series per iteration, as iterrows() does.
sentiment_scores = [
    sentiment_score_prediction(body_text) for body_text in data['text']
]

# Step 3: Add the sentiment scores as a new column in the DataFrame
data['sentiment_score'] = sentiment_scores

# Step 4: Save the updated DataFrame to a new CSV file (written once; the
# previous version wrote the identical file twice)
data.to_csv(output_file, index=False)

# Step 5: Download the CSV file. The import stays next to its use so the CSV
# is still written when this runs outside Colab.
from google.colab import files
files.download(output_file)


[1;30;43mStreaming output truncated to the last 5000 lines.[0m


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
import pandas as pd

# Score "<preprocessed body> <preprocessed title>" for every row of the
# extracted dataset and export the result.
# NOTE(review): this writes 'output_csv_file.csv', the same name the cell that
# scores the raw 'text' column uses, so running both overwrites the first
# result — confirm that is intended.
input_file = '/content/drive/MyDrive/fyp/dataset/extracted.csv'
output_file = 'output_csv_file.csv'

# Step 1: Read the CSV file
data = pd.read_csv(input_file)

# Step 2: Replace missing cells with empty strings. Formatting a NaN into the
# f-string below would otherwise inject the literal word "nan" into the text.
body_texts = data['preprocessed_body'].fillna('')
title_texts = data['preprocessed_title'].fillna('')

# Step 3: Concatenate body and title and score each row. The comprehension
# keeps its loop variables local, so the notebook-level `text` variable defined
# in an earlier cell is no longer overwritten.
sentiment_scores = [
    sentiment_score_prediction(f"{body_text} {title_text}")
    for body_text, title_text in zip(body_texts, title_texts)
]

# Step 4: Add the sentiment scores as a new column in the DataFrame
data['sentiment_score'] = sentiment_scores

# Step 5: Save the updated DataFrame to a new CSV file (written once; the
# previous version wrote the identical file twice)
data.to_csv(output_file, index=False)

# Step 6: Download the CSV file. The import stays next to its use so the CSV
# is still written when this runs outside Colab.
from google.colab import files
files.download(output_file)


