In [2]:
import pickle
import torch
import torch.nn as nn
from transformers import BertTokenizer, BertModel
import pandas as pd
import numpy as np
# loading all models first

# Load the model company
with open('xgboost_model_compVsnifty.pkl', 'rb') as f:
    loaded_model1 = pickle.load(f)
    
# load nasdaq model
with open('xgboost_model_niftyVsnasdaq.pkl', 'rb') as f:
    loaded_model2 = pickle.load(f)

# load bert model
class SentimentAnalysisModel(nn.Module):
    def __init__(self):
        super(SentimentAnalysisModel, self).__init__()
        self.bert = BertModel.from_pretrained('bert-base-uncased')
        self.dropout = nn.Dropout(0.1)
        self.classifier = nn.Linear(self.bert.config.hidden_size, 1)  # Output size is 1

    def forward(self, input_ids, attention_mask):
        outputs = self.bert(input_ids, attention_mask=attention_mask)
        pooled_output = outputs.pooler_output
        pooled_output = self.dropout(pooled_output)
        outputs = self.classifier(pooled_output)
        return outputs

# load the saved model
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
loaded_model3 = SentimentAnalysisModel()
loaded_model3.load_state_dict(torch.load('bert_stock_sentiment_model.pth', map_location=device))
loaded_model3.to(device)
loaded_model3.eval()


  loaded_model3.load_state_dict(torch.load('bert_stock_sentiment_model.pth', map_location=device))


SentimentAnalysisModel(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, el

In [3]:
# company
#change values as input comes
X_test1_dict = {
    'Infosys net profit': [1.6],
    'Nifty change_profit': [-0.29],
    'Nifty change_close': [1],
    'TCS change_profit': [0],
    'Infosys change_profit': [0.65],
    'HCL change_profit': [1],
    'Nifty change_sales': [1],
    'TCS change_sales': [-0.12],
    'Infosys change_sales': [-1],
    'Last_close': [-1]
}

X_test1 = pd.DataFrame(X_test1_dict)

y_pred1= loaded_model1.predict(X_test1)
y_pred_proba1 = loaded_model1.predict_proba(X_test1)
y_pred1

array([1])

In [4]:
# nasdaq
# change the values as the input comes.
X_test2_dict = {
    'nasdaq_close_percentage_change': [0.03],
    'nifty_close_percentage_change': [1],
    'nasdaq_open_percentage_change': [-0.45],
    'nifty_open_percentage_change': [0.45],
    'nasdaq_high_percentage_change': [-0.23],
    'nifty_high_percentage_change': [0.56],
    'nasdaq_low_percentage_change': [-0.01],
    'nifty_low_percentage_change': [0]
}

X_test2 = pd.DataFrame(X_test2_dict)

y_pred2 = loaded_model2.predict(X_test2)
y_pred_proba2 = loaded_model2.predict_proba(X_test2)
y_pred2

array([1])

In [5]:
# bert sentiment analysis

# Assuming 'model' and 'tokenizer' are already defined and loaded as per your original code

def prepare_input(texts):
    # Check if the input is a list or a single string
    if isinstance(texts, str):
        texts = [texts]  # Convert single string to a list

    # Prepare the input data for each text in the list
    input_ids = []
    attention_masks = []
    tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
    for text in texts:
        inputs = tokenizer.encode_plus(
            text,
            add_special_tokens=True,
            max_length=512,
            padding='max_length',
            truncation=True,
            return_attention_mask=True,
            return_tensors='pt'
        )
        input_ids.append(inputs['input_ids'])
        attention_masks.append(inputs['attention_mask'])

    # Stack the inputs to create tensors for batch processing
    input_ids = torch.cat(input_ids, dim=0).to(device)
    attention_masks = torch.cat(attention_masks, dim=0).to(device)

    # Make predictions for the entire batch
    with torch.no_grad():
        outputs = loaded_model3(input_ids, attention_mask=attention_masks)

    # Apply sigmoid activation and threshold to get predictions
    threshold = 0.5
    predictions = torch.sigmoid(outputs).squeeze().cpu().numpy()

    # Use numpy.where to assign -1 for bad sentiment, 1 for good sentiment
    sentiment_output = np.where(predictions > threshold, 1, -1)

    return sentiment_output

# This will be changed by the news extracted from website.
text_2 = ['Nifty falls short of its targets by a lot','Once held by Quant MF, this small cap stock is up 20% and here is why','PNB stock slips 2.5% after QIP launched at discounted price']

# Predict for a list of sentences
list_pred = prepare_input(text_2)
print(f"List of sentences prediction: {list_pred}")




List of sentences prediction: [-1  1  1]


In [6]:
y_pred3= np.mean(list_pred)
if(y_pred3<0):
    y_pred3=0
y_pred3

0.3333333333333333

In [7]:
def weighted_voting(predictions, weights):
    weighted_sum = sum(p * w for p, w in zip(predictions, weights))
    return 1 if weighted_sum > 0.5 * sum(weights) else 0

weights = [0.3, 0.4, 0.3]
final_prediction = weighted_voting([y_pred1,y_pred2,y_pred3], weights)
print(final_prediction)  # Output depends on the weighted sum


1
