## Get started
- install the requirements to run mamba
- run `pip install -r requirements_predict.txt`

In [1]:
import json
import os
import time
from datetime import datetime

import finnhub
import tqdm
import yfinance as yf

In [2]:
# Date window (inclusive bounds, "YYYY-MM-DD") used as the default for news queries.
START_DATE = "2022-01-01"
END_DATE = "2023-11-30"

# The Finnhub API key is read from the environment so that no credential is
# ever stored in (or committed with) the notebook. Set it before starting
# Jupyter, e.g.:  export FINNHUB_API_KEY="<your key>"
FINNHUB_API_KEY = os.environ.get("FINNHUB_API_KEY")
if not FINNHUB_API_KEY:
    raise RuntimeError(
        "Set the FINNHUB_API_KEY environment variable to your Finnhub API key."
    )
finnhub_client = finnhub.Client(api_key=FINNHUB_API_KEY)

# Tickers to collect news for. The order here determines the order in which
# tickers are queried.
DOW_30 = [
    "AXP", "AMGN", "AAPL", "BA", "CAT", "CSCO", "CVX", "GS", "HD", "HON",
    "IBM", "INTC", "JNJ", "KO", "JPM", "MCD", "MMM", "MRK", "MSFT", "NKE",
    "PG", "TRV", "UNH", "CRM", "VZ", "V", "WBA", "WMT", "DIS", "DOW"
]

In [14]:
def get_news(symbol, start_date=None, end_date=None):
    '''
    Fetch company news for a ticker from Finnhub.

    Parameters
    ----------
    symbol : str
        Ticker symbol passed to `finnhub_client.company_news`.
    start_date, end_date : str, optional
        Bounds of the query window, forwarded to the API as `_from` / `to`.
        When omitted they fall back to the notebook-level START_DATE and
        END_DATE constants.

    Returns
    -------
    list of dict
        One dict per article with the keys `symbol` (the API's `related`
        field), `datetime`, `headline`, `summary`, `source` and `link`
        (the API's `url` field). All other API fields are dropped.
    '''
    if start_date is None:
        start_date = START_DATE
    if end_date is None:
        end_date = END_DATE

    articles = finnhub_client.company_news(symbol, _from=start_date, to=end_date)

    # Keep only the fields used downstream, renaming `related` and `url`.
    return [
        {
            'symbol': article['related'],
            'datetime': article['datetime'],
            'headline': article['headline'],
            'summary': article['summary'],
            'source': article['source'],
            'link': article['url'],
        }
        for article in articles
    ]

In [4]:
import torch
from mamba_ssm.models.mixer_seq_simple import MambaLMHeadModel
from transformers import AutoTokenizer

# Directory holding the fine-tuned Mamba checkpoint and its tokenizer files.
# Override with the MAMBA_MODEL_PATH environment variable to run the notebook
# on a machine where the checkpoint lives somewhere else.
model_path = os.environ.get("MAMBA_MODEL_PATH", '/files/my_trains/mamba_FPB/complete')

# The model is loaded in bfloat16 directly onto the GPU; get_sentiment moves
# its inputs to "cuda" as well, so a CUDA device is required.
model = MambaLMHeadModel.from_pretrained(model_path, dtype=torch.bfloat16, device="cuda")
tokenizer = AutoTokenizer.from_pretrained(model_path)
# Use the end-of-text token both as EOS and as padding token.
tokenizer.eos_token = "<|endoftext|>"
tokenizer.pad_token = tokenizer.eos_token
    
def get_sentiment(input, max_length=250, label_token_budget=8):
    '''
    Classify the sentiment of a piece of news text with our Mamba model.

    Parameters
    ----------
    input : str
        Headline or summary text to classify.
    max_length : int, optional
        Total sequence budget (prompt + generated tokens) passed to
        `model.generate`.
    label_token_budget : int, optional
        Number of tokens that stay available for the generated label even
        when the prompt alone reaches or exceeds `max_length`. Without this,
        long inputs leave no room to generate the full answer.

    Returns
    -------
    str
        The text that follows the final "Classification:" marker in the
        decoded output, stripped of surrounding whitespace.
    '''
    # NOTE(review): the prompt (spelling and indentation included) is left
    # byte-identical; presumably it matches the fine-tuning prompt - confirm
    # before correcting it.
    text = f"""Classify the setiment of the following news headlines as either `positive`, `neutral`, or `negative`.\n
    Headline: {input}\n
    Classification:"""

    input_ids = tokenizer(text, return_tensors="pt").input_ids.to("cuda")

    # `max_length` counts the prompt too, so extend it whenever the prompt
    # would otherwise consume the whole budget.
    prompt_length = input_ids.shape[1]
    total_length = max(max_length, prompt_length + label_token_budget)

    # NOTE(review): mamba_ssm's generate appears to default to top_k=1
    # (greedy), in which case temperature/top_p have no effect - confirm
    # against the installed version.
    out = model.generate(
        input_ids=input_ids,
        max_length=total_length,
        temperature=0.9,
        top_p=0.7,
        eos_token_id=tokenizer.eos_token_id,
    )

    decoded = tokenizer.decode(out[0], skip_special_tokens=True)
    # Split on the marker without a trailing space: the prompt ends in
    # "Classification:" and the model may or may not emit a leading space.
    return decoded.split('Classification:')[-1].strip()

  from .autonotebook import tqdm as notebook_tqdm
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [17]:
# Collect the news of every ticker in DOW_30 into one flat list of article
# dicts. `extend` (not `append`) is required: get_news returns a list per
# ticker, and the sentiment cell indexes each element of `dataset` by string
# keys such as 'headline'.
dataset = []
# `tqdm` is imported as a module, so the progress bar class is `tqdm.tqdm`.
for symbol in tqdm.tqdm(DOW_30):
    dataset.extend(get_news(symbol))

100%|██████████| 30/30 [00:03<00:00,  7.84it/s]


In [10]:
# Annotate every article in `dataset` in place with the model's sentiment for
# its headline and for its summary. Each element must be an article dict with
# 'headline' and 'summary' keys, as produced by get_news.
# `tqdm` is imported as a module, so the progress bar class is `tqdm.tqdm`.
for article in tqdm.tqdm(dataset):
    article['headline_sentiment'] = get_sentiment(article['headline'])
    article['summary_sentiment'] = get_sentiment(article['summary'])

 19%|█▉        | 43/226 [00:55<01:47,  1.71it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (1076 > 200). Running this sequence through the model will result in indexing errors
100%|██████████| 226/226 [04:41<00:00,  1.25s/it]


In [11]:
# Preview the raw Finnhub payload for one ticker. `news` only exists as a
# local variable inside get_news, so it cannot be displayed from notebook
# scope; query the API directly and show just the first few records.
finnhub_client.company_news("AAPL", _from=START_DATE, to=END_DATE)[:3]

[{'category': 'company',
  'datetime': 1701375480,
  'headline': 'Apple’s Credit Card Partnership With Goldman Sachs Could Be Over. Why Apple Pay Will Still Thrive.',
  'id': 124210219,
  'image': 'https://s.yimg.com/ny/api/res/1.2/DmpknTtZAfrMHCP83Wawgw--/YXBwaWQ9aGlnaGxhbmRlcjt3PTEyMDA7aD02MDA-/https://media.zenfs.com/en/Barrons.com/32cb85de80efc74eae974973af61acfb',
  'related': 'AAPL',
  'source': 'Yahoo',
  'summary': 'Earlier this week, The Wall Street Journal reported that Apple is looking to end its credit card partnership with  Goldman Sachs  within the next 12 to 15 months.  Apple hasn’t responded to a request for comment, but it’s easy to see why the four-year old partnership would be ending.  Goldman has largely exited from consumer banking, selling lending platform GreenSky at a loss and suffered more than $3 billion in losses from its push into consumer lending.',
  'url': 'https://finnhub.io/api/news?id=cb05fbab1449c5c822753d43f9a93dd9ea358789c1787fb30953906a3f24b25c',
 

In [33]:
# NOTE(review): `data_loader` is not defined in any cell visible in this part
# of the notebook - confirm where it is created; if it only exists as leftover
# kernel state this cell will fail on Restart & Run All.
data_loader

[0,
 1,
 2,
 3,
 4,
 5,
 6,
 7,
 8,
 9,
 10,
 11,
 12,
 13,
 14,
 15,
 16,
 17,
 18,
 19,
 20,
 21,
 22,
 23,
 24,
 25,
 26,
 27,
 28,
 29,
 30,
 31,
 32,
 33,
 34,
 35,
 36,
 37,
 38,
 39,
 40,
 41,
 42,
 43,
 44,
 45,
 46,
 47,
 48,
 49,
 50,
 51,
 52,
 53,
 54,
 55,
 56,
 57,
 58,
 59,
 60,
 61,
 62,
 63,
 64,
 65,
 66,
 67,
 68,
 69,
 70,
 71,
 72,
 73,
 74,
 75,
 76,
 77,
 78,
 79,
 80,
 81,
 82,
 83,
 84,
 85,
 86,
 87,
 88,
 89,
 90,
 91,
 92,
 93,
 94,
 95,
 96,
 97,
 98,
 99,
 100,
 101,
 102,
 103,
 104,
 105,
 106,
 107,
 108,
 109,
 110,
 111,
 112,
 113,
 114,
 115,
 116,
 117,
 118,
 119,
 120,
 121,
 122,
 123,
 124,
 125,
 126,
 127,
 128,
 129,
 130,
 131,
 132,
 133,
 134,
 135,
 136,
 137,
 138,
 139,
 140,
 141,
 142,
 143,
 144,
 145,
 146,
 147,
 148,
 149,
 150,
 151,
 152,
 153,
 154,
 155,
 156,
 157,
 158,
 159,
 160,
 161,
 162,
 163,
 164,
 165,
 166,
 167,
 168,
 169,
 170,
 171,
 172,
 173,
 174,
 175,
 176,
 177,
 178,
 179,
 180,
 181,
 182,
 183,
 184,
