# Model Testing

Load the model and tokenizer from their saved state, then run inference on a set of recent news headlines.

In [1]:
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer

### Loading the model and tokenizer

In [None]:
# The classifier and its tokenizer are restored from the same saved checkpoint,
# so the checkpoint name is spelled out once and shared by both loads.
MODEL_DIR = "jvdm_sp500_model"
tokenizer = AutoTokenizer.from_pretrained(MODEL_DIR)
model = AutoModelForSequenceClassification.from_pretrained(MODEL_DIR)

### Preparing the input data

In [3]:
import re
import nltk
from nltk.corpus import stopwords

# Make sure the NLTK 'stopwords' corpus is available locally; process_text
# reads the English stopword list from it.
nltk.download('stopwords')

def process_text(text):
    """Reduce raw text to a space-separated string of unique, lowercase words.

    Processing steps, in order:
      1. Every character that is not an ASCII letter is replaced by a space
         (digits and punctuation are therefore removed).
      2. The text is lowercased and split on whitespace.
      3. English NLTK stopwords are dropped, except 'not', which is kept.
      4. Repeated words are removed; the first occurrence keeps its position.

    Args:
        text: The raw input string.

    Returns:
        The remaining words joined by single spaces.
    """
    letters_only = re.sub('[^a-zA-Z]', ' ', text)
    tokens = letters_only.lower().split()

    # 'not' is deliberately excluded from the stopword list so it survives filtering.
    ignored = set(stopwords.words('english')) - {'not'}

    kept = []
    already_seen = set()
    for token in tokens:
        if token in ignored or token in already_seen:
            continue
        already_seen.add(token)
        kept.append(token)

    return ' '.join(kept)

[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/jeroen/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [6]:
# Headlines for 2024-08-16 (per the variable name), held as one long string.
# NOTE(review): several headlines are joined without any separator
# (e.g. "2024Judge", "concernsHarris", "newbornsFrequent"), which produces fused
# tokens such as "concernsharris" after cleaning -- confirm whether that is intended.
headlines_20240816 = "Amazon’s troubled drone delivery program faces latest challenge in Texas: Annoyed residents Stocks close higher Friday as market comeback lifts S&P 500 to best week of 2024Judge temporarily blocks sports streaming service Venu, siding with Fubo on antitrust concernsHarris calls for expanded child tax credit of up to $6,000 for families with newbornsFrequent media bidder Byron Allen draws ire with late payments to ABC, CBS and NBC Harris outlines an ‘opportunity economy’ centered on cutting food, housing, family costs GOP megadonor Miriam Adelson plans to do whatever it takes to help Trump win Streaming is getting more expensive for consumers. Here’s why Disney’s ‘Deadpool & Wolverine’ becomes the highest-grossing R-rated film of all time Trump owns more than $1 million in crypto and made $300K on branded Bibles, financial disclosure shows Bayer shares soar 11% after key U.S. legal win against Roundup cancer claims The federal minimum wage has been $7.25 for 15 years. How the election may change that British fintech Revolut valued at $45 billion in secondary share sale Inside the $93 million Wall Street heist that stemmed from Russia EV maker Rivian halts production of Amazon delivery vans amid parts shortage"

In [8]:
# Clean the raw headline string with process_text before it is tokenized.
input_text = process_text(headlines_20240816)

In [9]:
# Show the cleaned text so the exact model input can be inspected.
input_text

'amazon troubled drone delivery program faces latest challenge texas annoyed residents stocks close higher friday market comeback lifts p best week judge temporarily blocks sports streaming service venu siding fubo antitrust concernsharris calls expanded child tax credit families newbornsfrequent media bidder byron allen draws ire late payments abc cbs nbc harris outlines opportunity economy centered cutting food housing family costs gop megadonor miriam adelson plans whatever takes help trump win getting expensive consumers disney deadpool wolverine becomes highest grossing r rated film time owns million crypto made k branded bibles financial disclosure shows bayer shares soar key u legal roundup cancer claims federal minimum wage years election may change british fintech revolut valued billion secondary share sale inside wall street heist stemmed russia ev maker rivian halts production vans amid parts shortage'

### Inference

In [None]:
# Tokenize the cleaned headline text into PyTorch tensors.
# truncation=True asks the tokenizer to cut the sequence at its configured
# maximum length, so an unusually long batch of headlines does not overflow
# the model's input size and fail in the forward pass.
inputs = tokenizer(input_text, return_tensors="pt", truncation=True)

# Inference only: no gradients are needed, so skip building the autograd graph.
with torch.no_grad():
    outputs = model(**inputs)

In [11]:
# The predicted class is the index of the largest logit along the last axis.
logits = outputs.logits
predictions = logits.argmax(dim=-1)
print(predictions)

tensor([1])


**Prediction (class 1):** the S&P 500 will close between -0.5% and +0.5% on Monday, August 19, 2024.