In [None]:
from transformers import BertForSequenceClassification, BertTokenizer
import torch

# load the tokenizer and the sequence-classification model from one checkpoint
MODEL_NAME = 'ProsusAI/finbert'
tokenizer = BertTokenizer.from_pretrained(MODEL_NAME)
model = BertForSequenceClassification.from_pretrained(MODEL_NAME)

def sentiment(tokens):
    """Return class probabilities for one batch of tokenized input.

    Args:
        tokens: mapping of keyword arguments forwarded to ``model(**tokens)``;
            elsewhere in this notebook it carries 'input_ids' and
            'attention_mask' tensors.

    Returns:
        Tensor of probabilities: a softmax over the last dimension of the
        model's first output (the logits). The forward pass runs under
        ``torch.no_grad()``, so the result carries no autograd graph.
    """
    # inference only: without no_grad() every call would keep its autograd
    # graph alive, and the returned tensor would require grad, which makes
    # in-place ops such as resize_() on the stacked results raise
    with torch.no_grad():
        # get output logits from the model
        output = model(**tokens)
        # convert to probabilities
        probs = torch.nn.functional.softmax(output[0], dim=-1)
    # return the full probability tensor; argmax is left to the caller
    return probs

In [None]:
# sample long-form text used as input for the windowed sentiment analysis below
txt = """
Hagley railway station serves the English village of Hagley, Worcestershire. Off peak trains call three times an hour in each direction, running to or through Kidderminster westwards and through Stourbridge and Birmingham Snow Hill eastwards. Additional trains also call during the morning and evening rush hours. A half hourly service runs in the late evenings and an hourly service on Sundays.[1] Customer Information Screens are installed on either platform. Since the 2017 change of franchise, services are run by West Midlands Trains.[2]

The nearest railway stations are Stourbridge Junction (towards Birmingham) and Blakedown (towards Kidderminster and Worcester).

The station retains one of its GWR-era station buildings and its canopied footbridge, both dating from 1884. Although typical of its era, very few examples of that kind of ornamental ironwork bridge now survive and it was listed grade 2 in 2000.[3] When it was refurbished in late 2011, the colours reverted from its former navy blue and white to the original GWR cream and salmon livery.[4] The footbridge was also used by Hornby as the basis for its 00 Gauge model.[5]

History
The original village of Hagley was a mile away uphill; when its station first appeared in timetables in 1862 as part of the Oxford, Worcester and Wolverhampton railway, it was a rough and ready structure with platforms built of old sleepers. With the line subsequently being taken over by GWR, and the expansion of Lower Hagley along the nearby road to Worcester, there was a demand for a proper building with a station approach up to it.[6] Canopied brick buildings were constructed on either side of the line at this time. While the one on the Stourbridge side housed waiting rooms and toilets, on the Kidderminster and Station Drive side there was the stationmaster's office, the ticket office, and two more waiting rooms and toilets.

Under him the stationmaster had a booking clerk and three porters as well as someone to deliver parcels and personal luggage. There were also three signalmen at the box beyond the Brake Lane bridge and maintenance workers responsible for the track and embankments. The signal box has now gone while the building on the Stourbridge platform has been demolished and replaced with a metal shelter. In the former goods yard north of the station on Brake Lane were coal merchants and the offices responsible for dealing with livestock brought by train for sale at the Monday cattle market, which was located uphill in the old village (at the junction of the A491 and the A456). Where the sidings for coal trucks and the cattle pens used to be, there is now a private housing development named The Sidings after the site.
"""

# tokenize the whole text in one go, without special tokens: the start/end
# markers are added per chunk when the windows are built further down.
# Calling the tokenizer directly replaces encode_plus(), which the
# transformers documentation marks as deprecated in favour of __call__
tokens = tokenizer(txt, add_special_tokens=False)

# number of tokens in the full document
len(tokens['input_ids'])

In [None]:
# pull out the two parallel lists that get sliced into windows later
input_ids, attention_mask = tokens['input_ids'], tokens['attention_mask']

In [None]:
# peek at a 16-token slice of the ids
input_ids[16:32]

In [None]:
# dry run of the windowing logic: print the [start, end) bounds of each window
window_size = 512
total_len = len(input_ids)

start = 0
loop = True

while loop:
    # clamp the window to the end of the sequence
    end = min(start + window_size, total_len)
    # keep going only while tokens remain beyond this window
    loop = end < total_len

    print(f'{start=}\n{end=}')
    # the next window begins where this one stopped
    start = end

In [None]:
# per-chunk probability tensors returned by sentiment()
probs_list = []

start = 0
# content tokens per chunk; two special tokens are added around each window,
# so every chunk fed to the model is window_size + 2 tokens long
# (presumably the model's maximum input length - confirm against its config)
window_size = 510
chunk_len = window_size + 2

# take the special-token ids from the tokenizer rather than hard-coding them;
# for this BERT vocabulary they are presumably the 101 / 102 / 0 used before
cls_id = tokenizer.cls_token_id
sep_id = tokenizer.sep_token_id
pad_id = tokenizer.pad_token_id

total_len = len(input_ids)

loop = True

while loop:
    end = start + window_size

    # last window: clamp to the end of the sequence and stop afterwards
    if end >= total_len:
        loop = False
        end = total_len

    # extract window from input_ids and attention_mask, wrapped in the
    # start/end special tokens (which are attended to, hence mask value 1)
    input_ids_chunk = [cls_id] + input_ids[start:end] + [sep_id]
    attention_mask_chunk = [1] + attention_mask[start:end] + [1]

    # right-pad a short (final) chunk up to the fixed chunk length;
    # padding positions get attention mask 0 so the model ignores them
    pad_len = chunk_len - len(input_ids_chunk)
    input_ids_chunk += [pad_id] * pad_len
    attention_mask_chunk += [0] * pad_len

    # build integer tensors directly (torch.Tensor(...) would create float32
    # values first and then cast); the extra list level is the batch dimension
    input_dict = {
        'input_ids': torch.tensor([input_ids_chunk], dtype=torch.long),
        'attention_mask': torch.tensor([attention_mask_chunk], dtype=torch.int)
    }

    print(len(input_ids_chunk))

    probs_list.append(sentiment(input_dict))

    print(f'{start=}\n{end=}')
    # the next window begins where this one stopped
    start = end

In [None]:
# inspect the per-chunk probability tensors collected by the loop
probs_list

In [None]:
# join the per-chunk tensors along a new leading (chunk) dimension
stack = torch.stack(probs_list, dim=0)
stack

In [None]:
# full shape of the stacked tensor
stack.size()

In [None]:
# the two dimensions to keep: first (chunks) and third (per-class values)
stack.size(0), stack.size(2)

In [None]:
# drop the middle dimension with reshape(), which returns a new view.
# resize_() is avoided here: it raises "cannot resize variables that require
# grad" on a tensor still attached to the autograd graph, and it would
# mutate `stack` in place
stack.reshape(stack.shape[0], stack.shape[2])

In [None]:
with torch.no_grad():
    stack = torch.stack(probs_list)
    # collapse to (first dim, third dim); reshape() raises if the element
    # count does not match, whereas resize_() would silently truncate or
    # expose uninitialised memory
    stack = stack.reshape(stack.shape[0], stack.shape[2])
    # unweighted mean over chunks: every chunk counts equally, including a
    # short final chunk that is mostly padding
    avg = stack.mean(dim=0)

avg

In [None]:
# index of the highest averaged probability, as a plain Python int
avg.argmax().item()