In [1]:
# Library Load Model
import pandas as pd
import numpy as np
import pickle
from tensorflow.keras.models import load_model

# Library Pre-Processing
import nltk
import re
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
import string
from nltk.stem import WordNetLemmatizer
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from gensim.models import Word2Vec

## Load tokenizer and model

In [2]:
# Restore the pickled tokenizer object; it is used later via tokenizer.texts_to_sequences.
# NOTE(review): pickle.load can execute arbitrary code from the file -- only load a
# tokenizer.pkl that comes from a trusted source.
with open("tokenizer.pkl", "rb") as tokenizer_file:
    tokenizer = pickle.load(tokenizer_file)

In [3]:
# Load the saved Keras model from 'model_improve.h5'; model_rnn.predict is called in the Predict section.
model_rnn = load_model('model_improve.h5')

## Define preprocessing functions

In [4]:
# build text cleaning function
def clean_text(x: str) -> str:
    '''
    Normalize raw text with a fixed sequence of regex substitutions.

    Steps, in order: lowercase, drop "[...]" spans, drop punctuation, drop
    tokens containing a digit, drop tokens starting at "http", delete newlines.

    Parameters:
        x (str): The input text to be cleaned.
    Returns:
        str: The cleaned text. Whitespace is not collapsed, so removed tokens
        can leave consecutive or trailing spaces behind.
    '''
    # Convert the input text to lowercase
    text = x.lower()

    # Remove square brackets and any content within them. The match is
    # non-greedy and '.' does not match a newline, so a bracket pair that
    # spans two lines is left untouched.
    text = re.sub(r'\[.*?\]', '', text)

    # Remove punctuation: every character that is neither a word character
    # nor whitespace.
    text = re.sub(r'[^\w\s]', '', text)

    # Remove whole tokens that contain at least one digit
    text = re.sub(r'\w*\d\w*', '', text)

    # Remove URLs. Punctuation is already gone at this point, so
    # "http://a.com" has become "httpacom"; this drops any run of non-space
    # characters beginning at "http".
    text = re.sub(r'http\S+', '', text)

    # Delete newline characters.
    # NOTE(review): the newline is deleted, not replaced by a space, so words
    # separated only by a line break are joined ("one\ntwo" -> "onetwo").
    # Left unchanged here because the model was presumably trained on text
    # cleaned the same way -- confirm before altering.
    text = re.sub(r'\n', '', text)

    # Return the cleaned text
    return text

In [5]:
def remove_stopwords(text):
    '''
    Drop English stopwords from a piece of text.

    The text is split into tokens with NLTK, each token whose lowercase form
    appears in NLTK's English stopword list is discarded, and the surviving
    tokens are joined back together with single spaces.

    Parameters:
        text (str): The input text to be cleaned.
    Returns:
        str: The space-joined tokens that are not stopwords.
    '''
    english_stopwords = set(stopwords.words('english'))

    kept_tokens = []
    for token in word_tokenize(text):
        # Compare case-insensitively, but keep the token's original casing.
        if token.lower() in english_stopwords:
            continue
        kept_tokens.append(token)

    return ' '.join(kept_tokens)

In [6]:
def lemmatize_text(text):
    '''
    Replace every token of the text with its WordNet lemma.

    The text is tokenized with NLTK and each token is passed to
    WordNetLemmatizer.lemmatize without a part-of-speech argument, so the
    lemmatizer's default part of speech applies to every token.

    Parameters:
        text (str): The input text to be lemmatized.
    Returns:
        str: The lemmatized tokens joined with single spaces.
    '''
    wordnet = WordNetLemmatizer()
    lemmas = map(wordnet.lemmatize, word_tokenize(text))
    return ' '.join(lemmas)

In [7]:
def preprocess_text(text):
    '''
    Run the full preprocessing pipeline on one piece of text.

    The steps are applied in a fixed order: clean_text, then
    remove_stopwords, then lemmatize_text. Each step receives the output of
    the previous one.

    Parameters:
        text (str): The input text to be preprocessed.

    Returns:
        str: The preprocessed text.
    '''
    pipeline = (clean_text, remove_stopwords, lemmatize_text)
    for step in pipeline:
        text = step(text)
    return text

## Inference data

In [26]:
# Build a one-row DataFrame holding the news article to classify.

article_text = '''
As U.S. budget fight looms, Republicans flip their fiscal script WASHINGTON (Reuters) - The head of a conservative Republican faction in the U.S. Congress, who voted this month for a huge expansion of the national debt to pay for tax cuts, called himself a “fiscal conservative” on Sunday and urged budget restraint in 2018. In keeping with a sharp pivot under way among Republicans, U.S. Representative Mark Meadows, speaking on CBS’ “Face the Nation,” drew a hard line on federal spending, which lawmakers are bracing to do battle over in January. When they return from the holidays on Wednesday, lawmakers will begin trying to pass a federal budget in a fight likely to be linked to other issues, such as immigration policy, even as the November congressional election campaigns approach in which Republicans will seek to keep control of Congress. President Donald Trump and his Republicans want a big budget increase in military spending, while Democrats also want proportional increases for non-defense “discretionary” spending on programs that support education, scientific research, infrastructure, public health and environmental protection. “The (Trump) administration has already been willing to say: ‘We’re going to increase non-defense discretionary spending ... by about 7 percent,’” Meadows, chairman of the small but influential House Freedom Caucus, said on the program. “Now, Democrats are saying that’s not enough, we need to give the government a pay raise of 10 to 11 percent. For a fiscal conservative, I don’t see where the rationale is. ... Eventually you run out of other people’s money,” he said. Meadows was among Republicans who voted in late December for their party’s debt-financed tax overhaul, which is expected to balloon the federal budget deficit and add about $1.5 trillion over 10 years to the $20 trillion national debt. “It’s interesting to hear Mark talk about fiscal responsibility,” Democratic U.S. Representative Joseph Crowley said on CBS. 
Crowley said the Republican tax bill would require the United States to borrow $1.5 trillion, to be paid off by future generations, to finance tax cuts for corporations and the rich. “This is one of the least ... fiscally responsible bills we’ve ever seen passed in the history of the House of Representatives. I think we’re going to be paying for this for many, many years to come,” Crowley said. Republicans insist the tax package, the biggest U.S. tax overhaul in more than 30 years, will boost the economy and job growth. House Speaker Paul Ryan, who also supported the tax bill, recently went further than Meadows, making clear in a radio interview that welfare or “entitlement reform,” as the party often calls it, would be a top Republican priority in 2018. In Republican parlance, “entitlement” programs mean food stamps, housing assistance, Medicare and Medicaid health insurance for the elderly, poor and disabled, as well as other programs created by Washington to assist the needy. Democrats seized on Ryan’s early December remarks, saying they showed Republicans would try to pay for their tax overhaul by seeking spending cuts for social programs. But the goals of House Republicans may have to take a back seat to the Senate, where the votes of some Democrats will be needed to approve a budget and prevent a government shutdown. Democrats will use their leverage in the Senate, which Republicans narrowly control, to defend both discretionary non-defense programs and social spending, while tackling the issue of the “Dreamers,” people brought illegally to the country as children. Trump in September put a March 2018 expiration date on the Deferred Action for Childhood Arrivals, or DACA, program, which protects the young immigrants from deportation and provides them with work permits. The president has said in recent Twitter messages he wants funding for his proposed Mexican border wall and other immigration law changes in exchange for agreeing to help the Dreamers. 
Representative Debbie Dingell told CBS she did not favor linking that issue to other policy objectives, such as wall funding. “We need to do DACA clean,” she said. On Wednesday, Trump aides will meet with congressional leaders to discuss those issues. That will be followed by a weekend of strategy sessions for Trump and Republican leaders on Jan. 6 and 7, the White House said. Trump was also scheduled to meet on Sunday with Florida Republican Governor Rick Scott, who wants more emergency aid. The House has passed an $81 billion aid package after hurricanes in Florida, Texas and Puerto Rico, and wildfires in California. The package far exceeded the $44 billion requested by the Trump administration. The Senate has not yet voted on the aid.
    '''

# Single column 'text', single row (index 0).
data_inf = pd.DataFrame({'text': [article_text]})
data_inf

Unnamed: 0,text
0,"\nAs U.S. budget fight looms, Republicans flip..."


## Preprocess inference data

In [27]:
# Run the cleaning / stopword-removal / lemmatization pipeline on the 'text' column.
data_inf['text'] = data_inf['text'].apply(preprocess_text)
data_inf

Unnamed: 0,text
0,u budget fight loom republican flip fiscal scr...


In [28]:
# Convert the preprocessed article into a sequence of token ids.
# Pass the values of the 'text' column, not the DataFrame itself: iterating a
# DataFrame yields its column labels, so the tokenizer would otherwise encode
# the literal word "text" instead of the article.
data_inf = tokenizer.texts_to_sequences(data_inf['text'].tolist())

In [29]:
# Pad or truncate every sequence to one fixed length.
# NOTE(review): 700 presumably matches the input length the model was trained with -- confirm.
MAX_SEQUENCE_LENGTH = 700
data_inf = pad_sequences(data_inf, maxlen=MAX_SEQUENCE_LENGTH)

## Predict

In [30]:
# Predict with the loaded model, then threshold the output score into a
# 0/1 label (score >= 0.5 -> 1, otherwise 0).
# numpy is already imported as np in the first cell, so it is not re-imported here.
y_prob_inf = model_rnn.predict(data_inf)
y_pred_inf = np.where(y_prob_inf >= 0.5, 1, 0)
y_pred_inf



array([[1]])