In [22]:
import pandas as pd
import numpy as np

from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.preprocessing import FunctionTransformer
from sklearn.pipeline import Pipeline
from sklearn import set_config

import nltk
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize
import re #regular expression
import spacy
from matplotlib import pyplot as plt


from transformers import AutoModelForSequenceClassification, AutoTokenizer
from transformers import pipeline as tf_pipeline

import joblib

from custom_package.text_processing import normalize_text, tokenizer_func, remove_emojis
from custom_package.modeling import GensimLdaTransformer, get_topic_assignment
from custom_package.modeling import topic_mapping_sk_lda, topic_mapping_gensim_lda
from custom_package.database import get_raw_tweets, store_processed_tweets,get_training_raw_tweets






In [23]:
set_config(display='diagram')

Prepare dataset

In [24]:
query_limit = 484000


In [25]:
def get_filtered_tweets(query_limit = 100):
    raw_tweets = get_training_raw_tweets(query_limit)
    data = {'id' : [tweet.id for tweet in raw_tweets],
        'text' : [remove_emojis(tweet.text) for tweet in raw_tweets],
        'company_id' : [tweet.company_id for tweet in raw_tweets],
        'date' : [tweet.date for tweet in raw_tweets]
        }
    filtered_df = pd.DataFrame(data)
    return filtered_df

In [26]:
# get raw tweets for training
filtered_df = get_filtered_tweets(query_limit)

In [27]:
filtered_df.head()

Unnamed: 0,id,text,company_id,date
0,72d17ff2448ce66419a48b1fe774ef4dc72bbe2fd07477...,"@gtbank please why would you people deduct 5,0...",1,2021-09-16 19:48:00-04:00
1,ff90053f9d09c402d9439b9017c507fb71484797fc45d9...,@gtbank Hello. I need to speak with any of yo...,1,2021-09-23 18:40:00-04:00
2,3e68ebe123146b56485db4f986ea05ffe9e98477e1ee67...,I still don't understand why @gtbank and @Firs...,1,2021-09-23 18:00:00-04:00
3,f71466dcab3eee360d8fb8d3453dd5193bd618a6cc1ef6...,Who else has done a redesign of the @gtbank ap...,1,2021-09-30 19:56:00-04:00
4,65bcacf442a828d8151ca52a18115f9f57fdf038bbc93a...,"@gtbank @gtbank_help hello, please what is the...",1,2021-09-30 19:33:00-04:00


In [28]:
filtered_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 181634 entries, 0 to 181633
Data columns (total 4 columns):
 #   Column      Non-Null Count   Dtype 
---  ------      --------------   ----- 
 0   id          181634 non-null  object
 1   text        181634 non-null  object
 2   company_id  181634 non-null  int64 
 3   date        181634 non-null  object
dtypes: int64(1), object(3)
memory usage: 5.5+ MB


In [31]:
filtered_df['company_id'].value_counts()

company_id
3    57535
5    48055
4    43006
2    21723
1    11315
Name: count, dtype: int64

In [29]:
# pre-trained tweet sentiment analysis model

# Load the model and tokenizer
model_name = "cardiffnlp/twitter-roberta-base-sentiment"
sentiment_model = AutoModelForSequenceClassification.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

In [30]:
# Create a sentiment analysis pipeline
sentiment_pipeline = tf_pipeline("sentiment-analysis", model=sentiment_model, tokenizer=tokenizer)

# Example tweet
tweet = "I love using BERT models for NLP tasks!"

# Run sentiment analysis
result = sentiment_pipeline(tweet)
print(result)

[{'label': 'LABEL_2', 'score': 0.9838293194770813}]


In [31]:
# Example tweet
tweet = ["I love using BERT models for NLP tasks!",
         "I'm not a fan of rainy days.",
         "Neutral statements are hard to classify."]

# Run sentiment analysis
result = sentiment_pipeline(tweet)
print(result)

[{'label': 'LABEL_2', 'score': 0.9838293194770813}, {'label': 'LABEL_0', 'score': 0.9232668876647949}, {'label': 'LABEL_1', 'score': 0.5792319774627686}]


In [32]:
# Create a sentiment analysis pipeline
pos_sentiment_pipeline = tf_pipeline("text-classification", model=sentiment_model,
                                     tokenizer=tokenizer,return_all_scores=True)



In [65]:
joblib.dump(pos_sentiment_pipeline,'sentiment_analyis_pipeline.joblib')

['sentiment_analyis_pipeline.joblib']

In [33]:
# Sentiment label mapping based on model output
label_mapping = {
    'LABEL_0': 'Negative',
    'LABEL_1': 'Neutral',
    'LABEL_2': 'Positive'
}

# Function to get and map sentiment
def get_mapped_sentiment(text):
    result = text
    #result = sentiment_pipeline(text)[0]
    # Map the model's label to a more descriptive term
    sentiment_label = label_mapping.get(result['label'], "Unknown")
    return sentiment_label

In [61]:
# Function to get and map sentiment
def get_pos_sentiment_proba(text):
    result = text
    # Map the model's label to a more descriptive term
    pos_sentiment_proba = ((result[-1].get('score', np.nan) - result[-3].get('score',np.nan))+1)/2
    return pos_sentiment_proba

In [35]:
for item in result:
    print(get_mapped_sentiment(item))

Positive
Negative
Neutral


In [36]:
res = pos_sentiment_pipeline(tweet)

In [67]:
for item in res:
    #print(item)
    print(get_pos_sentiment_proba(item))

0.01617719163186848
0.6022988148033619
0.05246179737150669
0.9222867011558264
0.8429781056474894
0.8084669960662723
0.7804167699068785
0.1778898611664772
0.19008528254926205
0.5373626351356506
0.11399239301681519
0.17357515264302492
0.42282371688634157
0.7521887747570872
0.43846861459314823
0.06602956890128553
0.0783891745377332
0.25492206402122974
0.4713168516755104
0.03141938429325819
0.17915874533355236
0.019122233148664236
0.5373626351356506
0.1743094464763999
0.15861306339502335
0.20599585957825184
0.05943847098387778
0.9741747493389994
0.13299764413386583
0.46483973413705826
0.8189110129605979
0.9932075976976193
0.07920616096816957
0.2729745311662555
0.015406539780087769
0.7536345180124044
0.14886813890188932
0.40066814608871937
0.620157313067466
0.9318287820206024
0.27664264664053917
0.4250229671597481
0.04134296695701778
0.5377062503248453
0.03612997196614742
0.5002337917685509
0.631596639752388
0.16191955702379346
0.7105188621208072
0.11330586578696966
0.09513372927904129
0.12

In [51]:
res = pos_sentiment_pipeline(filtered_df['text'].iloc[0:100].to_list())

In [46]:
res_a = sentiment_pipeline(filtered_df['text'].iloc[0:100].to_list())

In [47]:
sentiment_a = [get_mapped_sentiment(text) for text in res_a]

In [62]:
sentiment_data = [get_pos_sentiment_proba(text) for text in res]

In [40]:
selected_df = filtered_df.iloc[0:100].copy()

In [63]:
selected_df['sentiment']=sentiment_data

In [48]:
selected_df['sentiment_map']= sentiment_a

In [56]:
res[10-1]

[{'label': 'LABEL_0', 'score': 0.15328900516033173},
 {'label': 'LABEL_1', 'score': 0.6186966896057129},
 {'label': 'LABEL_2', 'score': 0.228014275431633}]

In [64]:
selected_df[['text','sentiment','sentiment_map']].to_dict(orient='records')

[{'text': '@gtbank please why would you people deduct 5,000 from my account some minutes ago with after collect sms charges alert for just/august and still took my money. You people are just crooks. Refund me my money please  @Twitter this bank is just worse than I can imagine',
  'sentiment': 0.01617719163186848,
  'sentiment_map': 'Negative'},
 {'text': '@gtbank Hello.  I need to speak with any of your staffs, in regards of my account not been able to be used here in united kindom. Please this important to me.  I will appreciate to get feed back from you.  Regards Adeyemo',
  'sentiment': 0.6022988148033619,
  'sentiment_map': 'Neutral'},
 {'text': "I still don't understand why @gtbank and @FirstBankngr won't allow your card for small international transactions in the name of DCC...",
  'sentiment': 0.05246179737150669,
  'sentiment_map': 'Negative'},
 {'text': 'Who else has done a redesign of the @gtbank app, please share your work in the comments. Would love to see',
  'sentiment':

In [244]:
# Create a sentiment analysis pipeline
pos_sentiment_pipeline = hf_pipeline("text-classification", model=model,
                                     tokenizer=tokenizer,return_all_scores=True)

In [245]:
result = pos_sentiment_pipeline(tweet)
print(result)

[[{'label': 'LABEL_0', 'score': 0.001454407349228859}, {'label': 'LABEL_1', 'score': 0.01471620425581932}, {'label': 'LABEL_2', 'score': 0.9838293194770813}], [{'label': 'LABEL_0', 'score': 0.9232668876647949}, {'label': 'LABEL_1', 'score': 0.07008453458547592}, {'label': 'LABEL_2', 'score': 0.006648621056228876}], [{'label': 'LABEL_0', 'score': 0.3803738057613373}, {'label': 'LABEL_1', 'score': 0.5792319774627686}, {'label': 'LABEL_2', 'score': 0.04039423540234566}]]


In [251]:
result[0][-1]

{'label': 'LABEL_2', 'score': 0.9838293194770813}

In [262]:
# Function to get and map sentiment
def get_pos_sentiment_proba(text):
    result = pos_sentiment_pipeline(text)
    #result = result[0][-1] + result[0][-2]
    # Map the model's label to a more descriptive term
    pos_sentiment_proba = result[0][-1].get('score', np.nan) + result[0][-2].get('score',np.nan)
    return pos_sentiment_proba

In [263]:
for item in tweet:
    print(get_pos_sentiment_proba(item))

0.9985455237329006
0.0767331556417048
0.6196262128651142


In [243]:
for item in tweet:
    result = get_mapped_sentiment(item)
    print(result)

Positive
Negative
Neutral


In [229]:
type(data_normalized)

numpy.ndarray

In [232]:
# Sample DataFrame with an index and tweet texts
data = {
    'Tweet': data_normalized
}
df = pd.DataFrame(data)



# Apply sentiment analysis to each tweet and map the labels
df['Sentiment'] = df['Tweet'].apply(get_mapped_sentiment)

# Display the DataFrame
df.head

<bound method NDFrame.head of                                                    Tweet Sentiment
0       dear bring information account open phone number   Neutral
1      ah Nov Dec receive account charge sm fraud na ...  Negative
2      milli trans year send recharge card talk win s...   Neutral
3      steal think account complain time send dm ask ...  Negative
4      explain meaning debit account SMS charge alert...   Neutral
...                                                  ...       ...
11251              ration money atm stop pay week branch   Neutral
11252  month unable access account upgrade online ban...  Negative
11253  pls refund money card withdrawal decline debit...   Neutral
11254  electronic transfer levy charge time send mone...   Neutral
11255                     new daily charge naira evening   Neutral

[11256 rows x 2 columns]>

In [233]:
df.head()

Unnamed: 0,Tweet,Sentiment
0,dear bring information account open phone number,Neutral
1,ah Nov Dec receive account charge sm fraud na ...,Negative
2,milli trans year send recharge card talk win s...,Neutral
3,steal think account complain time send dm ask ...,Negative
4,explain meaning debit account SMS charge alert...,Neutral


In [234]:
# Apply sentiment analysis to each tweet and map the labels
filtered_df['Sentiment'] = filtered_df['text'].apply(get_mapped_sentiment)

In [237]:
filtered_df[['text','Sentiment']].head()

Unnamed: 0,text,Sentiment
21,Dear @gtbank please bring me my information o...,Neutral
22,Ah. From Nov 25th to Dec 20. I did it receive...,Negative
23,With all my milli trans last year @gtbank @gtb...,Neutral
25,This is stealing!!!! How much do you think I h...,Negative
26,@gtbank @gtbank_help @gtcrea8 @gtbank_careers ...,Neutral


In [238]:
df.to_csv('sentiment_gtb_1.csv')

In [239]:
filtered_df[['text','Sentiment']].to_csv('sentiment_gtb_2.csv')

In [267]:
df_with_index = pd.DataFrame(data_normalized,columns=['text'],index=filtered_df.index)

In [270]:
filtered_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 11256 entries, 21 to 39052
Data columns (total 6 columns):
 #   Column       Non-Null Count  Dtype              
---  ------       --------------  -----              
 0   id           11256 non-null  object             
 1   text         11256 non-null  object             
 2   date         11256 non-null  datetime64[ns, UTC]
 3   datetime     11256 non-null  datetime64[ns, UTC]
 4   weekly_bins  11256 non-null  period[W-SUN]      
 5   Sentiment    11256 non-null  object             
dtypes: datetime64[ns, UTC](2), object(3), period[W-SUN](1)
memory usage: 615.6+ KB


In [364]:
topic_mapping = {
    '0': 'Fraud',
    '1': 'Mobile App',
    '2': 'General Enquiry',
    '3': 'Customer Service Feedback',
    '4': 'Physical Branch',
    '5': 'General Enquiry',
    '6': 'Money Transfer Issues',
    '7': 'Dispense Error Issues',
    '8': 'Credit Products',
    '9': 'General Enquiry'
}

topic_mapping2 = {
    '0': 'Fraud',
    '1': 'Miscellaneous',
    '2': 'Transaction issues',
    '3': 'Card Issues',
    '4': 'Mobile App',
    '5': 'Miscellaneous',
    '6': 'Physical Branch',
    '7': 'General Enquiry',
    '8': 'Miscellaneous',
    '9': 'Dispense Error Issues'
}


In [272]:
lda_model.transform(filtered_df['text'].iloc[0:10])

AttributeError: 'LdaModel' object has no attribute 'transform'

In [603]:
def tokenizer_func(X):
    return [word_tokenize(doc.lower()) for doc in X]

# Create a Transformer from a tokenizer function so that we can use it in a Pipeline
tokenizer = FunctionTransformer(tokenizer_func)

In [312]:
class GensimLdaTransformer(BaseEstimator, TransformerMixin):
    def __init__(self, gensim_model, gensim_dictionary):
        self.gensim_model = gensim_model
        self.gensim_dictionary = gensim_dictionary

    def fit(self, X, y=None):
        # Since the model is already trained, we don't need to do anything here
        return self

    def transform(self, X):
        # Transform the data into the bag-of-words format
        corpus = [self.gensim_dictionary.doc2bow(doc) for doc in X]
        # Use the Gensim model to transform the data
        transformed_corpus = [self.gensim_model.get_document_topics(bow,minimum_probability=0) for bow in corpus]
        # Return the transformed data in a format suitable for scikit-learn
        # Here, we're returning the topic distribution for each document
        return [[prob for _, prob in doc_topics] for doc_topics in transformed_corpus]


In [313]:
model_transformer = GensimLdaTransformer(lda_model, gensim_dictionary)

In [604]:
gensim_pipeline = Pipeline([('normalizer', normalizer),
                            ('tokenizer', tokenizer),
                            ('model', model_transformer)])


In [365]:
def get_topic_assignment(array):
  temp_result = (max(range(len(array)), key=array.__getitem__), max(array))
  if temp_result[1] < 0.4:
        topic = 'Unclassified'
  else:
       topic = topic_mapping2.get(str(temp_result[0]),'Unclassified')
  return topic
   

In [315]:
result_topics = gensim_pipeline.fit_transform(filtered_df['text'].iloc[0:10])

In [318]:
result_topics[0:2]

[[0.026340678,
  0.4705905,
  0.015595368,
  0.013057382,
  0.011232986,
  0.00985642,
  0.43158793,
  0.007916601,
  0.0072073718,
  0.006614771],
 [0.48484868,
  0.009718684,
  0.1318476,
  0.0065344023,
  0.21795009,
  0.0049309866,
  0.13329412,
  0.0039605103,
  0.0036056943,
  0.003309228]]

In [317]:
for res in result_topics:
    print(get_topic_assignment(res))

Money Transfer Issues
Fraud
General Enquiry
Fraud
Fraud
Unclassified
Fraud
General Inquiry
Physical Branch
Fraud


In [319]:
predicted_topics = gensim_pipeline.fit_transform(filtered_df['text'])

In [336]:
data_topics = {'topic_distribution':predicted_topics}

In [337]:
df_predicted_topics = pd.DataFrame(data_topics,index = filtered_df.index)

In [338]:
df_predicted_topics.head()

Unnamed: 0,topic_distribution
21,"[0.02633998, 0.47062853, 0.015595368, 0.013057..."
22,"[0.48465025, 0.009718691, 0.13182493, 0.006534..."
23,"[0.01933564, 0.014230931, 0.0113611715, 0.0095..."
25,"[0.68735355, 0.008202909, 0.0065843435, 0.2761..."
26,"[0.93861073, 0.012005062, 0.0095918495, 0.0080..."


In [339]:
# Merge df1 and df2 on their indices
df_merged = pd.merge(filtered_df, df_predicted_topics, left_index=True, right_index=True)

In [340]:
df_merged.drop(['datetime','weekly_bins','id'], axis=1,inplace=True)
df_merged.info()

<class 'pandas.core.frame.DataFrame'>
Index: 11256 entries, 21 to 39052
Data columns (total 4 columns):
 #   Column              Non-Null Count  Dtype              
---  ------              --------------  -----              
 0   text                11256 non-null  object             
 1   date                11256 non-null  datetime64[ns, UTC]
 2   Sentiment           11256 non-null  object             
 3   topic_distribution  11256 non-null  object             
dtypes: datetime64[ns, UTC](1), object(3)
memory usage: 697.7+ KB


In [341]:
df_merged.head()

Unnamed: 0,text,date,Sentiment,topic_distribution
21,Dear @gtbank please bring me my information o...,2021-01-07 23:31:00+00:00,Neutral,"[0.02633998, 0.47062853, 0.015595368, 0.013057..."
22,Ah. From Nov 25th to Dec 20. I did it receive...,2021-01-07 23:30:00+00:00,Negative,"[0.48465025, 0.009718691, 0.13182493, 0.006534..."
23,With all my milli trans last year @gtbank @gtb...,2021-01-07 23:29:00+00:00,Neutral,"[0.01933564, 0.014230931, 0.0113611715, 0.0095..."
25,This is stealing!!!! How much do you think I h...,2021-01-07 23:12:00+00:00,Negative,"[0.68735355, 0.008202909, 0.0065843435, 0.2761..."
26,@gtbank @gtbank_help @gtcrea8 @gtbank_careers ...,2021-01-07 23:10:00+00:00,Neutral,"[0.93861073, 0.012005062, 0.0095918495, 0.0080..."


In [342]:
df_merged['topic'] = df_merged['topic_distribution'].apply(get_topic_assignment)

In [343]:
df_merged.head()

Unnamed: 0,text,date,Sentiment,topic_distribution,topic
21,Dear @gtbank please bring me my information o...,2021-01-07 23:31:00+00:00,Neutral,"[0.02633998, 0.47062853, 0.015595368, 0.013057...",Money Transfer Issues
22,Ah. From Nov 25th to Dec 20. I did it receive...,2021-01-07 23:30:00+00:00,Negative,"[0.48465025, 0.009718691, 0.13182493, 0.006534...",Fraud
23,With all my milli trans last year @gtbank @gtb...,2021-01-07 23:29:00+00:00,Neutral,"[0.01933564, 0.014230931, 0.0113611715, 0.0095...",General Enquiry
25,This is stealing!!!! How much do you think I h...,2021-01-07 23:12:00+00:00,Negative,"[0.68735355, 0.008202909, 0.0065843435, 0.2761...",Fraud
26,@gtbank @gtbank_help @gtcrea8 @gtbank_careers ...,2021-01-07 23:10:00+00:00,Neutral,"[0.93861073, 0.012005062, 0.0095918495, 0.0080...",Fraud


In [344]:
df_merged.reset_index().to_csv('gensim_topic.csv',index=False)

In [359]:
check_text = filtered_df['text'].iloc[0:10].to_list()
for text in check_text:
    print(text)
    temp_result = gensim_pipeline.fit_transform([text,])[0]
    print(temp_result)
    print(get_topic_assignment(temp_result),"\n")

Dear @gtbank  please bring me my information of my account which I was opened with you 0486662*** with this phone number 08153921236
[0.026340263, 0.47059926, 0.015595368, 0.013057382, 0.011232986, 0.00985642, 0.43157956, 0.007916601, 0.0072073718, 0.006614771]
Mobile App 

Ah.  From Nov 25th to Dec 20. I did it receive up to 20k in that account and you are charging me 400+ for sms. This is fraud na. @gtbank do you have to steal and kill your customers?  Y’all have 24hours to revoke and give me back my money. Else I’ll dedicate a tweet daily to y’al
[0.48471376, 0.009718689, 0.13183302, 0.0065344027, 0.21795303, 0.004930987, 0.1334407, 0.0039605103, 0.0036056945, 0.0033092282]
Fraud 

With all my milli trans last year @gtbank @gtbank_help never sent me a recharge card talk more this. They should win the stingy bank of the year. 
[0.019335067, 0.014230783, 0.011361163, 0.009501686, 0.5282072, 0.0071715307, 0.3943758, 0.00575999, 0.005243965, 0.004812798]
Physical Branch 

This is steali

In [362]:
#check_text
print(pipeline)

Pipeline(steps=[('preprocessor',
                 Pipeline(steps=[('normalizer',
                                  FunctionTransformer(func=<function normalize_text at 0x000001DEBFC67AF0>)),
                                 ('vectorizer',
                                  CountVectorizer(max_df=0.75,
                                                  max_features=5000,
                                                  min_df=2))])),
                ('model',
                 LatentDirichletAllocation(doc_topic_prior=0.01,
                                           learning_method='online', max_iter=5,
                                           random_state=27,
                                           topic_word_prior=0.91))])


In [366]:
check_text = filtered_df['text'].iloc[0:10].to_list()
for text in check_text:
    print(text)
    temp_result = pipeline.transform([text,])[0]
    print(temp_result)
    print(get_topic_assignment(temp_result),"\n")

Dear @gtbank  please bring me my information of my account which I was opened with you 0486662*** with this phone number 08153921236
[0.00140845 0.00140845 0.00140845 0.98732394 0.00140845 0.00140845
 0.00140845 0.00140845 0.00140845 0.00140845]
Card Issues 

Ah.  From Nov 25th to Dec 20. I did it receive up to 20k in that account and you are charging me 400+ for sms. This is fraud na. @gtbank do you have to steal and kill your customers?  Y’all have 24hours to revoke and give me back my money. Else I’ll dedicate a tweet daily to y’al
[0.00066225 0.00066225 0.46946207 0.3100903  0.00066225 0.00066225
 0.00066225 0.00066225 0.21581187 0.00066225]
Transaction issues 

With all my milli trans last year @gtbank @gtbank_help never sent me a recharge card talk more this. They should win the stingy bank of the year. 
[0.0009901  0.0009901  0.27751436 0.16204566 0.0009901  0.0009901
 0.0009901  0.0009901  0.55350929 0.0009901 ]
Miscellaneous 

This is stealing!!!! How much do you think I had i

In [599]:
print(pipeline)

Pipeline(steps=[('preprocessor',
                 Pipeline(steps=[('normalizer',
                                  FunctionTransformer(func=<function normalize_text at 0x000001DEBFC67AF0>)),
                                 ('vectorizer',
                                  CountVectorizer(max_df=0.95,
                                                  max_features=5000, min_df=50,
                                                  ngram_range=(1, 3),
                                                  stop_words=['nothing',
                                                              'forty', 'beyond',
                                                              'yours',
                                                              'however',
                                                              'fifty', 'seems',
                                                              'meanwhile', 'be',
                                                              'well', 'put',
            

In [600]:
print(gensim_pipeline)

Pipeline(steps=[('normalizer',
                 FunctionTransformer(func=<function normalize_text at 0x000001DEBFC67AF0>)),
                ('tokenizer',
                 FunctionTransformer(func=<function <lambda> at 0x000001DEC8DF0EE0>)),
                ('model',
                 GensimLdaTransformer(gensim_dictionary=<gensim.corpora.dictionary.Dictionary object at 0x000001DEC0423DC0>,
                                      gensim_model=<gensim.models.ldamodel.LdaModel object at 0x000001DEE44FA7C0>))])


In [601]:
#joblib.dump(pipeline,'sklearn_LDA_pipeline.joblib')

['sklearn_LDA_pipeline.joblib']

In [605]:
#joblib.dump(gensim_pipeline,'gensim_LDA_pipeline.joblib')

['gensim_LDA_pipeline.joblib']

In [1]:
filtered_df.info()

NameError: name 'filtered_df' is not defined

In [2]:
check_text

NameError: name 'check_text' is not defined