# **Telco Customer Retention: Churn Prediction and Feedback Insights Using Classification and Topic Modelling**

## I. Import Libraries

In [1]:
# import libraries
import hf_xet
import bertopic
import pandas as pd 
import numpy as np
from bertopic import BERTopic
import re
import nltk

from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from sentence_transformers import SentenceTransformer

nltk.download('stopwords')
nltk.download('punkt_tab')
nltk.download('wordnet')
nltk.download('vader_lexicon')

  from .autonotebook import tqdm as notebook_tqdm
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\Asus\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt_tab to
[nltk_data]     C:\Users\Asus\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\Asus\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\Asus\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


True

## II. Data Loading

In [2]:
# load and copy data
data = pd.read_csv('fix-data.csv')
df = data.copy()

# filter only customer feedback
df = df[['customer_feedback']]
df

Unnamed: 0,customer_feedback
0,
1,
2,
3,
4,I feel like this service could be good if they...
...,...
7027,
7028,
7029,"I signed up because of a promo, and the pricin..."
7030,


## III. Feature Engineering

### A. Handle Missing Value

In [3]:
# drop missing value
df_use = df.dropna()
df_use

Unnamed: 0,customer_feedback
4,I feel like this service could be good if they...
5,"At first I was happy, but over time the qualit..."
6,I feel like this service could be good if they...
10,"I don’t have anything major to complain about,..."
13,i recently decided to switch internet provider...
...,...
7013,"Honestly, I don’t even remember signing up for..."
7016,i have been with this internet service provide...
7017,"They were helpful, but the bill confused me. I..."
7026,I feel like this service could be good if they...


Explanation:

### B. Text Preprocessing

In [4]:
# create stopwords list
stop_words_en = set(stopwords.words("english"))
stop_words_en.add('day')
stop_words_en.add('using')
stop_words_en.add('feel')
stop_words_en.add('Im')
stop_words_en.add('like')
stop_words_en.add('also')
stop_words_en.add('service')
stop_words_en.add('company')
stop_words_en.add('im')
stop_words_en.add('Feedback')
stop_words_en.add('feedback')
stop_words_en.add('fully')
stop_words_en.discard('don\'t')



print(f"Total stop words: {len(stop_words_en)}")
print(stop_words_en)
print('')

Total stop words: 209
{'can', "doesn't", 'up', 'im', "i'll", 'using', 'why', 'too', "you'd", 'there', 'won', 'y', 'after', "won't", 'are', 'fully', 'until', 'me', 'such', "we'd", "hadn't", 'hers', "wouldn't", 'any', 'her', 'between', 't', 'to', "she's", 'o', 'only', 'theirs', "they'll", "it'd", 'here', 'aren', 'both', 'didn', 'should', 'wasn', 'couldn', 'same', "aren't", 'these', 'day', 'over', 'him', 'against', 'it', 'them', 'feel', 'their', 'haven', 'into', 'own', 'service', 'again', 'out', 'been', 'Feedback', 'below', "didn't", "we're", "shouldn't", 'he', 'yours', 'wouldn', 'than', "shan't", 'through', 'while', 'does', 'was', 'more', 'my', 'shan', 'for', 'am', 'll', 'off', "she'd", 'above', 'where', 'Im', "hasn't", 'himself', 'they', "weren't", 'yourself', 'his', 'itself', 'feedback', 'company', 'by', 'have', "they're", 'weren', 'on', 'each', 'nor', 'ours', 'a', 'how', 'don', "that'll", "he'd", 'do', 'we', 'whom', 're', 'from', "you've", 'as', "they'd", 'so', "wasn't", "i've", 've',

In [5]:
# function for preprocess text
def text_preprocessing(text):
    # lowercase letter
    text = text.lower()

    # remove mention
    text = re.sub("@[A-Za-z0-9_]+", " ", text)

    # remove hashtag
    text = re.sub("#[A-Za-z0-9_]+", " ", text)

    # remove newline
    text = re.sub(r"\\n", " ",text)

    # remove whitespace
    text = text.strip()

    # remove url
    text = re.sub(r"http\S+", " ", text)
    text = re.sub(r"www.\S+", " ", text)

    # remove non-alphabetic
    text = re.sub("[^A-Za-z\s']", " ", text)

    # tokenisation
    tokens = word_tokenize(text)

    # remove stopwords
    tokens = [word for word in tokens if word not in stop_words_en]

    # join tokens
    text = ' '.join(tokens)

    return text

In [6]:
# perbandingan tulisan setelah di preproses
df_use['customer_feedback_processed'] = df_use['customer_feedback'].apply(lambda x: text_preprocessing(x))
df_use.head(20)

Unnamed: 0,customer_feedback,customer_feedback_processed
4,I feel like this service could be good if they...,could good improved communication product alri...
5,"At first I was happy, but over time the qualit...",first happy time quality became inconsistent b...
6,I feel like this service could be good if they...,could good improved communication product alri...
10,"I don’t have anything major to complain about,...",anything major complain getting full value sup...
13,i recently decided to switch internet provider...,recently decided switch internet providers pre...
14,"Honestly, I don’t even remember signing up for...",honestly even remember signing extras seem app...
15,I’ve been using the service for a while and it...,generally okay speed drops randomly evening tr...
21,"Everything looked good at first — fast setup, ...",everything looked good first fast setup decent...
26,I feel like this service could be good if they...,could good improved communication product alri...
29,"It’s usable, but far from great. Some days it ...",usable far great days works perfectly days cus...


In [7]:
df_use_list = df_use['customer_feedback_processed'].tolist()
df_use_list

['could good improved communication product alright never get emails updates overly technical would love customer friendly approach',
 'first happy time quality became inconsistent billing transparent charged things understand might stay next month goes better honestly already looking alternatives',
 'could good improved communication product alright never get emails updates overly technical would love customer friendly approach',
 'anything major complain getting full value support proactive new features rolled minimal documentation guess expected bit polish',
 'recently decided switch internet providers previous provider almost four years fiber optic internet reliable fast monthly charges bit steep especially since month month contract automatic bank transfer payment method convenient ultimately decided shop around cost effective option overall positive experience pricing ultimately led churn',
 'generally okay speed drops randomly evening tried contacting support got redirected mult

## IV Model Training

In [8]:
# train the model

embedding_model = SentenceTransformer("all-MiniLM-L6-v2", trust_remote_code=True)
model = BERTopic(embedding_model=embedding_model, nr_topics=10).fit(df_use_list) 

Explanation:

BERTopic has several steps in its process. First, for the embedding process, for this model training, we will use the all-MiniLM-L6-v2 pre trained model because is one of the lightest pre-trained embedding models for Natural Language Processing. How this embedding model works by applying cosine similarity to get semantic relationships between tokens by pairing between two sentence to get true pairs and false pairs.  This all-MiniLM-L6-v2 embedding model is based on MiniLM-L6-H384-uncased. The second process is clustering to create a cluster for each topic. The last process is the tokenizer and weighted scheme to understand deeper context between tokens.

## V. Model Evaluation

### A. Topic Information

In [9]:
# topic information
model.get_topic_info()

Unnamed: 0,Topic,Count,Name,Representation,Representative_Docs
0,-1,15,-1_overall_churn_check_would,"[overall, churn, check, would, others, conveni...",[electronic check convenient overall complaint...
1,0,478,0_internet_month_monthly_charges,"[internet, month, monthly, charges, satisfied,...",[customer internet provider months satisfied f...
2,1,299,1_drops_support_ages_terrible,"[drops, support, ages, terrible, buggy, clear,...",[works mostly connection drops occasionally su...
3,2,162,2_disable_remember_extras_cool,"[disable, remember, extras, cool, appear, port...",[honestly even remember signing extras seem ap...
4,3,160,3_communication_updates_product_overly,"[communication, updates, product, overly, love...",[could good improved communication product alr...
5,4,146,4_surprised_shows_renewed_promo,"[surprised, shows, renewed, promo, fine, auto,...",[signed promo pricing seemed fair contract aut...
6,5,138,5_days_usable_right_polite,"[days, usable, right, polite, perfectly, hate,...",[usable far great days works perfectly days cu...
7,6,137,6_drop_video_everything_looked,"[drop, video, everything, looked, calls, buffe...",[everything looked good first fast setup decen...
8,7,136,7_complain_documentation_rolled_proactive,"[complain, documentation, rolled, proactive, p...",[anything major complain getting full value su...
9,8,133,8_yet_reconsider_happens_confused,"[yet, reconsider, happens, confused, cancellin...",[helpful bill confused cancelling yet reconsid...


Explanation:

- Topic -1: This topic is filled with outlier that doesn't fit on the data and noisy keywords. Mostly satisfied with the product

- Topic 0: This topic is about customers satisfied with internet experience.

- Topic 1: This topic is about customers complaining about internet connectivity, customer support, and buggy payment app.

- Topic 2: This topic is about customers complaining about added extras with extra charges without warning.

- Topic 3: This topic is about customers complaining with overly technical customer support email.

- Topic 4: This topic is about customers complaining about renewed contract with different terms.

- Topic 5: This topic is about customers complaining about slow respond from customer support.

- Topic 6: his topic is about customers complaining about buffering video streaming.

- Topic 7: This topic is about customers satisfied with customer support, but with minimal user-friendly documentation.

- Topic 8: This topic is about customers complaining about confusing bills and reconsider to cancelling their subscriptions.

## VI. Model Saving

In [10]:
# saving model
model.save('bertopic_model')



In [11]:
# # Load the model
# from bertopic import BERTopic
# model = BERTopic.load("bertopic_model")