In [41]:
# Importing ALL necessary libraries and packages at the very beginning is highly
# recommended: it avoids import-related debugging later on.
# For newcomers, however, importing each library or package at the point where it
# is first needed may make more sense in terms of logical flow.
import pandas as pd
import nltk
nltk.download('punkt') # tokenizer data package; can be commented out after running it once
from nltk.corpus import stopwords
nltk.download('stopwords') # stop-word corpus package; can be commented out after running it once
import string

[nltk_data] Downloading package punkt to
[nltk_data]     /Users/chenhonglin/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/chenhonglin/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [42]:
# Load the CSV file into a pandas DataFrame so it can be processed in Python.
# The bare file name means the path is resolved relative to the working directory.
corpus_master = pd.read_csv("MASTERCARD.csv")
# To keep the notebook short, the check on the dataset can be skipped,
# BUT taking a look at what is inside the "box" is ALWAYS helpful for detecting
# possible bugs and for debugging.
#corpus_master

In [43]:
# Iterating over a DataFrame is much more time-consuming than iterating over a
# plain Python list, so the data is converted to a list right from the start
# to save run time in the later steps.

# Select the column holding the text to be processed and turn it into a list.
# There is more than one way to select it: .loc[], .iloc[] or ["Paragraph"].
text = corpus_master["Paragraph"].tolist()
text

['“Mastercard Accelerate is a single doorway to the countless ways Mastercard can help FinTechs all over the world grow and scale sustainably,” said Mastercard Chief Product and Innovation Officer Michael Miebach. “FinTechs are contributing to the rapid digital transformation that makes lives more convenient, simpler, and rewarding. We’re the partner of choice for the top FinTech brands worldwide, and with Accelerate we invite the next generation of global entrepreneurs to join us.”',
 '“It’s a significant acquisition for us but on the backs of three years of experience in real-time payments and increasing growth and excitement around real-time payments,” said Michael Miebach, the company’s chief product and innovation officer, in an interview with MarketWatch. He expects the deal will make Mastercard “a more relevant partner for banks” and others in the payments ecosystem.',
 '“Open banking is a growing global trend and a strategically important space for us. With the addition of Fini

In [44]:
# Convert every paragraph to lower case so that the same word written with
# different capitalization is not treated as two different words.
# There is more than one way to do this, e.g.
#   corpus_master_lower = corpus_master["Paragraph"].str.lower()
#   list(corpus_master_lower)
# but that route goes through a pandas Series first; here the plain list `text`
# is lower-cased directly, one paragraph at a time.
lower_case_text = [paragraph.lower() for paragraph in text]

lower_case_text

['“mastercard accelerate is a single doorway to the countless ways mastercard can help fintechs all over the world grow and scale sustainably,” said mastercard chief product and innovation officer michael miebach. “fintechs are contributing to the rapid digital transformation that makes lives more convenient, simpler, and rewarding. we’re the partner of choice for the top fintech brands worldwide, and with accelerate we invite the next generation of global entrepreneurs to join us.”',
 '“it’s a significant acquisition for us but on the backs of three years of experience in real-time payments and increasing growth and excitement around real-time payments,” said michael miebach, the company’s chief product and innovation officer, in an interview with marketwatch. he expects the deal will make mastercard “a more relevant partner for banks” and others in the payments ecosystem.',
 '“open banking is a growing global trend and a strategically important space for us. with the addition of fini

In [45]:
# Tokenizing: split each lower-cased paragraph into its list of tokens,
# producing one token list per paragraph (a list of lists).
tokenized_text = [nltk.word_tokenize(paragraph) for paragraph in lower_case_text]
tokenized_text

[['“',
  'mastercard',
  'accelerate',
  'is',
  'a',
  'single',
  'doorway',
  'to',
  'the',
  'countless',
  'ways',
  'mastercard',
  'can',
  'help',
  'fintechs',
  'all',
  'over',
  'the',
  'world',
  'grow',
  'and',
  'scale',
  'sustainably',
  ',',
  '”',
  'said',
  'mastercard',
  'chief',
  'product',
  'and',
  'innovation',
  'officer',
  'michael',
  'miebach',
  '.',
  '“',
  'fintechs',
  'are',
  'contributing',
  'to',
  'the',
  'rapid',
  'digital',
  'transformation',
  'that',
  'makes',
  'lives',
  'more',
  'convenient',
  ',',
  'simpler',
  ',',
  'and',
  'rewarding',
  '.',
  'we',
  '’',
  're',
  'the',
  'partner',
  'of',
  'choice',
  'for',
  'the',
  'top',
  'fintech',
  'brands',
  'worldwide',
  ',',
  'and',
  'with',
  'accelerate',
  'we',
  'invite',
  'the',
  'next',
  'generation',
  'of',
  'global',
  'entrepreneurs',
  'to',
  'join',
  'us',
  '.',
  '”'],
 ['“',
  'it',
  '’',
  's',
  'a',
  'significant',
  'acquisition',
  'fo

In [46]:
# Collect NLTK's English stop words into a set; the stop-word filtering below
# tests each token against it with `not in`.
stop_words = set(stopwords.words('english'))
#stop_words

In [47]:
# Step 4: removing stop words
# For each tokenized paragraph, keep only the tokens that are not in stop_words.
# The paragraph structure is preserved: clean_text is a list with one filtered
# token list per paragraph, in the original order.
clean_text = [
    [word for word in paragraph if word not in stop_words]
    for paragraph in tokenized_text
]
clean_text

[['“',
  'mastercard',
  'accelerate',
  'single',
  'doorway',
  'countless',
  'ways',
  'mastercard',
  'help',
  'fintechs',
  'world',
  'grow',
  'scale',
  'sustainably',
  ',',
  '”',
  'said',
  'mastercard',
  'chief',
  'product',
  'innovation',
  'officer',
  'michael',
  'miebach',
  '.',
  '“',
  'fintechs',
  'contributing',
  'rapid',
  'digital',
  'transformation',
  'makes',
  'lives',
  'convenient',
  ',',
  'simpler',
  ',',
  'rewarding',
  '.',
  '’',
  'partner',
  'choice',
  'top',
  'fintech',
  'brands',
  'worldwide',
  ',',
  'accelerate',
  'invite',
  'next',
  'generation',
  'global',
  'entrepreneurs',
  'join',
  'us',
  '.',
  '”'],
 ['“',
  '’',
  'significant',
  'acquisition',
  'us',
  'backs',
  'three',
  'years',
  'experience',
  'real-time',
  'payments',
  'increasing',
  'growth',
  'excitement',
  'around',
  'real-time',
  'payments',
  ',',
  '”',
  'said',
  'michael',
  'miebach',
  ',',
  'company',
  '’',
  'chief',
  'product',


In [18]:
# in/not in logic: tokenize each lower-cased paragraph and print every token
# that is NOT a stop word, one token per line.
# The loop runs over lower_case_text (the list of lower-cased paragraphs built
# earlier in this notebook). It previously ran over corpus_master_lower, a name
# that is only assigned inside a commented-out line, so the cell raised a
# NameError when the notebook was executed top to bottom on a fresh kernel.
for paragraph in lower_case_text:
    for word in nltk.word_tokenize(paragraph):
        if word not in stop_words:
            print(word)

“
mastercard
accelerate
single
doorway
countless
ways
mastercard
help
fintechs
world
grow
scale
sustainably
,
”
said
mastercard
chief
product
innovation
officer
michael
miebach
.
“
fintechs
contributing
rapid
digital
transformation
makes
lives
convenient
,
simpler
,
rewarding
.
’
partner
choice
top
fintech
brands
worldwide
,
accelerate
invite
next
generation
global
entrepreneurs
join
us
.
”
“
’
significant
acquisition
us
backs
three
years
experience
real-time
payments
increasing
growth
excitement
around
real-time
payments
,
”
said
michael
miebach
,
company
’
chief
product
innovation
officer
,
interview
marketwatch
.
expects
deal
make
mastercard
“
relevant
partner
banks
”
others
payments
ecosystem
.
“
open
banking
growing
global
trend
strategically
important
space
us
.
addition
finicity
,
expect
advance
open
banking
strategy
,
enhance
support
accelerate
today
’
digital
economy
across
several
markets
,
”
said
michael
miebach
,
president
mastercard
.
“
finicity
proven
business
,
built
par

partnership
mastercard
next
level
work
make
charitable
giving
frictionless
,
social
secure
,
”
goodworld
founder
ceo
dale
nirvani
pfeifer
said
press
release
.
“
well
aligned
belief
well
good
,
could
ask
better
strategic
partner
.
”
“
’
said
,
bears
repeating
today–working
mastercard
perfect
fit
us
across
board
:
technology
,
culture
,
values
.
appreciate
commitment
us
,
excited
see
achieve
together
going
forward
,
”
goodworld
cofounder
coo
john
gossart
said
statement
.
“
rural
opportunity
map
ensure
data
driven
insights
catalyze
inclusive
growth
achieve
broadly
shared
prosperity
across
rural
america
,
”
said
shamina
singh
,
president
mastercard
center
inclusive
growth
.
“
channeling
investment
towards
rural
entrepreneurship
,
map
helping
build
engine
economic
activity
make
digital
economy
work
everyone
,
everywhere
.
”
“
likely
get
better
outcomes
start
better
inputs
,
”
said
shamina
singh
,
founder
president
,
mastercard
center
inclusive
growth
.
“
’
working
accelerator
america
,
ensu

efficiencies
across
global
supply
chains
,
starting
automotive
industry
.
collaboration
advance
connected
scalable
digital
ecosystem
,
allowing
companies
irrespective
size
,
location
technical
capability
build
increased
trust
security
trading
partner
relationships
.
integrated
opentext
mastercard
offering
also
provide
opentext
business
network
customers
ability
access
spot
financing
mastercard
track™
b2b
global
trade
enablement
platform
.
leverage
opentext
supplier
portal
(
formerly
covisint
supplier
portal
)
,
opentext
identity
portal
opentext
iot
platform
,
integrated
mastercard
's
financial
partners
.
“
digitizing
simplifying
supply
chain
related
finance
processes
ease
global
trading
operations
across
industries
unlock
opportunities
suppliers
every
level
business
enterprise
buyers
efficiently
,
”
said
claire
thompson
,
executive
vice
president
enterprise
partnerships
mastercard
.
“
excited
opentext
partnering
mastercard
integrate
digital
platforms
together
allow
companies
quickly
id

effort
strike
back
,
visa
recently
said
partner
revolut
startup
expands
24
new
markets
,
total
56
around
world
.
revolut
start
issuing
mastercard
us
,
visa
says
get
chunk
business
revolut
time
.
revolut
says
75
%
cards
outside
europe
eventually
visa
branded
.
visa
executives
confessed
slow
chase
fintech
craze
say
catching
.
mastercard
claims
clear
leader
fintech
game
:
card
network
says
“
partner
choice
”
60
digital
banks
financial
startups
,
twice
many
two
years
ago
.
mastercard
also
reported
services
business
,
working
merchants
inventory
levels
,
partnering
fintechs
,
data
analytics
,
fraud
prevention
would
help
weather
downturn
since
businesses
reliant
transaction
volumes
.
“
thrilled
take
partnership
mastercard
next
level
work
make
charitable
giving
frictionless
,
social
secure
,
”
goodworld
founder
ceo
dale
nirvani
pfeifer
said
press
release
.
“
well
aligned
belief
well
good
,
could
ask
better
strategic
partner
.
”
michael
froman
,
vice
chairman
president
strategic
growth
masterc

city
,
joined
company
executive
vice
president
global
cities
.
newly
created
role
,
gamiño
lead
scaling
urban
tech
solutions
pioneered
mastercard
shape
new
partnership
models
address
cities
’
pressing
challenges
.
“
basic
internet
access
better
transit
links
,
successful
city
needs
include
everyone
,
requires
new
engagement
model
public
private
sector
”
,
comments
hany
fam
,
executive
vice
president
enterprise
partnerships
mastercard
.
“
strongly
committed
partner
cities
around
globe
become
inclusive
,
sustainable
connected
places
.
miguel
’
passion
make
tech
work
people
help
take
leadership
next
level
–
focused
improving
quality
life
everyone
living
traveling
cities
.
”
recently
,
mastercard
introduced
city
possible
,
first-of-its
kind
initiative
bringing
together
cities
united
private
sector
solve
system-wide
challenges
.
corporate
partners
cities
around
world
work
together
co-develop
,
scale
commercialize
urban
solutions
–
ensuring
cities
benefit
’
learnings
rather
reinvent
wheel
.


percent
electronic
waste
62
percent
waste
owned
campuses
.
help
merchants
understand
state
digital
fraud
,
solutions
exist
safeguard
,
mastercard
partnered
mercator
advisory
group
release
white
paper
titled
“
authentication
,
intelligence
,
consumer
journey
:
multi-layered
approach
reduce
digital
fraud
.
”
“
payment
technology
company
,
work
bring
new
products
promote
financial
inclusion
safe
,
practical
easy
way
continuous
transactions
.
partnership
facebook
shows
ability
revolutionize
options
sending
receiving
money
brazil
,
keeping
needs
customers
forefront
strategy
supporting
small
local
companies
,
“
said
joao
pedro
paro
neto
,
president
mastercard
brasil
cone
sul
.
institute
supply
management
’
exclusive
industry
partner
,
caps
research
(
caps
)
,
announced
appointment
bryan
fuller
,
former
chief
procurement
officer
mastercard
,
executive
director
.
fuller
replaces
former
cpo
deborah
stanton
,
retired
leading
caps
august
2014
july
2020
.
“
’
excited
partner
mastercard
leading
glo

mastercard
,
arpit
ratan
,
co-founder
signzy
.
’
let
explain
nature
partnership
problem
’
attacking
together
.
judges
chose
partnership
based
four
criteria
:
creativity
,
innovative
thinking
,
customer
value
results
.
zahir
khoja
,
mastercard
:
mastercard
looks
growing
ecosystem
,
think
channel
partners
participate
enable
growth
us
,
given
b2b2c
organization
.
formed
partnership
’
tremendous
success
terms
inbound
outbound
requests
us
enable
smart
merchant
onboarding
partners
.
’
going
launching
large
global
acquirer
us
q2
,
tremendous
accolade
problem
’
able
solve
signzy
,
mastercard
,
partner
.
perspective
,
mastercard
’
big
compay
’
working
.
terms
vision
,
mastercard
built
trusted
payment
system
turned
physical
payments
online
,
real
time
,
digital
.
’
’
trying
solve
onboarding
,
well
—
’
trying
bring
trust
digital
onboarding
merchants
across
networks
.
feel
mastercard
way
built
business
good
partner
us
.
zahir
khoja
,
mastercard
:
course
,
’
looking
things
like
merchant
growth
.
su

's
mastercard
's
operations
core
businesses
,
moffettnathanson
's
ellis
said
.
plus
,
nothing
strategies
set
stone
.
investments
large
firms
mastercard
samsung
’
ensure
adoption
,
sure
’
hurt
.
also
noted
mastercard
samsung
partners
digital
identity
services
.
hypr
offers
mobile
authentication
solution
utilizes
private/public
key
pairs
fido
compliant
.
ceo
also
indicates
business
seen
tremendous
growth
due
requirements
psd2
.
started
“
moments
,
”
first
phase
evolution
.
create
curate
experiences
people
,
customers
,
consumers
,
money
’
buy
.
’
buy
money
,
get
mastercard
.
’
kinds
small
things
giant
things
huge
artist
,
like
justin
timberlake
,
come
home
spend
day
.
cuts
across
entire
spectrum
.
1.8
billion
consumers
around
world
,
relevant
entire
spectrum
.
create
priceless
moments
,
,
acquire
lot
sponsorship
assets
convert
experiences
consumers
.
collaborative
announced
world
economic
forum
’
annual
meeting
davos
,
switzerland
,
initial
commitment
$
50
million
rockefeller
foundation


expertise
,
resources
need
address
world
's
pressing
challenges
.
building
rockefeller
foundation
's
leadership
social
impact
space
mastercard
center
’
innovations
data
philanthropy
,
initiative
identify
key
priorities
investment
opportunities
aimed
accelerating
``
data
good
''
efforts
.
growing
data
science
capabilities
social
civic
organizations
,
collaboration
aims
help
social
change
organizations
identify
trends
new
insights
enable
build
impactful
programs
people
communities
serve
.
conjunction
launch
,
foundation
mastercard
also
announced
$
20
million
investment
datakind
,
global
nonprofit
completed
two
hundred
fifty
data
science
artificial
intelligence
projects
help
network
thirty
thousand
data
science
volunteers
spread
across
five
worldwide
chapters
.
established
2011
,
datakind
use
investment
transition
project-
platform-based
model
,
enabling
support
organizations
around
set
common
challenges
,
including
community
health
inclusive
economic
growth
.
mastercard
center
inclusive


“
experience
–
idea
provide
communities
access
safe
,
affordable
toilets
shelved
gained
needed
financial
backing
.
believe
new
seed
funding
initiative
mastercard
foundation
help
catalyze
give-back
projects
provide
many
scholars
opportunity
convert
ideas
workable
solutions
.
”
mastercard
inc.
announced
biggest
acquisition
history
tuesday
,
card
network
continues
efforts
move
beyond
plastic
.
company
plans
acquire
account-to-account
business
danish
payment-technology
company
nets
€2.85
billion
,
$
3.19
billion
,
deal
expected
close
first
half
2020.
mastercard
,
“
’
significant
acquisition
us
backs
three
years
experience
real-time
payments
increasing
growth
excitement
around
real-time
payments
,
”
said
michael
miebach
,
company
’
chief
product
innovation
officer
,
interview
marketwatch
.
expects
deal
make
mastercard
“
relevant
partner
banks
”
others
payments
ecosystem
.
though
nets
’
technology
prominent
several
european
markets
,
mastercard
hopes
expand
broader
geographies
,
factor
excit

mastercard
develop
new
solutions
help
demographic
grow
financially
,
”
said
atif
siddiqi
,
chief
executive
officer
founder
branch
.
mastercard
wants
organizations
work
together
heighten
privacy
fend
cyberattacks
.
initiative
depends
coming
fundamentals
direct
data
used
.
based
premise
businesses
responsibility
comes
data
management
.
strength
global
consumer
spending
—
along
double-digit
cross-border
gains
traction
b2b
payments
—
marked
third-quarter
mastercard
results
released
tuesday
(
oct.
30
)
,
management
eyes
digital
checkout
high-tech
payments
initiatives
u.
s.
beyond
.
banga
also
said
“
b2b
front
,
really
excited
launch
mastercard
track
,
”
billed
global
trade
platform
,
one
developed
collaboration
microsoft
,
embracing
150
million
firms
across
75
countries
.
“
track
basically
solves
key
challenges
procure-to-pay
process
,
including
managing
supply
chain
risk
creating
transparency
b2b
payments
process
,
”
told
analysts
.
“
’
going
deploy
platform
phased
rollout
.
first
phase
fo

another
local
crime
may
affect
retail
.
center
recently
turned
attention
pullman
.
describes
two
different
approaches
used
tackle
sales
training
:
completion
learning
consumption
learning
.
framework
important
understanding
mastercard
’
training
initiatives
.
indian
digital
payment
market
received
healthy
boost
government
’
november
2016
demonetization
initiative
.
indian
government
’
effort
promote
digital
payments
,
mastercard
expected
register
growth
number
transactions
transaction
value
.
dispute
best
resolved
exist
.
basically
,
way
prevent
dispute
filed
issuing
banks
,
disputes
problem
merchants
.
mastercard
plans
prevent
invalid
disputes
streamline
dispute
process
.
merchants
need
know
upcoming
mastercard
dispute
resolution
initiative
:
second
phase
focus
redefining
current
second
chargeback
phase
dispute
process
.
april
12
,
2019
,
mastercard
dispute
resolution
initiative
send
disputes
following
reason
codes
second
chargeback
:
please
note
mastercard
plan
make
new
workflows
man

,
already
adopted
sustainable
practices
banking
card
business
.
initiative
comes
time
see
growing
interest
banks
greener
cards
.
”
green
payments
partnership
one
ways
mastercard
looking
encourage
sustainable
practices
within
outside
company
.
company
became
first
payments
player
recognized
science
based
targets
initiative
(
sbti
)
committing
20
percent
reduction
greenhouse
gas
emissions
2025
support
goals
paris
agreement
one
150
publicly
traded
companies
listed
jones
sustainability
north
america
index
.
addition
,
mastercard
achieved
100
percent
renewable
energy
across
global
operations
.
comes
company
diverted
100
percent
electronic
waste
62
percent
waste
owned
campuses
.
purchase
,
n
.
y.
--
(
)
--
businesses
governments
across
globe
looking
resources
help
better
manage
health
,
safety
economic
risks
presented
recent
pandemic
.
assist
efforts
,
mastercard
launched
recovery
insights
,
set
tools
,
innovation
research
provide
certainty
today
support
data-driven
decision-making
digital
t

firstly
,
highlighted
rising
consumer
spending
due
low
unemployment
level
steady
wage
growth
.
according
data
compiled
u.
s.
bureau
economic
analysis
,
consumer
spending
united
states
totaled
$
12.84
trillion
,
~1.0
%
sequentially
~2.6
%
yoy
.
increased
spending
use
digital
payment
methods
help
payment
processing
companies
like
mastercard
visa
inc
.
introducing
digital
technology
,
services
training
,
mastercard
accion
help
high
potential
microbusinesses
increase
revenues
achieve
greater
financial
security
individuals
families
depend
,
catalyzing
greater
economic
opportunities
communities
serve
.
``
continue
see
upside
street
estimates
next
12
months
driven
core
consumer
volume
exposure
europe
growth
markets
,
''
said
research
note
last
september
.
``
addition
,
believe
b2b
market
represents
next
growth
horizon
payments
industry
think
mastercard
best
positioned
given
multifaceted
product
approach
.
''
mastercard
inc.
wednesday
reported
double-digit
growth
fourth-quarter
purchase
volume

opportunity
b2b
space
,
pegging
addressable
market
$
120
trillion
.
early
efforts
gain
traction
b2b
often
helping
accelerate
shift
digital
payments
,
since
many
b2b
transactions
’
involve
cards
.
mastercard
offers
non-carded
options
area
sachin
said
company
continuing
win
deals
b2b
products
.
mastercard
also
reported
$
10.7
billion
balance
sheet
,
allowing
investment
``
new
opportunities
,
''
banga
said
,
could
mean
acquisitions
.
card
company
temporarily
halted
share
buybacks
,
banga
said
would
covid-19-related
layoffs
mastercard
.
“
trying
time
”
card
brand
seen
early
signs
stabilization
spending
,
banga
said
.
generous
supporters
made
rural
opportunity
map
possible
:
mastercard
center
inclusive
growth
,
reid
hoffman
,
rural
lisc
,
schmidt
futures
,
walmart
,
inc
.
“
rural
opportunity
map
ensure
data
driven
insights
catalyze
inclusive
growth
achieve
broadly
shared
prosperity
across
rural
america
,
”
said
shamina
singh
,
president
mastercard
center
inclusive
growth
.
“
channeling
inve

thrilled
deepen
longstanding
relationship
microsoft
advancing
research
,
development
scaling
new
technologies
business
models
,
”
said
ken
moore
,
executive
vice
president
head
mastercard
labs
.
“
strategic
collaboration
strengthen
extend
cloud
services
capabilities
clients
fintech
partners
,
sparking
innovation
creativity
ecosystem
.
enable
us
explore
opportunities
focused
new
client
segments
,
technologies
trends
continue
drive
financial
inclusion
build
future
commerce
.
”
“
mastercard
,
see
connected
world
opportunity
prosperity
possible
everyone
,
everywhere
,
”
says
jorn
lambert
,
executive
vice
president
digital
solutions
mastercard
.
mastercard
's
ceo
says
firm
pulled
facebook-led
libra
project
developing
concerns
business
model
compliance
.
lack
clear
business
model
libra
raised
another
red
flag
mastercard
.
banga
said
obvious
means
libra
association
become
profitable
make
money
users
.
``
’
understand
money
gets
made
,
gets
made
ways
’
like
,
''
said
.
introducing
digital
tech

also
emerging
player
indian
markets
gained
significant
market
share
last
two
years
.
players
also
charge
less
processing
fees
compared
palyers
like
mastercard
pose
significant
threat
mastercard
’
market
share
.
elcin
yanik
,
executive
vice
president
,
market
development
,
middle
east
africa
mastercard
,
said
:
“
mastercard
rapidly
expanding
presence
partnerships
africa
,
bringing
new
technologies
continent
help
enhance
consumer
experience
enable
greater
access
inclusion
.
partnership
jumia
underpins
mastercard
’
commitment
transforming
africa
’
digital
payments
landscape
.
recent
years
,
invested
heavily
technology
,
people
local
markets
,
seen
tremendous
growth
online
payments
particular
.
look
forward
working
jumia
enhance
region
’
digital
infrastructure
ecosystem
.
”
article
exclusive
subscribers
.
companies
recently
reported
excellent
financial
results.looking
historical
data
,
visa
lower
beta
(
)
coefficient
,
better
operating
margin
,
free
cash
flow
yield
dividend
yield.mastercar