In [7]:
##============ Dependencies and libraries ============##
from dotenv import load_dotenv
import os

import re
import nltk
nltk.download('stopwords')
nltk.download('wordnet')
nltk.download('omw-1.4')
nltk.download('punkt_tab')
nltk.download('averaged_perceptron_tagger')
from nltk.corpus import wordnet
from nltk.corpus import stopwords

import spacy
from rake_nltk import Rake
from keybert import KeyBERT


# Load environment variables from the .env file
load_dotenv()

[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/cezarykubinski/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     /Users/cezarykubinski/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package omw-1.4 to
[nltk_data]     /Users/cezarykubinski/nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!
[nltk_data] Downloading package punkt_tab to
[nltk_data]     /Users/cezarykubinski/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /Users/cezarykubinski/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!


True

### Keyword preparation

In [10]:
# Shared NLP resources used by the extractor functions defined below:
# the small English spaCy pipeline and a KeyBERT model built with its defaults.
nlp = spacy.load("en_core_web_sm")
keybert_model = KeyBERT()

# Seed vocabulary for the keyword expansion (one entry per phrase, order preserved)
base_keywords = [
    "water utility", "water supply", "municipal water", "public utility",
    "water management", "wastewater treatment", "sewage treatment",
    "water infrastructure", "water pipes", "water infrastructure investment",
    "water technology", "desalination", "water recycling",
    "drought", "climate change", "water scarcity", "extreme weather",
    "water pollution", "water contamination", "microplastics", "lead in water",
    "water regulation", "environmental policy", "clean water act",
    "infrastructure bill",
    "profit", "revenue", "growth", "investment", "market share", "dividends",
    "fear", "optimism", "crisis", "opportunity", "shortage",
    "innovation", "water conservation", "efficiency", "smart water systems",
    "leak detection", "AI in water", "IoT in water", "water automation",
    "digital water",
]

# Method 1: Expand Keywords with Synonyms using NLTK
def generate_keywords_with_synonyms(base_keywords):
    """Expand the base keywords with the lemma names of their WordNet synsets.

    Args:
        base_keywords: Iterable of keyword strings. Multi-word phrases may be
            written with spaces (e.g. "water supply").

    Returns:
        A list containing every original keyword plus every lemma name found
        for it, with underscores turned back into spaces. The list is built
        from a set, so it holds no duplicates and its order is not guaranteed.
    """
    # Materialise once so a one-shot iterable can both seed the set and be looped over.
    base_keywords = list(base_keywords)
    expanded_keywords = set(base_keywords)
    for keyword in base_keywords:
        # Lemma names come back underscore-separated (see the replace() below),
        # so a phrase written with spaces must be converted to that form before
        # the lookup; otherwise multi-word keywords never match a synset.
        lookup_form = '_'.join(keyword.split())
        if not lookup_form:
            # Blank keyword: nothing to look up.
            continue
        for syn in wordnet.synsets(lookup_form):
            for lemma in syn.lemmas():
                expanded_keywords.add(lemma.name().replace('_', ' '))
    return list(expanded_keywords)

# Method 2: Extract Keywords Using spaCy
def extract_keywords_with_spacy(text):
    """Return the distinct noun-chunk strings found in ``text``.

    The text is run through the module-level ``nlp`` pipeline and the ``.text``
    of every noun chunk is collected. Duplicates are dropped via a set, so the
    order of the returned list is not guaranteed.
    """
    unique_chunks = {noun_chunk.text for noun_chunk in nlp(text).noun_chunks}
    return list(unique_chunks)

# Method 3: Extract Keywords Using RAKE
def extract_keywords_with_rake(text):
    """Return the phrases RAKE extracts from ``text``, in RAKE's ranked order.

    A fresh ``Rake`` instance with default settings is created on every call.
    """
    extractor = Rake()
    extractor.extract_keywords_from_text(text)
    ranked_phrases = extractor.get_ranked_phrases()
    return ranked_phrases

# Method 4: Extract Keywords Using KeyBERT
def extract_keywords_with_keybert(text):
    """Return the keyword strings KeyBERT extracts from ``text``.

    The module-level ``keybert_model`` is asked for one- and two-word
    keyphrases with English stop words removed. Only the first element of each
    returned item is kept; the rest of the item is discarded.
    """
    scored_phrases = keybert_model.extract_keywords(
        text,
        keyphrase_ngram_range=(1, 2),
        stop_words='english',
    )
    phrases = []
    for scored in scored_phrases:
        phrases.append(scored[0])
    return phrases

# Main Function to Combine All Methods
def generate_keywords_and_phrases(base_keywords, input_text):
    """Merge the results of all four keyword methods into one de-duplicated list.

    Args:
        base_keywords: Seed keywords passed to the WordNet synonym expansion.
        input_text: Free text passed to the spaCy, RAKE and KeyBERT extractors.

    Returns:
        A list of unique keywords/phrases. It is built from a set, so the
        order is not guaranteed.
    """
    # Run the methods in a fixed order: synonyms, spaCy, RAKE, KeyBERT.
    phrase_batches = (
        generate_keywords_with_synonyms(base_keywords),
        extract_keywords_with_spacy(input_text),
        extract_keywords_with_rake(input_text),
        extract_keywords_with_keybert(input_text),
    )

    # Flatten the batches in that same order, then drop duplicates.
    combined = [phrase for batch in phrase_batches for phrase in batch]
    return list(set(combined))

# Example Input Text
input_text = """
Water utility and water-related companies tend to be relatively stable investments, and their prices generally increase over the long term. Here are some key trends that impact their performance:  1. Steady Demand 	•	Consistent Demand for Water: Water is essential for life, and demand for clean water and wastewater services is consistently high. This steady demand helps water utility companies generate reliable revenues, making their stocks less volatile than many others. 	•	Population Growth: As populations grow, especially in urban areas, demand for water utilities tends to increase, which can boost revenues for water companies over time.  2. Infrastructure Investment Needs 	•	Aging Infrastructure: In the U.S. and many developed countries, water infrastructure is aging and requires significant investment. Companies in this sector often receive contracts and funding for modernization projects, which can support long-term growth. 	•	Innovation and Efficiency: Companies that provide technology to manage and conserve water resources, like Xylem, benefit from the rising need for sustainable water management, especially in water-scarce regions.  3. Environmental and Regulatory Factors 	•	Climate Change Impact: Climate-related issues like droughts, flooding, and pollution are driving investment in water management, treatment, and recycling technologies. This can positively impact companies involved in water purification, wastewater treatment, and infrastructure resilience. 	•	Environmental Regulations: Stricter regulations on water quality and environmental protections require utilities to invest in advanced technologies and sustainable practices. This can lead to stable revenue streams for water companies over time.  4. Dividend-Paying Stocks 	•	Many water utility companies pay dividends, which makes them attractive to investors looking for steady income. 
Dividends tend to attract long-term investors, which can stabilize stock prices and contribute to gradual appreciation over time.  Historical Performance 	•	Long-Term Growth: Water utility stocks and infrastructure providers, like American Water Works, Essential Utilities, and Xylem, have shown long-term growth trends, though they tend to rise slowly compared to high-growth sectors like tech. 	•	Low Volatility: Water stocks are typically less volatile than other sectors, especially during economic downturns, because water demand is stable. This has made them historically resilient even during recessions.  General Outlook  While no stock is without risk, water companies are often viewed as defensive investments with stable, long-term growth potential. However, their prices may fluctuate in response to economic changes, regulatory updates, and shifts in water demand in specific regions.  If you’re looking for steady growth with relatively low volatility, water companies can be a good choice, especially if you are focused on a long-term investment strategy.  The prices of water companies are influenced by a mix of global and local events, spanning environmental, economic, regulatory, and social changes. Here’s a look at the main factors:  1. Climate Change and Extreme Weather 	•	Droughts: Prolonged droughts increase water scarcity, especially in arid regions like the southwestern United States, leading to higher demand for water management and conservation services. This can drive up prices for companies involved in water recycling, desalination, and efficient water infrastructure. 	•	Flooding and Hurricanes: Floods and storms can damage water infrastructure and increase the need for repairs and new investments. Companies involved in water infrastructure and emergency water treatment often benefit from this increased spending. 	
•	Rising Temperatures: Higher temperatures increase water consumption (e.g., for agriculture and cooling systems), especially in urban areas, which can boost revenues for water utility companies.  2. Population Growth and Urbanization 	•	Growing Urban Populations: As more people move to cities, especially in fast-growing regions, the demand for municipal water and wastewater services rises. Companies providing these services, such as Essential Utilities, tend to see increased revenue opportunities. 	•	Resource Scarcity in Water-Stressed Regions: Rapidly growing populations in arid or drought-prone areas can create a steady demand for advanced water treatment and management solutions, which benefits companies that provide infrastructure and conservation technologies.  3. Government and Environmental Regulations 	•	Water Quality Standards: Governments worldwide have strict regulations for water purity and wastewater treatment, often requiring companies to invest in advanced technologies. This creates consistent business for companies involved in water purification and infrastructure. 	•	Incentives and Subsidies: Some governments offer subsidies or incentives to encourage investments in water-efficient infrastructure, conservation technologies, or sustainable water management. These incentives can boost profits for companies that provide relevant solutions. 	•	International Environmental Agreements: Global efforts to address climate issues, like the Paris Agreement, push countries to adopt sustainable water practices, which increases investment in water conservation technologies and sustainable infrastructure.  4. Economic Conditions and Interest Rates 	•	Recession Resistance: Water is an essential utility, so water companies often experience stable demand even during economic downturns. This can make water stocks more attractive in uncertain economic times, potentially boosting their prices. 	
•	Interest Rate Changes: Infrastructure projects often require financing, so higher interest rates can increase costs for water companies and impact their profits, which can lead to short-term price drops.  5. Technological Innovations 	•	Water Recycling and Desalination: Advances in desalination (converting seawater into freshwater) and water recycling technologies are creating new revenue opportunities, especially in water-scarce regions. Companies leading in these technologies, like Xylem, often benefit from this trend. 	•	Smart Water Management: Digital technology in water management, such as sensors and AI for leak detection, is improving efficiency. Companies that adopt or provide these technologies can benefit as they reduce water waste and improve cost-effectiveness for cities and companies.  6. Public Health and Pollution Concerns 	•	Water Contamination Incidents: Major contamination events (like Flint, Michigan’s water crisis) drive regulatory changes and public pressure to improve water infrastructure. This creates demand for companies involved in water purification and monitoring. 	•	Focus on Clean Drinking Water: Growing awareness of water pollution and contaminants (like lead and microplastics) encourages investment in clean water technologies and infrastructure. This can positively impact companies in the water sector, especially those focused on purification.  7. Mergers, Acquisitions, and Privatization 	•	Acquisitions of Smaller Utilities: Larger water companies often acquire smaller, regional utilities to expand their reach. This can lead to stock price increases for the acquiring company if it helps capture more revenue. 	•	Privatization Trends: Some governments privatize water utilities or bring in private companies to manage public water resources, opening new markets for companies. This can increase demand and revenue for water companies entering these contracts.  8. 
Public Attitudes and ESG Investing 	•	Environmental, Social, and Governance (ESG) Investing: Growing public interest in sustainable and ethical investing has increased the focus on companies addressing water scarcity and conservation. Water companies with strong environmental practices often attract more investment as they appeal to ESG-focused funds. 	•	Pressure for Sustainable Water Management: Investors and consumers are increasingly scrutinizing how companies manage water resources. Firms that demonstrate commitment to sustainable water use may see increased investor interest and higher stock prices.  These events and trends create both challenges and opportunities for water companies, affecting their stock prices based on how they respond to changing environmental, social, and economic demands.
"""
# Generate keywords and phrases from the seed list and the example text
keywords_and_phrases = generate_keywords_and_phrases(base_keywords, input_text)

# Join the list of phrases into a single string so it can be cleaned in one pass
joined_text = ' '.join(keywords_and_phrases)

# Replace bullets, basic punctuation and line breaks/tabs with a space
cleaned_text = re.sub(r'[•\.\,\!\?\n\r\t]+', ' ', joined_text)
cleaned_text = re.sub(r'\s{2,}', ' ', cleaned_text)  # Collapse runs of whitespace
cleaned_text = cleaned_text.strip()  # Remove leading and trailing spaces

# Normalize case
cleaned_text = cleaned_text.lower()

# Split the cleaned string on single spaces into individual words
# (phrases are broken up here, so the same word can appear more than once)
cleaned_keywords = cleaned_text.split(' ')

# Small hand-picked stopword set (extend as needed).
# It has its own name on purpose: calling it `stopwords` would rebind the
# `nltk.corpus.stopwords` import and break any later `stopwords.words(...)` call.
custom_stopwords = {"the", "and", "a", "of", "to", "in", "for", "on", "with", "at", "by", "from", "as", "an", "you"}

# `if word` drops the single empty token that split(' ') yields for empty text
cleaned_keywords = [word for word in cleaned_keywords if word and word not in custom_stopwords]

# Print the cleaned keywords, one per line, followed by their count
for keyword in cleaned_keywords:
    print(keyword)

print(len(cleaned_keywords))


influenced
population
growth
investiture
some
key
trends
rise
slowly
compared
create
investment
funds
investing
investors
though
their
stock
prices
climate
tax
income
shortfall
water
increased
spending
environmental
policy
steady
income
leak
detection
agriculture
cooling
systems
climate
change
like
lead
advances
digital
technology
sustainable
water
management
long-term
growth
trends
helps
capture
often
benefit
lucre
related
companies
tend
clean
water
act
these
contracts
7
xylem
risk
steady
demand
potentially
boosting
significant
investment
u
s
encourage
investments
investment
like
increasingly
scrutinizing
governments
often
requiring
companies
instauration
net
income
companies
providing
sustainable
water
governments
privatize
water
utilities
paris
agreement
sustainable
infrastructure
water
sector
populations
grow
these
events
dividend-paying
stocks
many
water
utility
companies
specific
regions
climate
change
water
utility
companies
focus
agriculture
drouth
water
stocks
no
stock
emergen