In [None]:
# pip install wordcloud numpy pandas matplotlib pillow

In [None]:
import numpy as np
import pandas as pd
from PIL import Image
from wordcloud import WordCloud, STOPWORDS
import matplotlib.pyplot as plt
import random

# Load the custom mask image (Christmas tree shape).
# The context manager closes the image file once its pixels have been
# copied into the NumPy array.
with Image.open("christmas_tree.png") as mask_file:
    mask_image = np.array(mask_file)

# Read the "Index Keywords" column from the CSV file
# Replace with your own file
df = pd.read_csv("hkust_pub_2024_scopus.csv")
if "Index Keywords" not in df.columns:
    raise ValueError("The file must contain a column named 'Index Keywords'.")

# Combine all text from the "Index Keywords" column into a single string.
# Missing cells are dropped; astype(str) keeps the join from failing if a
# cell was parsed as a non-string value (e.g. a purely numeric keyword).
text = " ".join(df["Index Keywords"].dropna().astype(str))
if not text.strip():
    # Fail here with a clear message instead of deep inside WordCloud.generate.
    raise ValueError("The 'Index Keywords' column contains no text.")

# Stop words to be excluded by WordCloud (its built-in English list)
stopwords = set(STOPWORDS)

# Define a custom color function for Christmas colors
def christmas_color_func(word, font_size, position, orientation, random_state=None, **kwargs):
    """Pick a random red, green, or gold shade for a word, dimmed by font size.

    The signature follows the ``color_func`` callback that ``WordCloud``
    invokes for every placed word.

    Args:
        word: The word being coloured (unused).
        font_size: Font size of the word. Brightness is ``font_size / 100``,
            so smaller words come out darker; sizes above 100 saturate at
            full brightness.
        position: Position of the word (unused).
        orientation: Orientation of the word (unused).
        random_state: Optional ``random.Random``-like object providing
            ``randint`` and ``choice``. When given, it is used for all random
            draws so a seeded generator yields repeatable colours; otherwise
            the global ``random`` module is used.
        **kwargs: Extra callback arguments, ignored.

    Returns:
        A colour string of the form ``"rgb(r, g, b)"`` with every channel in
        the range 0-255.
    """
    rng = random_state if random_state is not None else random

    red = (255, rng.randint(50, 150), rng.randint(50, 150))
    green = (rng.randint(50, 150), 255, rng.randint(50, 150))
    gold = (255, 223, rng.randint(50, 100))
    chosen_color = rng.choice([red, green, gold])

    scale = font_size / 100  # Scale brightness by font size
    # Clamp each channel: without this, fonts larger than 100 would produce
    # values above 255, which is not a valid RGB colour.
    r, g, b = (min(255, max(0, int(c * scale))) for c in chosen_color)
    return f"rgb({r}, {g}, {b})"

# Generate the word cloud.
# Because a mask is supplied, WordCloud takes the canvas size from the mask
# image and ignores width/height, so they are not passed here. For a larger
# output, use a higher-resolution mask image or WordCloud's `scale` argument.
wordcloud = WordCloud(
    background_color="white",
    mask=mask_image,
    contour_width=3,
    contour_color="black",
    max_words=200,
    stopwords=stopwords,
    color_func=christmas_color_func,
).generate(text)

# Save the rendered word cloud directly as an image file
wordcloud.to_file("christmas_tree_wordcloud.png")

# Display the word cloud using Matplotlib
plt.figure(figsize=(12, 12))  # Square 12x12 inch figure
plt.imshow(wordcloud, interpolation="bilinear")  # Bilinear for smooth rendering
plt.axis("off")
plt.title("Merry Christmas!", fontsize=20)

# Save the Matplotlib figure (including the title) at 300 DPI
plt.savefig("christmas_tree_wordcloud_matplotlib.png", dpi=300, bbox_inches="tight", pad_inches=0)
plt.show()