# Start Here

In [1]:
import json
import textwrap
import pytrends
from pytrends.request import TrendReq
import nltk
from nltk.corpus import wordnet
import random
from tqdm import tqdm
from nltk.corpus import wordnet
from nltk.metrics.distance import edit_distance
from nltk.corpus import stopwords
# PlaintextParser
from sumy.parsers.plaintext import PlaintextParser
# LexRankSummarizer
from sumy.summarizers.lex_rank import LexRankSummarizer
# LsaSummarizer
from sumy.summarizers.lsa import LsaSummarizer
# define Tokenizer
from sumy.nlp.tokenizers import Tokenizer
# define Stemmer
from sumy.nlp.stemmers import Stemmer
# define language
pytrend = TrendReq(hl='en-US', tz=360)
stopwords = nltk.corpus.stopwords.words('english')


import os
from PIL import Image, ImageDraw, ImageFont
# with open("ppn_deck_cleaned.json", "w") as write_file:
#     json.dump(card_deck, write_file, indent=4)

# read card_deck from ppn_deck.json file
with open("ppn_deck.json", "r") as read_file:
    card_deck = json.load(read_file)


# clear the card_images folder
print("Clearing card_images folder...")
for filename in os.listdir("card_images"):
    os.remove(os.path.join("card_images", filename))



def summarize_text(text, num_sentences):
    """
    Summarize the given text using the LSA or LexRank summarization algorithms and return the summary as a string
    """
    # create a PlaintextParser object to parse the text
    parser = PlaintextParser.from_string(text, Tokenizer("english"))
    # choose a summarization algorithm
    # algorithm = LsaSummarizer()
    algorithm = LexRankSummarizer()

    # summarize the text and return the summary as a string
    summary = algorithm(parser.document, num_sentences)
    summary_text = "\n".join([str(sentence) for sentence in summary])

    return summary_text



def get_google_trends_score(title):
    # get the google trends score for the query (for all time)
    # this is to determine how popular the topic is
    title = str(title)
    pytrend.build_payload(kw_list=[title], timeframe='all')
    interest_over_time_df = pytrend.interest_over_time()
    # get the score for the last 12 months, and return the mean
    recent_score = interest_over_time_df[title][-12:].mean()
    # get the score for all time
    all_time_score = interest_over_time_df[title].mean()
    # return the higher of the two scores
    return max(recent_score, all_time_score)

def generate_card(title, definition, points):
    # determine the font size based on the length of the definition
    # font_size = int(len(definition) / 20)
    font_size = max(30, int(len(definition) / 20))
    # create the image and draw objects
    # set the canvas size to 8.5 cm by 5.5 cm
    image = Image.new('RGB', (550, 850), (255, 255, 255))
    draw = ImageDraw.Draw(image)

    # select a font and draw the title in a rectangle
    font = ImageFont.truetype('./fonts/Menlo.ttc', 15)
    draw.rectangle([(10, 10), (540, 50)], fill='lightgrey')
    draw.text((20, 20), title, fill=(0, 0, 0), font=font)

    # draw the definition in a rectangle, and soft wrap the text. Don't exceed 40 characters per line. 
    # wrapped_definition = textwrap.wrap(definition, width=40)
    definition = str(definition) if isinstance(definition, str) else definition[0]
    wrapped_definition = textwrap.fill(definition, width=80)
    wrapped_definition_str = "\n".join(wrapped_definition)
    font = ImageFont.truetype('./fonts/Menlo.ttc', font_size)
    draw.text((20, 70), wrapped_definition, fill=(0, 0, 0))

    # draw a circle around the point value
    draw.ellipse([(520, 820), (540, 840)], fill='lightgreen')
    # draw the point value

    font = ImageFont.truetype('./fonts/Menlo.ttc', font_size + 1)
    draw.text((525, 825), str(points), fill=(0, 0, 0))

    # save the image
    image.save('./card_images/{}.png'.format(len(os.listdir('./card_images/'))))

def generate_physical_cards():
    #^ Example usage
    card = random.choice(card_deck)
    print(card)
    summary = card['summary'][1] if isinstance(card['summary'], list) else card['summary']
    # summarize the definition with the summarize function
    summary = summarize_text(summary, 2) if isinstance(summary, str) else summary # if the summary is a list, then it's already been summarized
    if isinstance(summary, str):
        summary = summarize_text(summary, 2)
    if isinstance(summary, list):
        summary = ' '.join(summary)
    #?points = len(set(summary.split()) - set(stopwords.words('english'))) # all words not in the stopword list
    points = len(set(summary.split())) # all words
    generate_card(str(card['title']), summary, points=points)
    # generate_card('test title', 'test definition', 10)

    # iterate through each card and generate a card image for it
    # note: if the card has been summarized already, then the summary will be a list, so we need to get the first element of the list
    for card in tqdm(card_deck):
        title = card['title']
        summary = card['summary'][1] if isinstance(card['summary'], list) else card['summary']
        # summarize the definition with the summarize function
        summary = summarize_text(summary, 2) if isinstance(summary, str) else summary # if the summary is a list, then it's already been summarized
        if isinstance(summary, str):
            summary = summarize_text(summary, 2)
        if isinstance(summary, list):
            summary = ' '.join(summary)
        # make the point value the number of unique words in the summary (unique to the card compared to other cards)
        #?points = len(set(summary.split()) - set(stopwords.words('english'))) # all words not in the stopword list
        points = len(set(summary.split())) # all words
        # generate the card
        generate_card(str(card['title']), summary, points=points) # get_google_trends_score(card['title'])
        #!print(f'Found a score of {get_google_trends_score(card["title"])} for {card["title"]}')

print("Initialized process, and ready to generate physical cards...")

Clearing card_images folder...
Initialized process, and ready to generate physical cards...


In [2]:

generate_physical_cards()
print("Done")

{'title': 'Guerrilla Zoo', 'summary': ['Guerrilla Zoo', 'Guerrilla Zoo is a contemporary arts organisation formed in 2004 by founder and creative director James Elphick. The group produce a variety of creative events from experiential environments, live concerts, festivals, immersive theatre, art exhibitions, arts awards, parties and masquerade balls.'], 'related': 24, 'summary_short': "['Guerrilla Zoo', 'Guerrilla Zoo is a contemporary arts organisation formed in 2004 by founder and creative director James Elphick.The group produce a variety of creative events from experiential environments, live concerts, festivals, immersive theatre, art exhibitions, arts awards, parties and masquerade balls.']"}


100%|██████████| 2521/2521 [00:43<00:00, 57.74it/s]

Done





In [3]:
from sklearn.preprocessing import MinMaxScaler

# Create a MinMaxScaler object
scaler = MinMaxScaler()

# Get a list of the links_on_page values from the card_deck
links_on_page_values = [card['links_on_page'] for card in card_deck]

# Fit the scaler on the links_on_page values
scaler.fit(links_on_page_values)

# Use the scaler to transform the links_on_page values
scaled_links_on_page = scaler.transform(links_on_page_values)

# Update the points value for each card in the card_deck with the scaled links_on_page value
for i, card in enumerate(card_deck):
    card['points'] = int(scaled_links_on_page[i])


KeyError: 'links_on_page'