# Start Here

In [5]:
import json
import textwrap
import pytrends
from pytrends.request import TrendReq
import nltk
from nltk.corpus import wordnet
import random
from tqdm import tqdm
from nltk.corpus import wordnet
from nltk.metrics.distance import edit_distance
from nltk.corpus import stopwords
# PlaintextParser
from sumy.parsers.plaintext import PlaintextParser
# LexRankSummarizer
from sumy.summarizers.lex_rank import LexRankSummarizer
# LsaSummarizer
from sumy.summarizers.lsa import LsaSummarizer
# define Tokenizer
from sumy.nlp.tokenizers import Tokenizer
# define Stemmer
from sumy.nlp.stemmers import Stemmer
# define language
pytrend = TrendReq(hl='en-US', tz=360)
stopwords = nltk.corpus.stopwords.words('english')


import os
from PIL import Image, ImageDraw, ImageFont
# with open("ppn_deck_cleaned.json", "w") as write_file:
#     json.dump(card_deck, write_file, indent=4)

# read card_deck from ppn_deck.json file
with open("ppn_deck.json", "r") as read_file:
    card_deck = json.load(read_file)


# clear the card_images folder
print("Clearing card_images folder...")
for filename in os.listdir("card_box"):
    os.remove(os.path.join("card_box", filename))



def summarize_text(text, num_sentences):
    """
    Summarize the given text using the LSA or LexRank summarization algorithms and return the summary as a string
    """
    # create a PlaintextParser object to parse the text
    parser = PlaintextParser.from_string(text, Tokenizer("english"))
    # choose a summarization algorithm
    # algorithm = LsaSummarizer()
    algorithm = LexRankSummarizer()

    # summarize the text and return the summary as a string
    summary = algorithm(parser.document, num_sentences)
    summary_text = "\n".join([str(sentence) for sentence in summary])

    return summary_text



def get_google_trends_score(title):
    # get the google trends score for the query (for all time)
    # this is to determine how popular the topic is
    title = str(title)
    pytrend.build_payload(kw_list=[title], timeframe='all')
    interest_over_time_df = pytrend.interest_over_time()
    # get the score for the last 12 months, and return the mean
    recent_score = interest_over_time_df[title][-12:].mean()
    # get the score for all time
    all_time_score = interest_over_time_df[title].mean()
    # return the higher of the two scores
    return max(recent_score, all_time_score)

def generate_card(title, definition, points, name=None):
    # if the font is 20 then the max width of the text is 40 characters. Use this to determine how large the title should be.
    image = Image.new('RGB', (550, 850), (255, 255, 255))
    draw = ImageDraw.Draw(image)
    # the title is wrapped to 40 characters, so the height of the rectangle is the number of characters * 20 (the height of the font)
    title_size = 20
    font_title = ImageFont.truetype('./fonts/Menlo.ttc', title_size)
    font_description = ImageFont.truetype('./fonts/Menlo.ttc', 20)
    font_points = ImageFont.truetype('./fonts/Menlo.ttc', 18)
    title_wrapped = textwrap.wrap(title, width=40)
    # draw the title centered horizontally, and 30 pixels from the top. The title is wrapped to 40 characters, so the height of the rectangle is the number of characters * 20 (the height of the font)

    title_rectangle_height = len(title_wrapped) * 20
    draw.rectangle([(10, 10), (540, 10 + title_rectangle_height)], fill='lightblue')
    y_text = 20
    for line in title_wrapped:
        draw.text((270, y_text), line, fill=(0, 0, 0), font=font_title, anchor='mm')
        y_text += 20
    # draw the definition left justified, and 10 pixels from the the bottom of the title rectangle. The definition is wrapped to 40 characters, so the height of the rectangle is the number of characters / 40 * 20 (the height of the font).
    definition_wrapped = textwrap.wrap(definition, width=40)
    definition_rectangle_height = len(definition_wrapped) * 20
    draw.rectangle([(10, 10 + title_rectangle_height + 10), (540, 10 + title_rectangle_height + 10 + definition_rectangle_height)], fill='white')
    y_text = 10 + title_rectangle_height + 20
    for line in definition_wrapped:
        draw.text((10, y_text), line, fill=(0, 0, 0), font=font_description, anchor='lm')
        y_text += 20



    # draw the points at the bottom of the card centered horizontally, and 20 pixels from the bottom
    # draw the rectangle around the points, with a light green background, add 10 pixels to the height of the rectangle to make it a little bigger and center the text within the rectangle vertically
    draw.rectangle([(10, 850 - 30 - 10), (540, 850 - 10)], fill='lightgreen')
    # draw the text in the rectangle
    draw.text((270, 850 - 30 - 5), str(points), fill=(0, 0, 0), font=font_points, anchor='mm') # anchor='mm' centers the text horizontally and vertically
    # save the image with the name of the card if not None
    if name is not None:
        image.save('./card_box/{}.png'.format(name)) # save the image with the name of the card, if it's not None
    else:
        image.save('./card_box/{}.png'.format(len(os.listdir('./card_box/')))) # save the image with the name of the number of images in the folder

def generate_physical_cards():
    #^ Example usage
    card = random.choice(card_deck)
    print(card)
    summary = card['summary'][1] if isinstance(card['summary'], list) else card['summary']
    # summarize the definition with the summarize function
    summary = summarize_text(summary, 2) if isinstance(summary, str) else summary # if the summary is a list, then it's already been summarized
    if isinstance(summary, str):
        summary = summarize_text(summary, 2)
    if isinstance(summary, list):
        summary = ' '.join(summary)
    #?points = len(set(summary.split()) - set(stopwords.words('english'))) # all words not in the stopword list
    points = len(set(summary.split())) # all words
    generate_card(str(card['title']), summary, points=points)
    # generate_card('test title', 'test definition', 10)

    # iterate through each card and generate a card image for it
    # note: if the card has been summarized already, then the summary will be a list, so we need to get the first element of the list
    for card in tqdm(card_deck):
        title = card['title']
        summary = card['summary'][1] if isinstance(card['summary'], list) else card['summary']
        # summarize the definition with the summarize function
        summary = summarize_text(summary, 2) if isinstance(summary, str) else summary # if the summary is a list, then it's already been summarized
        if isinstance(summary, str):
            summary = summarize_text(summary, 2)
        if isinstance(summary, list):
            summary = ' '.join(summary)
        # make the point value the number of unique words in the summary (unique to the card compared to other cards)
        #?points = len(set(summary.split()) - set(stopwords.words('english'))) # all words not in the stopword list
        points = len(set(summary.split())) # all words
        # generate the card
        generate_card(str(card['title']), summary, points=points) # get_google_trends_score(card['title'])
        #!print(f'Found a score of {get_google_trends_score(card["title"])} for {card["title"]}')

print("Initialized process, and ready to generate physical cards...")

Clearing card_images folder...
Initialized process, and ready to generate physical cards...


In [6]:

generate_physical_cards()
print("Done")

{'title': 'United States', 'summary': ['United States', 'United States, The United States of America U.S.A. or USA, commonly known as the United States U.S. or US or America, is a country primarily located in North America. It consists of 50 states, a federal district, five major unincorporated territories, nine Minor Outlying Islands, and 326 Indian reservations. It is the worlds third-largest country by both land and total area. The United States shares land borders with Canada to its north and with Mexico to its south. It has maritime borders with the Bahamas, Cuba, Russia, and other nations. With a population of over 331 million, it is the most populous country in the Americas and the third most populous in the world. The national capital is Washington, D.C., and the most populous city and financial center is New York City.\\nPaleo-Americans migrated from Siberia to the North American mainland at least 12,000 years ago, and advanced cultures began to appear later on. These advanced

100%|██████████| 4651/4651 [02:00<00:00, 38.53it/s]

Done





In [7]:
from sklearn.preprocessing import MinMaxScaler

# Create a MinMaxScaler object
scaler = MinMaxScaler()

# Get a list of the links_on_page values from the card_deck
links_on_page_values = [card['links_on_page'] for card in card_deck]

# Fit the scaler on the links_on_page values
scaler.fit(links_on_page_values)

# Use the scaler to transform the links_on_page values
scaled_links_on_page = scaler.transform(links_on_page_values)

# Update the points value for each card in the card_deck with the scaled links_on_page value
for i, card in enumerate(card_deck):
    card['points'] = int(scaled_links_on_page[i])


KeyError: 'links_on_page'