In [10]:
beta = False
if beta:
    # Implementation from https://dev.to/davidisrawi/build-a-quick-summarizer-with-python-and-nltk

    from nltk.corpus import stopwords
    from nltk.stem import PorterStemmer
    from nltk.tokenize import word_tokenize, sent_tokenize


    def _create_frequency_table(text_string) -> dict:
        """
        Build a word-frequency table for ``text_string``.

        The text is split with ``word_tokenize`` and each token is reduced to its
        stem with ``PorterStemmer``. English stop words are left out: a token is
        skipped when either the lower-cased token itself or its stem is in the
        NLTK English stop word list.

        :param text_string: the text to analyse
        :return: mapping of stemmed token -> number of occurrences
        :rtype: dict
        """
        stopWords = set(stopwords.words("english"))
        ps = PorterStemmer()

        # NOTE(review): every token the tokenizer yields is counted, which
        # presumably includes punctuation tokens - confirm this is intended.
        freqTable = dict()
        for token in word_tokenize(text_string):
            # Check the raw token first: stemming can change a stop word so
            # that it no longer matches the stop word list.
            if token.lower() in stopWords:
                continue
            stem = ps.stem(token)
            # Also drop tokens whose stem is itself a stop word.
            if stem in stopWords:
                continue
            freqTable[stem] = freqTable.get(stem, 0) + 1

        return freqTable


    def _score_sentences(sentences, freqTable) -> dict:
        """
        score a sentence by its words
        Basic algorithm: adding the frequency of every non-stop word in a sentence divided by total no of words in a sentence.
        :rtype: dict
        """

        sentenceValue = dict()

        for sentence in sentences:
            word_count_in_sentence = (len(word_tokenize(sentence)))
            word_count_in_sentence_except_stop_words = 0
            for wordValue in freqTable:
                if wordValue in sentence.lower():
                    word_count_in_sentence_except_stop_words += 1
                    if sentence[:10] in sentenceValue:
                        sentenceValue[sentence[:10]] += freqTable[wordValue]
                    else:
                        sentenceValue[sentence[:10]] = freqTable[wordValue]

            if sentence[:10] in sentenceValue:
                sentenceValue[sentence[:10]] = sentenceValue[sentence[:10]] / word_count_in_sentence_except_stop_words

            '''
            Notice that a potential issue with our score algorithm is that long sentences will have an advantage over short sentences.
            To solve this, we're dividing every sentence score by the number of words in the sentence.

            Note that here sentence[:10] is the first 10 character of any sentence, this is to save memory while saving keys of
            the dictionary.
            '''

        return sentenceValue


    def _find_average_score(sentenceValue) -> int:
        """
        Find the average score from the sentence value dictionary
        :rtype: int
        """
        sumValues = 0
        for entry in sentenceValue:
            sumValues += sentenceValue[entry]

        # Average value of a sentence from original text
        average = (sumValues / len(sentenceValue))

        return average


    def _generate_summary(sentences, sentenceValue, threshold):
        sentence_count = 0
        summary = ''

        for sentence in sentences:
            if sentence[:10] in sentenceValue and sentenceValue[sentence[:10]] >= (threshold):
                summary += " " + sentence
                sentence_count += 1

        return summary


    def run_summarization(text):
        """
        Produce an extractive summary of ``text``.

        Pipeline: build the word-frequency table, split the text into
        sentences, score each sentence, then keep the sentences scoring at
        least 1.3 times the average sentence score.

        :param text: the text to summarize
        :return: the selected sentences as a single string
        """
        # Word frequencies over the whole text.
        word_frequencies = _create_frequency_table(text)

        # Sentence boundaries come from NLTK's sentence tokenizer.
        sentence_list = sent_tokenize(text)

        # Per-sentence scores and the cutoff derived from their average.
        scores = _score_sentences(sentence_list, word_frequencies)
        cutoff = 1.3 * _find_average_score(scores)

        return _generate_summary(sentence_list, scores, cutoff)

    # Sample input for the demo run below.
    text_str = ' nothing is more important than the health of your family. ' 
    # Run the demo only when this code executes as the main module.
    if __name__ == '__main__':
        result = run_summarization(text_str)
        print(result)


    # credit: https://github.com/akashp1712/nlp-akash/blob/master/text-summarization/Word_Frequency_Summarization.py

In [11]:

#username = '{}'
#project_name = 'https://github.com/{}/PySeas'
import requests
from bs4 import BeautifulSoup


def get_text_from_url(url, timeout=10):
    """
    Download a web page and return its visible text, one non-empty chunk per line.

    :param url: address of the page; '#readme' is appended before the request
    :param timeout: seconds to wait for the server before giving up
    :return: page text with script/style content removed, each line stripped,
        lines split on double spaces, and blank chunks dropped
    :rtype: str
    :raises requests.exceptions.RequestException: on connection problems,
        timeouts, or an HTTP error status
    """
    # NOTE(review): '#readme' is a URL fragment, which HTTP clients normally do
    # not send to the server, so this presumably fetches the whole page - confirm.
    url = url + '#readme'

    # A timeout keeps the notebook from hanging on an unresponsive server, and
    # raise_for_status stops an error page from being parsed as README text.
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()

    soup = BeautifulSoup(response.text, 'lxml')  # lxml is the parser
    # Remove script and style elements so their content is not part of the text.
    for element in soup(["script", "style"]):
        element.extract()

    text = soup.get_text()
    # Strip each line, then break lines containing double spaces into separate chunks.
    lines = (line.strip() for line in text.splitlines())
    chunks = (phrase.strip() for line in lines for phrase in line.split("  "))
    # Drop blank chunks.
    return '\n'.join(chunk for chunk in chunks if chunk)


def create_project_tile(project_url):
    """
    Create markdown for a project: a pinned-repo tile plus a README table row.

    The user name and repository name are taken from the last two path
    segments of ``project_url`` (surrounding whitespace and trailing slashes
    are ignored), e.g. ``https://github.com/<username>/<project_name>``.

    The description in the table row comes from ``run_summarization`` applied
    to the fetched page text when the module-level ``beta`` flag is true;
    otherwise the user is prompted for it.

    :param project_url: URL of the repository main page
    :type project_url: str
    :return: the tile markdown, an instruction line, and the table row,
        separated by blank lines
    :rtype: str
    :raises IndexError: if ``project_url`` has fewer than two '/'-separated segments
    """
    # Strip whitespace and trailing slashes so '.../user/repo/' still yields
    # 'repo' rather than an empty project name.
    url_parts = project_url.strip().rstrip('/').split('/')
    project_name = url_parts[-1]
    username = url_parts[-2]
    markdown_string = """<a href="https://github.com/{}/{}"><img width="278" src="https://denvercoder1-github-readme-stats.vercel.app/api/pin/?username={}&repo={}&theme=react&bg_color=1F222E&title_color=9ACD32&hide_border=true&icon_color=FF4500&show_icons=false" alt="{}"></a>""".format(username,project_name,username,project_name,project_name)
    if beta:
        # Step 1: fetch the page text and pick the paragraph to summarize.
        text = get_text_from_url(project_url)
        # Look at the first four lines at most and take the first one with at
        # least 100 characters (shorter lines are probably not a good summary
        # source). If none is long enough, fall back to the last line examined.
        # Slicing avoids an IndexError on pages with fewer than four lines.
        candidates = text.split('\n')[:4]
        first_paragraph = next(
            (line for line in candidates if len(line) >= 100),
            candidates[-1],
        )

        print(first_paragraph)
        # Step 2: summarize the paragraph with the frequency-based summarizer.
        summary = run_summarization(first_paragraph)
    else:
        summary = input("Enter a short description of the project: ")
    table_row  = """| [{}](https://github.com/{}/{}) |  ![last commit](https://img.shields.io/github/last-commit/{}/{}) ![code size](https://img.shields.io/github/languages/code-size/{}/{}) ![commit activity](https://img.shields.io/github/commit-activity/m/{}/{}) ![issues](https://img.shields.io/github/issues/{}/{}) | {} |""".format(project_name,username,project_name,username,project_name,username,project_name,username,project_name,username,project_name,summary)

    # Join tile, instruction line and table row with a blank line between each.
    result =  markdown_string + '\n\n' + 'Paste the block below into the the bottom row of the table at the end of the readme.' + '\n\n' + table_row
    return result


# Ask for the repository URL; create_project_tile derives the user and repo names from it.
project_url = input("Enter the url of your project: ")

# Holds the full result: tile markdown, instruction line, and README table row.
markdown_string = create_project_tile(project_url)

print(markdown_string)

<a href="https://github.com/MechanicalSoup/MechanicalSoup"><img width="278" src="https://denvercoder1-github-readme-stats.vercel.app/api/pin/?username=MechanicalSoup&repo=MechanicalSoup&theme=react&bg_color=1F222E&title_color=9ACD32&hide_border=true&icon_color=FF4500&show_icons=false" alt="MechanicalSoup"></a>

Paste the block below into the the bottom row of the table at the end of the readme.

| [MechanicalSoup](https://github.com/MechanicalSoup/MechanicalSoup) |  ![last commit](https://img.shields.io/github/last-commit/MechanicalSoup/MechanicalSoup) ![code size](https://img.shields.io/github/languages/code-size/MechanicalSoup/MechanicalSoup) ![commit activity](https://img.shields.io/github/commit-activity/m/MechanicalSoup/MechanicalSoup) ![issues](https://img.shields.io/github/issues/MechanicalSoup/MechanicalSoup) | A Python library for automating interaction with websites. MechanicalSoup automatically stores and sends cookies, follows redirects, and can follow links and submit form

In [12]:
# urls = ['https://github.com/talkygram/webcrawly','https://github.com/BKAmos/DataScience','https://github.com/VanillaLattA/DatascienceShortcuts']

# for url in urls:
#     markdown_string = create_project_tile(url)
#     print(markdown_string)

<a href="https://github.com/talkygram/webcrawly"><img width="278" src="https://denvercoder1-github-readme-stats.vercel.app/api/pin/?username=talkygram&repo=webcrawly&theme=react&bg_color=1F222E&title_color=9ACD32&hide_border=true&icon_color=FF4500&show_icons=false" alt="webcrawly"></a>

Paste the block below into the the bottom row of the table at the end of the readme.

| [webcrawly](https://github.com/talkygram/webcrawly) |  ![last commit](https://img.shields.io/github/last-commit/talkygram/webcrawly) ![code size](https://img.shields.io/github/languages/code-size/talkygram/webcrawly) ![commit activity](https://img.shields.io/github/commit-activity/m/talkygram/webcrawly) ![issues](https://img.shields.io/github/issues/talkygram/webcrawly) |  |
<a href="https://github.com/BKAmos/DataScience"><img width="278" src="https://denvercoder1-github-readme-stats.vercel.app/api/pin/?username=BKAmos&repo=DataScience&theme=react&bg_color=1F222E&title_color=9ACD32&hide_border=true&icon_color=FF4500&