### Import Libraries

In [1]:
import re
import slate3k as slate
import requests
import config
from sumy.utils import get_stop_words
from sumy.parsers.plaintext import PlaintextParser
from sumy.nlp.tokenizers import Tokenizer
from sumy.nlp.stemmers import Stemmer
from sumy.summarizers.lsa import LsaSummarizer

#### Strategy
Summarize three books individually
- Clean data
- Summarize
- Get sentiments 
- Add to dataframe

#### Load data

In [2]:
# Open the book PDF in binary mode and extract its text with slate.
with open('The Subtle Art of Not Giving a Fck - A Counterintuitive Approach to Living a Good Life (2016).pdf', 'rb') as f:
    extracted_text = slate.PDF(f)
    # Concatenate the extracted pieces with no separator into one string.
    # NOTE(review): presumably slate.PDF yields one string per page — confirm.
    full_text =  ''.join(extracted_text)

#### Extract and slice the book sections

In [3]:
def extract_content_body(full_text):
    """Slice the table of contents and the main body out of the raw book text.

    The text is scanned for the markers ``CHAPTER 1``, ``Acknowledgments``
    and ``ACKNOWLEDGMENTS``. The span between the first and second marker is
    returned as the contents page, and the span between the third and fourth
    marker as the body.

    Args:
        full_text: The whole book as a single string.

    Returns:
        A ``(content_page, body_text)`` tuple of strings.

    Raises:
        IndexError: If fewer than four markers are found in ``full_text``.
    """
    marker_pattern = 'CHAPTER 1|Acknowledgments|ACKNOWLEDGMENTS'
    # Only the start offset of each marker is needed for slicing.
    marker_starts = [found.start() for found in re.finditer(marker_pattern, full_text)]

    toc_begin = marker_starts[0]
    toc_end = marker_starts[1]
    body_begin = marker_starts[2]
    body_end = marker_starts[3]

    return full_text[toc_begin:toc_end], full_text[body_begin:body_end]

In [4]:
# Split the raw text into the table-of-contents slice and the main-body slice.
content_page, body_text = extract_content_body(full_text)

#### Cleaning of the content and body section

In [5]:
def clean_content_body(content_page, body_text):
    """Clean the table-of-contents text and the body text.

    Contents page: split into lines, strip form feeds and surrounding
    whitespace from each line, and drop every line shorter than two
    characters. The filter is a single pass over a new list, so runs of
    consecutive blank lines are all removed (deleting from a list while
    iterating over it skips the element after each deletion).

    Body: replace form feeds and newlines with spaces, collapse ``'w '`` to
    ``'w'``, and strip leading/trailing spaces.

    Args:
        content_page: Raw table-of-contents text.
        body_text: Raw main-body text.

    Returns:
        A ``(new_list, body_clean)`` tuple: the list of cleaned contents
        lines and the cleaned body string.
    """
    cleaned_lines = (
        line.replace('\x0c', '').strip() for line in content_page.split('\n')
    )
    new_list = [line for line in cleaned_lines if len(line) >= 2]

    body_clean = re.sub(r'\x0c|\n', ' ', body_text)
    # NOTE(review): this also glues real words together ("knew a" -> "knewa");
    # presumably it repairs a PDF-extraction artifact after "w" — confirm.
    body_clean = body_clean.replace('w ', 'w')
    return new_list, body_clean.strip(' ')

In [6]:
# new_list: cleaned table-of-contents lines; body_clean: cleaned body string.
new_list, body_clean = clean_content_body(content_page, body_text)

#### Split the content into chapters and sub_chapters

In [7]:
def split_content(new_list):
    """Separate table-of-contents lines into chapter and section headings.

    Lines starting with ``CHAPTER`` are split on ``':'`` and each piece is
    stripped; pieces that start with ``CHAPTER`` are chapter labels and the
    remaining pieces are chapter sub-titles. Every other line is treated as
    a section sub-title.

    Args:
        new_list: Cleaned table-of-contents lines.

    Returns:
        A ``(sub_title, chap_title, chap_sub)`` tuple of lists: section
        sub-titles, chapter labels, and chapter sub-titles, each in the
        order encountered.
    """
    chapter_lines = [line for line in new_list if line.startswith('CHAPTER')]
    sub_title = [line for line in new_list if not line.startswith('CHAPTER')]

    # Flatten "CHAPTER N: Name" lines into their stripped colon-separated parts.
    pieces = [part.strip() for line in chapter_lines for part in line.split(':')]

    chap_title = [piece for piece in pieces if piece.startswith('CHAPTER')]
    chap_sub = [piece for piece in pieces if not piece.startswith('CHAPTER')]
    return sub_title, chap_title, chap_sub

In [8]:
# sub_title: section headings; chap_title: "CHAPTER N" labels; chap_sub: chapter names.
sub_title, chap_title, chap_sub = split_content(new_list)

### Remove the sub_topics and titles

In [9]:
def remove_sub_titles(body_clean, sub_titles=None, chapter_subs=None):
    """Delete every section heading and chapter sub-title from the body text.

    Args:
        body_clean: The cleaned body string.
        sub_titles: Section headings to remove. When omitted, the notebook
            global ``sub_title`` is used, so existing one-argument calls
            keep working.
        chapter_subs: Chapter sub-titles to remove. When omitted, the
            notebook global ``chap_sub`` is used.

    Returns:
        ``body_clean`` with every occurrence of each heading removed.
        Section headings are removed first, then chapter sub-titles.
    """
    if sub_titles is None:
        sub_titles = sub_title
    if chapter_subs is None:
        chapter_subs = chap_sub

    for heading in (*sub_titles, *chapter_subs):
        # str.replace is already a no-op when the heading is absent; skipping
        # empty strings just avoids pointless work.
        if heading:
            body_clean = body_clean.replace(heading, '')
    return body_clean

In [10]:
# Rebinds body_clean to the version with section headings and chapter sub-titles removed.
body_clean = remove_sub_titles(body_clean)

#### Get the index of each chapter and split the body text into chapters

In [11]:
def split_into_chapters(body_clean, chap_title):
    """Split the body text into one chunk per chapter heading occurrence.

    Each chunk starts at a heading match and runs to the start of the next
    match; the last chunk runs to the end of the text. Text before the first
    heading is discarded.

    Args:
        body_clean: The cleaned body string.
        chap_title: Chapter labels (e.g. ``'CHAPTER 1'``) to split on.

    Returns:
        A list of chunk strings in document order. The list is empty when
        there are no titles or no heading is found.
    """
    titles = [title for title in chap_title if title]
    if not titles:
        # An empty alternation would match at every position of the text.
        return []

    # Titles are literal text, so escape any regex metacharacters.
    heading_pattern = '|'.join(re.escape(title) for title in titles)
    starts = [found.start() for found in re.finditer(heading_pattern, body_clean)]

    # Pair each start with the next start, and the final start with the end
    # of the text, so a single-chapter body still yields its one chunk.
    ends = starts[1:] + [len(body_clean)]
    return [body_clean[begin:end] for begin, end in zip(starts, ends)]

In [12]:
# post_index = split_into_chapters(body_clean, chap_title)
# post_index

In [13]:
# info: list of body-text chunks, one per chapter-heading match.
info = split_into_chapters(body_clean, chap_title)

#### Remove titles and convert the chapter title, sub_title and text body into a dictionary

### Split the body of text in chapter 

In [14]:
def final_convert(info, chap_sub, chap_title):
    """Strip the leading heading from each chunk and build the chapter mapping.

    Only the heading at the very start of each chunk (an upper-case word,
    one whitespace character, then the full chapter number) is removed.
    Upper-case-word-plus-digit sequences later in the text are left alone,
    and multi-digit chapter numbers are removed completely.

    Args:
        info: Chapter chunks, each expected to begin with its heading.
        chap_sub: Chapter sub-titles, in chapter order.
        chap_title: Chapter labels, in chapter order.

    Returns:
        A ``(chapters, content)`` tuple. ``chapters`` maps each chapter label
        to a ``(sub_title, text)`` tuple; ``content`` is the list of cleaned
        chapter texts. ``zip`` truncates to the shortest input, so mismatched
        lengths silently drop trailing entries from ``chapters``.
    """
    leading_heading = re.compile(r'^\s*[A-Z]+\s\d+')
    content = [leading_heading.sub('', chunk, count=1).strip() for chunk in info]

    chapters = dict(zip(chap_title, zip(chap_sub, content)))
    return chapters, content

In [15]:
# subtle_fuck: chapter label -> (sub-title, text); content: list of chapter texts.
subtle_fuck, content = final_convert(info, chap_sub, chap_title)

In [16]:
import pandas as pd
# One row per dict key (chapter label); reset_index moves the labels into a column.
df = pd.DataFrame.from_dict(subtle_fuck, orient='index').reset_index()
df.columns = ['Chapter','Sub_title','Content']
# Put a constant book-title column first.
df.insert(0, 'Title', 'The Subtle Art of not giving a fuck by Mark')
df.head()

Unnamed: 0,Title,Chapter,Sub_title,Content
0,The Subtle Art of not giving a fuck by Mark,CHAPTER 1,Don’t Try,"Charles Bukowski was an alcoholic, a womanizer..."
1,The Subtle Art of not giving a fuck by Mark,CHAPTER 2,Happiness Is a Problem,"About twenty-five hundred years ago, in the Hi..."
2,The Subtle Art of not giving a fuck by Mark,CHAPTER 3,You Are Not Special,I once knewa guy; we’ll call him Jimmy. Jimmy ...
3,The Subtle Art of not giving a fuck by Mark,CHAPTER 4,The Value of Suffering,"In the closing months of 1944, after almost a ..."
4,The Subtle Art of not giving a fuck by Mark,CHAPTER 5,You Are Always Choosing,Imagine that somebody puts a gun to your head ...


#### Summarize the content

In [17]:
from sumy.utils import get_stop_words
def summarize(content, language, sentence_count):
    """Summarize each text with sumy's LSA summarizer.

    Args:
        content: Iterable of text strings (one per chapter).
        language: Language name passed to sumy's tokenizer, stemmer and
            stop-word lookup (e.g. ``"english"``).
        sentence_count: Number of sentences requested for each summary.

    Returns:
        A list with one summary string per input text. The selected
        sentences are joined with a single space so they do not run
        together.
    """
    # These depend only on the language, so build them once rather than
    # once per chapter.
    tokenizer = Tokenizer(language)
    summarizer = LsaSummarizer(Stemmer(language))
    summarizer.stop_words = get_stop_words(language)

    summary = []
    for text in content:
        parser = PlaintextParser(text, tokenizer)
        sentences = summarizer(parser.document, sentence_count)
        summary.append(' '.join(str(sentence) for sentence in sentences))
    return summary

In [18]:
language = "english"
# Two summaries per chapter: 5 and 10 sentences requested.
summary_5= summarize(content, language, 5)
summary_10 = summarize(content, language, 10)
# Attach the summaries to the chapter table as new columns.
df['summary_5'] = summary_5
df['summary_10'] = summary_10


### To analyse the summary

In [19]:
# Endpoint and RapidAPI headers used by emotion_analysis; the API key is read
# from the local `config` module rather than being hard-coded here.
url = "https://twinword-emotion-analysis-v1.p.rapidapi.com/analyze/"
headers = {
    'x-rapidapi-host': "twinword-emotion-analysis-v1.p.rapidapi.com",
    'x-rapidapi-key': config.api_key
    }
def emotion_analysis(summary, timeout=30):
    """Send each summary to the emotion-analysis endpoint and collect the replies.

    Uses the notebook-level ``url`` and ``headers``. One GET request is made
    per summary, with the summary passed as the ``text`` query parameter.

    Args:
        summary: Iterable of summary strings.
        timeout: Seconds to wait for each request before ``requests`` raises
            a timeout error. Without a timeout a stalled connection would
            block the notebook indefinitely.

    Returns:
        A list of raw response bodies (strings), one per summary, in order.
        HTTP status codes are not checked, so error bodies are returned
        as-is.
    """
    emo = []
    for text in summary:
        response = requests.get(
            url, headers=headers, params={"text": text}, timeout=timeout
        )
        emo.append(response.text)
    return emo

In [20]:
# Raw API response text for each 5-sentence and 10-sentence summary.
emo_5= emotion_analysis(summary_5)
emo_10= emotion_analysis(summary_10)

### To clean the emotion dictionary

In [21]:
def clean_sentiments(emo):
    """Extract the first emotion label from each raw API response.

    For every response string, the first ``[...]`` span is located and the
    first word inside it is taken as the emotion.

    Args:
        emo: Iterable of raw response strings.

    Returns:
        A list with one entry per response: the first word inside the first
        bracketed span, or ``'no emotion detected'`` when the span is empty
        or the response contains no bracketed span at all (e.g. an error
        body).
    """
    great = []
    for response_text in emo:
        bracketed = re.search(r'\[.*?\]', response_text)
        # Guard against responses without any [...] instead of crashing.
        first_word = re.search(r'\w+', bracketed.group()) if bracketed else None
        great.append(first_word.group() if first_word else 'no emotion detected')
    return great

In [23]:
# Reduce each raw response to a single emotion label and add the labels as columns.
great_5 = clean_sentiments(emo_5)
great_10 = clean_sentiments(emo_10)
df['emotion_analysis_5'] = great_5
df['emotion_analysis_10'] = great_10

In [24]:
# Display the full chapter table with the summary and emotion columns.
df

Unnamed: 0,Title,Chapter,Sub_title,Content,summary_5,summary_10,emotion_analysis_5,emotion_analysis_10
0,The Subtle Art of not giving a fuck by Mark,CHAPTER 1,Don’t Try,"Charles Bukowski was an alcoholic, a womanizer...",Everyone and their TV commercial wants you to ...,Everyone and their TV commercial wants you to ...,fear,sadness
1,The Subtle Art of not giving a fuck by Mark,CHAPTER 2,Happiness Is a Problem,"About twenty-five hundred years ago, in the Hi...",He’d wear a cheesy eye mask and a shirt (with ...,He’d wear a cheesy eye mask and a shirt (with ...,joy,no emotion detected
2,The Subtle Art of not giving a fuck by Mark,CHAPTER 3,You Are Not Special,I once knewa guy; we’ll call him Jimmy. Jimmy ...,"On any given day, if you asked him what he was...","On any given day, if you asked him what he was...",joy,joy
3,The Subtle Art of not giving a fuck by Mark,CHAPTER 4,The Value of Suffering,"In the closing months of 1944, after almost a ...","Born after the war ended, he had dropped out o...","Born after the war ended, he had dropped out o...",joy,sadness
4,The Subtle Art of not giving a fuck by Mark,CHAPTER 5,You Are Always Choosing,Imagine that somebody puts a gun to your head ...,Nowimagine that you bought nice shoes and runn...,Nowimagine that you bought nice shoes and runn...,anger,anger
5,The Subtle Art of not giving a fuck by Mark,CHAPTER 6,You’re Wrong About Everything (But So Am I),Five hundred years ago cartographers believed ...,"As she describes in her autobiography, My Lie:...",Beliefs of this sort—that I’m not attractive e...,surprise,joy
6,The Subtle Art of not giving a fuck by Mark,CHAPTER 7,Failure Is the Way Forward,I really mean it when I say it: I was fortunat...,When you’re sleeping on a smelly futon and hav...,I spent the next six months living on a friend...,no emotion detected,joy
7,The Subtle Art of not giving a fuck by Mark,CHAPTER 8,The Importance of Saying No,"In 2009, I gathered up all my possessions, sol...","Ultimately, the only way to achieve meaning an...",Armed with this grandiose sense of connectivit...,fear,fear
8,The Subtle Art of not giving a fuck by Mark,CHAPTER 9,. . . And Then You Die,"Seek the truth for yourself, and I will meet y...","Whether it be through mastering an art form, c...","At the time, Zen was seen as something for hip...",sadness,fear


In [25]:
# Write the table to disk; with to_csv's defaults the index is written as the first column.
df.to_csv('book_df.csv')