### Importing librairies

In [1]:
import pandas as pd
import numpy as np
import math
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns

import spacy as sp
from textblob import TextBlob
from langdetect import DetectorFactory, detect, detect_langs

### Load the data

In [2]:
data = pd.read_csv('project-gutenberg-books.csv')
data.head()

Unnamed: 0.1,Unnamed: 0,Subject,Medium,Link,Text,Author,Title,Date
0,0,Philosophy,Book,https://www.gutenberg.org/ebooks/1497,Produced by Sue Asscher THE REPUBLIC By ...,Plato,The Republic,
1,1,Philosophy,Book,https://www.gutenberg.org/ebooks/1998,Produced by Sue Asscher THUS SPAKE ZARATH...,Friedrich Nietzsche,Thus Spake Zarathustra,
2,2,Philosophy,Book,https://www.gutenberg.org/ebooks/4363,"Produced by John Mamoun, Charles Franks and th...",Friedrich Nietzsche,Beyond Good and Evil,
3,3,Philosophy,Book,https://www.gutenberg.org/ebooks/61,Transcribed by Allen Lutins with assistance fr...,Karl Marx and Friedrich Engels,The Communist Manifesto\r\n by Karl Marx and F...,
4,4,Philosophy,Book,https://www.gutenberg.org/ebooks/5740,Transcribed by Allen Lutins with assistance fr...,Karl Marx and Friedrich Engels,The Communist Manifesto\r\n by Karl Marx and F...,


In [3]:
data['Subject'].unique()

array(['Philosophy', 'Sociology', 'Anarchism', 'Racism'], dtype=object)

In [4]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 166 entries, 0 to 165
Data columns (total 8 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   Unnamed: 0  166 non-null    int64  
 1   Subject     166 non-null    object 
 2   Medium      166 non-null    object 
 3   Link        166 non-null    object 
 4   Text        165 non-null    object 
 5   Author      154 non-null    object 
 6   Title       166 non-null    object 
 7   Date        0 non-null      float64
dtypes: float64(1), int64(1), object(6)
memory usage: 10.5+ KB


In [5]:
data.drop(['Unnamed: 0', 'Date'], axis=1, inplace=True)
data.head()

Unnamed: 0,Subject,Medium,Link,Text,Author,Title
0,Philosophy,Book,https://www.gutenberg.org/ebooks/1497,Produced by Sue Asscher THE REPUBLIC By ...,Plato,The Republic
1,Philosophy,Book,https://www.gutenberg.org/ebooks/1998,Produced by Sue Asscher THUS SPAKE ZARATH...,Friedrich Nietzsche,Thus Spake Zarathustra
2,Philosophy,Book,https://www.gutenberg.org/ebooks/4363,"Produced by John Mamoun, Charles Franks and th...",Friedrich Nietzsche,Beyond Good and Evil
3,Philosophy,Book,https://www.gutenberg.org/ebooks/61,Transcribed by Allen Lutins with assistance fr...,Karl Marx and Friedrich Engels,The Communist Manifesto\r\n by Karl Marx and F...
4,Philosophy,Book,https://www.gutenberg.org/ebooks/5740,Transcribed by Allen Lutins with assistance fr...,Karl Marx and Friedrich Engels,The Communist Manifesto\r\n by Karl Marx and F...


In [6]:
data = data.applymap(str)

In [7]:
data.drop_duplicates(inplace=True, subset=['Author', 'Title'], keep='first')
data.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 127 entries, 0 to 165
Data columns (total 6 columns):
 #   Column   Non-Null Count  Dtype 
---  ------   --------------  ----- 
 0   Subject  127 non-null    object
 1   Medium   127 non-null    object
 2   Link     127 non-null    object
 3   Text     127 non-null    object
 4   Author   127 non-null    object
 5   Title    127 non-null    object
dtypes: object(6)
memory usage: 6.9+ KB


In [8]:
data['Author'].unique()

array(['Plato', 'Friedrich Nietzsche', 'Karl Marx and Friedrich Engels',
       'John Dewey', 'John Locke', 'John Stuart Mill',
       'Benedict de Spinoza', 'Immanuel Kant', 'F. W. Nietzsche',
       'Aristotle', 'nan', 'Friedrich Wilhelm Nietzsche', 'Lewis Carroll',
       'Bertrand Russell', 'David Hume', 'Henri Bergson',
       'Friedrich Nietzsche.', 'René Descartes', 'Frederich Schiller',
       'Carveth Read', 'St. George Stock', 'George Berkeley',
       'Surendranath Dasgupta', 'William James', 'Peter',
       'Kaiten Nukariya', 'Various', 'George Santayana',
       'Richard Falckenberg', 'Anicius Manlius Severinus Boethius',
       'Roscoe Pound', 'John Marshall', 'Thomas Taylor', 'S. M. Dubnow',
       'Emile Faguet', 'Leslie Stephen', 'Alexander Philip',
       'G.E. Partridge', 'Richard William Church', 'Victor Cousin',
       'John Morley', 'John Abercrombie', 'W. Tudor Jones',
       'J. Alexander Gunn', 'George Grote', 'Johann Gottlieb Fichte',
       'Vatsyayana', 'Mar

In [9]:
data["Author"].replace({
    "Friedrich Nietzsche": "Friedrich Wilhelm Nietzsche", 
    "F. W. Nietzsche": "Friedrich Wilhelm Nietzsche",
    "Friedrich Nietzsche.": "Friedrich Wilhelm Nietzsche",
    "Friedrich Nietzsche.": "Friedrich Wilhelm Nietzsche",
    "graf Leo Tolstoy": "Leo Tolstoy",
    "Grant Hague": "W. Grant Hague"
    }, inplace=True)

In [10]:
data['Author'].unique()

array(['Plato', 'Friedrich Wilhelm Nietzsche',
       'Karl Marx and Friedrich Engels', 'John Dewey', 'John Locke',
       'John Stuart Mill', 'Benedict de Spinoza', 'Immanuel Kant',
       'Aristotle', 'nan', 'Lewis Carroll', 'Bertrand Russell',
       'David Hume', 'Henri Bergson', 'René Descartes',
       'Frederich Schiller', 'Carveth Read', 'St. George Stock',
       'George Berkeley', 'Surendranath Dasgupta', 'William James',
       'Peter', 'Kaiten Nukariya', 'Various', 'George Santayana',
       'Richard Falckenberg', 'Anicius Manlius Severinus Boethius',
       'Roscoe Pound', 'John Marshall', 'Thomas Taylor', 'S. M. Dubnow',
       'Emile Faguet', 'Leslie Stephen', 'Alexander Philip',
       'G.E. Partridge', 'Richard William Church', 'Victor Cousin',
       'John Morley', 'John Abercrombie', 'W. Tudor Jones',
       'J. Alexander Gunn', 'George Grote', 'Johann Gottlieb Fichte',
       'Vatsyayana', 'Margaret Slattery', 'Anonymous', 'Albert Moll',
       'B.G. Jefferis and J.

In [11]:
data['Title'] = data['Title'].apply(lambda x: x[:x.find('\r')+1] if x.find('\r') != -1 else x)
data['Title'].unique()

array(['The Republic', 'Thus Spake Zarathustra', 'Beyond Good and Evil',
       'The Communist Manifesto\r', 'Democracy and Education',
       'Second Treatise of Government', 'On Liberty', 'The Ethics',
       'The Critique of Pure Reason', 'The Antichrist', 'Poetics',
       'Apology', 'An Enquiry Concerning Human Understanding',
       'Also Sprach Zarathustra', 'Utilitarianism', 'Politics',
       'Symbolic Logic', 'Euthyphro', 'The Analysis of Mind',
       'Dialogues Concerning Natural Religion', 'Theaetetus',
       'A System Of Logic', 'The Poetics',
       'An Enquiry Concerning the Principles of\r', 'Ion',
       'Laughter: An Essay on the Meaning of the\r',
       'Mysticism and Logic and Other Essays',
       'Considerations on Representative Government',
       'The Case Of Wagner', 'The Game of Logic', 'The Categories',
       'Discours de la méthode', 'The Aesthetical Essays',
       'Moral Principles in Education',
       'Kritik der reinen Vernunft (2nd Edition)\r', 'L

In [12]:
def cut_text(text_str, text_tl):
    if text_str.lower().find(text_tl.lower()) != -1:
        lng = text_str.lower().find(text_tl.lower())
        text_str = text_str[lng:]
    else:
        text_str
    return text_str

In [13]:
data['Text'] = data.apply(lambda x: cut_text(x['Text'], x['Title']),  axis=1)
data.head()

Unnamed: 0,Subject,Medium,Link,Text,Author,Title
0,Philosophy,Book,https://www.gutenberg.org/ebooks/1497,THE REPUBLIC By Plato Translated by Benjami...,Plato,The Republic
1,Philosophy,Book,https://www.gutenberg.org/ebooks/1998,THUS SPAKE ZARATHUSTRA A BOOK FOR ALL AND NON...,Friedrich Wilhelm Nietzsche,Thus Spake Zarathustra
2,Philosophy,Book,https://www.gutenberg.org/ebooks/4363,BEYOND GOOD AND EVIL By Friedrich Nietzsche ...,Friedrich Wilhelm Nietzsche,Beyond Good and Evil
3,Philosophy,Book,https://www.gutenberg.org/ebooks/61,Transcribed by Allen Lutins with assistance fr...,Karl Marx and Friedrich Engels,The Communist Manifesto\r
5,Philosophy,Book,https://www.gutenberg.org/ebooks/852,DEMOCRACY AND EDUCATION by John Dewey Tra...,John Dewey,Democracy and Education


In [14]:
# data['Language'] = data['Text'].apply(lambda x: TextBlob(x.lower()[:100]).detect_language())
# data.head()

In [15]:
# data['Language'].value_counts()
# data.drop(data.index[data['Language'] != 'en'], inplace = True)
# data['Language'].value_counts()

In [16]:
#data.to_excel("Project Gutenberg Texts.xlsx", header=True)

In [17]:
philo = pd.read_csv('1000-word-philosophy.csv')
print(philo.info())
philo.drop(['Unnamed: 0'], axis=1, inplace=True)
philo.dropna(subset=['Text'], inplace=True)
philo.reset_index(drop=True, inplace=True)
print(philo.info())
philo.head(5)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 211 entries, 0 to 210
Data columns (total 5 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   Unnamed: 0  211 non-null    int64 
 1   URL         211 non-null    object
 2   Text        49 non-null     object
 3   Date        211 non-null    object
 4   Author      211 non-null    object
dtypes: int64(1), object(4)
memory usage: 8.4+ KB
None
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 49 entries, 0 to 48
Data columns (total 4 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   URL     49 non-null     object
 1   Text    49 non-null     object
 2   Date    49 non-null     object
 3   Author  49 non-null     object
dtypes: object(4)
memory usage: 1.7+ KB
None


Unnamed: 0,URL,Text,Date,Author
0,https://1000wordphilosophy.com/2021/05/08/happ...,Skip to content![1000 WordPhilosophy](https://...,8 May 2021,Kiki Berk
1,https://1000wordphilosophy.com/2020/10/02/is-d...,Skip to content![1000 WordPhilosophy](https://...,2 October 2020,"Frederik Kaufman Categories: Ethics, Metaphysi..."
2,https://1000wordphilosophy.com/2016/03/07/the-...,Abortion involves the intentional killing of a...,7 March 2016,No author
3,https://1000wordphilosophy.com/2019/03/05/euth...,Skip to content![1000 WordPhilosophy](https://...,5 March 2019,Nathan Nobis
4,https://1000wordphilosophy.com/2021/02/16/prin...,Skip to content![1000 WordPhilosophy](https://...,16 February 2021,"G. M. Trujillo, Jr."


In [18]:
import re
for i in range(0, 6):
    #print(philo['Text'][i][:600])
    sub, aut = len('Submissions '), len('An Introductory Anthology Author:')
    sub0, aut0 = philo['Text'][i].find('Submissions '), philo['Text'][i].find('An Introductory Anthology Author:')
    beg, beg0 = sub0+sub+1, aut0+aut+1
    end, end0 = philo['Text'][i].find('~ 1000 Word Philosophy'), philo['Text'][i].find('Cate')
    if beg != -1 and end != -1 and beg0 != 0 and end0 !=0:
        s = philo['Text'][i][beg:end]
        print("\t", s)
        m = re.search(r"\d", s)
        # Title
        print(s[:m.start()])
        # Date
        print(s[m.start():])
        # Author
        print(philo['Text'][i][beg0:end0])

	 appiness: What is it to be Happy 8 May 202112 July 2021 
appiness: What is it to be Happy 
8 May 202112 July 2021 
Kiki Berk  
	 s Death Bad Epicurus and Lucretius on the Fear of Death 2 October 20209 May 2021 
s Death Bad Epicurus and Lucretius on the Fear of Death 
2 October 20209 May 2021 
Frederik Kaufman  
	 uthanasia, or Mercy Killing 5 March 201911 March 2021 
uthanasia, or Mercy Killing 
5 March 201911 March 2021 
Nathan Nobis  
	 rincip lis m in Biomedical Ethics: Respect for Autonomy, Non Male fic en ce,Beneficence, and Justice 16 February 202113 July 2021 
rincip lis m in Biomedical Ethics: Respect for Autonomy, Non Male fic en ce,Beneficence, and Justice 
16 February 202113 July 2021 
G. M. Trujillo, Jr.  
	 thics and the Expected Consequences of Voting 28 June 20209 March 2021 
thics and the Expected Consequences of Voting 
28 June 20209 March 2021 
Thomas Metcalf  


In [19]:
# Sep
# 'Words:', 'Word count:', ')'
for i in range(0, 4):
    print("\t")
    print(philo['Text'][i][:1100])

	
Skip to content![1000 WordPhilosophy](https://1000wordphilosophy.files.wordpress.com/2019/01/cropped-1000-word-philosophy-logo-with-subtitle-1-Cleaned-Up.jpg)1000 Word Philosophy: An Introductory Anthology Philosophy, One Thousand Words at a Time Menu   About   Newest Essays   Teaching   All Essays   Submissions Happiness: What is it to be Happy 8 May 202112 July 2021 ~ 1000 Word Philosophy: An Introductory Anthology Author: Kiki Berk  Category: Ethics, Phenomenology and Existentialism  Words: 992  Do you want to be happy If you're like most people, then yes, you do.But what is happiness What does it mean to be “happy”?[1]This essay discusses four major philosophical theories of happiness.[2]!["Mr. Happy" on the beach.](https://1000wordphilosophy.files.wordpress.com/2021/05/happiness.jpg?w=656&h=492)"Mr. Happy" on the beach. 1. Hedonism According to hedonism, happiness is simply the experience of pleasure.[3] A happy person has a lot more pleasure than displeasure (pain) in her life.

In [20]:
# Sep
# 'Words:', 'Word count:', ')'
for i in range(46, 49):
    print("\t")
    print(philo['Text'][i][:1100])

	
Skip to content![1000 WordPhilosophy](https://1000wordphilosophy.files.wordpress.com/2019/01/cropped-1000-word-philosophy-logo-with-subtitle-1-Cleaned-Up.jpg)1000 Word Philosophy: An Introductory Anthology Philosophy, One Thousand Words at a Time Menu   About   Newest Essays   Teaching   All Essays   Submissions Defining Capitalism and Socialism 17 September 20199 March 2021 ~ 1000 Word Philosophy: An Introductory Anthology Author: Thomas Metcalf  Cate gory: Social and Political Philosophy  Word count: 999Editor's Note: This essay is ' the first in a two part series authored by To mon ' the topic of ' ' ' ' ' capitalism ' ' ' ' ' and ' ' socialism ' '. The second essay, on evaluating ' ' ' ' ' capitalism ' ' ' ' ' and ' ' socialism ' ', can be viewed here.Should our society be capitalist, socialist, or something in between To adjudicate this debate, we must understand ' the definitions of '' ' ' ' ' capitalism ' ' ' ' '' and'' ' socialism ' '.'![Image of a finger pointing at an entry