In [1]:
# Get pandas and postgres to work together
from sqlalchemy import create_engine
import psycopg2 as pg
import pandas as pd
import numpy as np
import pickle 

# Pandas display overrides for visuals
# pd.set_option('display.max_colwidth', 1)

# Import sklearn to do CountVectorizing and TF-IDF document-term matrix
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer

# For plotting reasons
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
%config InlineBackend.figure_formats = ['svg']

# Topic Modeling
from sklearn.decomposition import TruncatedSVD
from sklearn.decomposition import NMF
from sklearn.metrics.pairwise import cosine_similarity
#LDA
from sklearn.decomposition import LatentDirichletAllocation
# logging (set to INFO)
import logging
logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)
# pyLDAvis
import pyLDAvis
import pyLDAvis.sklearn
pyLDAvis.enable_notebook()

# CorEx
import scipy.sparse as ss
from corextopic import corextopic as ct
from corextopic import vis_topic as vt

# Import custom topic labeler
from utilities.utilities import spacy_tokenizer, display_topics, pyLDAviz, plot_tsne

# Suppress warnings 
import warnings
warnings.filterwarnings('ignore')

# Clustering
from sklearn.preprocessing import scale
from sklearn.cluster import KMeans, DBSCAN
from sklearn.utils import shuffle

# 2D Visuals
from sklearn.decomposition import PCA
from itertools import cycle
from sklearn.manifold import TSNE

# filter words even more
import nltk

%load_ext autoreload
%autoreload 2

In [2]:
# Read the Postgres connection URL from the BIBLE_DB_URL environment variable
# so credentials do not have to live in the notebook. The fallback is the
# local-development URL, so an unconfigured environment behaves as before.
import os

BIBLE_DB_URL = os.environ.get(
    'BIBLE_DB_URL',
    'postgresql://briantam:localhost@localhost/bible',
)
engine = create_engine(BIBLE_DB_URL)

# Pull the full bbe_alchemy table into a DataFrame.
bbe = pd.read_sql('SELECT * FROM bbe_alchemy', engine)

In [5]:
# Load the pickled topic-weight table (presumably written by an earlier
# topic-modelling notebook — TODO confirm the producer).
# NOTE(review): unpickling can execute arbitrary code; only load trusted files.
lda_topics = pd.read_pickle('lda_topics.pkl')

In [9]:
# Peek at the first row to check which columns came back from the database.
bbe.head(1)

Unnamed: 0,field,testiment,book,section,chapter,verse,text,cleaner,compound_sent,neg_sent,neu_sent,pos_sent
0,1001001,old,Genesis,Law,1,1,At the first God made the heaven and the earth.,god heaven earth,0.6597,0.0,0.597,0.403


In [None]:
# NOTE(review): bare reference to the pandas module in a cell that was never
# executed; it would only echo the module repr. Looks like leftover scratch —
# consider deleting this cell.
pd

In [6]:
# Display the topic-weight table: one column per topic (labelled by its top
# words); pandas truncates the middle rows in the rendered output.
lda_topics

Unnamed: 0,"water, food, need, waste, come","law, ear, word, lord, rule","lord, land, moses, egypt, god","jesus, faith, christ, god, spirit","tree, round, robe, band, gold","shame, dark, light, bitter, low","request, come, salvation, lord, king","evil, lord, man, upright, sin","blessing, fruit, garden, vine, come","lord, word, king, come, thousand","tent, meeting, son, pity, greatly","son, wife, daughter, mother, father","gold, grain, silver, bird, crush","voice, come, heaven, sound, lord","offering, day, lord, holy, burn","judge, cubit, delight, wide, gt","peter, forgiveness, come, sheep, unchanging","love, honour, god, clear, knowledge","israel, david, child, lord, saul","wrath, place, hater, resting, lord"
0,0.01850,0.01850,0.01850,0.01850,0.01850,0.01850,0.01850,0.01850,0.01850,0.01850,0.01850,0.01850,0.01850,0.64844,0.01850,0.01850,0.01850,0.01850,0.01850,0.01850
1,0.75304,0.01300,0.01300,0.01300,0.01300,0.01300,0.01300,0.01300,0.01300,0.01300,0.01300,0.01300,0.01300,0.01300,0.01300,0.01300,0.01300,0.01300,0.01300,0.01300
2,0.02024,0.02024,0.02024,0.02024,0.02024,0.61536,0.02024,0.02024,0.02024,0.02024,0.02024,0.02024,0.02024,0.02024,0.02024,0.02024,0.02024,0.02024,0.02024,0.02024
3,0.01504,0.01504,0.01504,0.01504,0.01504,0.71415,0.01504,0.01504,0.01504,0.01504,0.01504,0.01504,0.01504,0.01504,0.01504,0.01504,0.01504,0.01504,0.01504,0.01504
4,0.01456,0.01456,0.01456,0.01456,0.01456,0.01456,0.01456,0.01456,0.01456,0.01456,0.01456,0.01456,0.01456,0.01456,0.72338,0.01456,0.01456,0.01456,0.01456,0.01456
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
31098,0.32193,0.01241,0.01241,0.01241,0.01241,0.01241,0.01241,0.01241,0.01241,0.01241,0.01241,0.01241,0.01241,0.34969,0.01241,0.11737,0.01241,0.01241,0.01241,0.01241
31099,0.01354,0.47982,0.01354,0.01354,0.01354,0.01354,0.01354,0.27638,0.01354,0.01354,0.01354,0.01354,0.01354,0.01354,0.01354,0.01354,0.01354,0.01354,0.01354,0.01354
31100,0.01309,0.66097,0.01309,0.01309,0.10350,0.01309,0.01309,0.01309,0.01309,0.01309,0.01309,0.01309,0.01309,0.01309,0.01309,0.01309,0.01309,0.01309,0.01309,0.01309
31101,0.01397,0.01397,0.01397,0.73459,0.01397,0.01397,0.01397,0.01397,0.01397,0.01397,0.01397,0.01397,0.01397,0.01397,0.01397,0.01397,0.01397,0.01397,0.01397,0.01397


# Recommendation

In [1]:
from nltk.sentiment.vader import SentimentIntensityAnalyzer

In [None]:
# Load VADER
# NOTE(review): `df` and its `post_no_links` column are not defined anywhere in
# the visible notebook, and this cell has no execution count — confirm the
# intended frame/column before running (`bbe` already carries *_sent columns).
analyzer = SentimentIntensityAnalyzer()

# Score each document once and keep the resulting dict, so polarity_scores
# runs once per row instead of once per output column.
vader_scores = [analyzer.polarity_scores(v) for v in df['post_no_links']]

# Add VADER metrics to dataframe (one column per score key).
for metric in ('compound', 'neg', 'neu', 'pos'):
    df[metric] = [score[metric] for score in vader_scores]

# Recommendation systems

Let's focus on building a recommendation system for Bible verses. Given a topic, which verses would you recommend?

`text` is the verse text  
`cleaner` column is the raw base words used for topic modeling  
`book` book of the bible  
`chapter` chapter of the book  
`verse` verse of the chapter  
`testiment` Old/New Testament  

In [10]:
# Human-readable verse reference such as "Genesis 1 1": book, chapter and
# verse joined by single spaces. Built with vectorised string concatenation
# instead of per-row label lookups, which is faster on the full table and does
# not depend on the index labels being unique.
# Adds the `id` column to `bbe` in place; re-running the cell is idempotent.
bbe['id'] = (
    bbe['book'].astype(str)
    + ' ' + bbe['chapter'].astype(str)
    + ' ' + bbe['verse'].astype(str)
)

In [11]:
# Confirm the new `id` column was appended as the last column.
bbe.head(3)

Unnamed: 0,field,testiment,book,section,chapter,verse,text,cleaner,compound_sent,neg_sent,neu_sent,pos_sent,id
0,1001001,old,Genesis,Law,1,1,At the first God made the heaven and the earth.,god heaven earth,0.6597,0.0,0.597,0.403,Genesis 1 1
1,1001002,old,Genesis,Law,1,2,And the earth was waste and without form; and ...,earth waste form dark face deep spirit god fac...,0.0,0.081,0.809,0.11,Genesis 1 2
2,1001003,old,Genesis,Law,1,3,"And God said, Let there be light: and there wa...",god let light light,0.2732,0.0,0.826,0.174,Genesis 1 3


## Get Distances
Let's look at the distances between these verses in the 20-topic LDA space.

In [14]:
# import distance methods
from sklearn.metrics import pairwise_distances

In [16]:
# Preview the topic-weight rows that the distance computation will use.
lda_topics.head()

Unnamed: 0,"water, food, need, waste, come","law, ear, word, lord, rule","lord, land, moses, egypt, god","jesus, faith, christ, god, spirit","tree, round, robe, band, gold","shame, dark, light, bitter, low","request, come, salvation, lord, king","evil, lord, man, upright, sin","blessing, fruit, garden, vine, come","lord, word, king, come, thousand","tent, meeting, son, pity, greatly","son, wife, daughter, mother, father","gold, grain, silver, bird, crush","voice, come, heaven, sound, lord","offering, day, lord, holy, burn","judge, cubit, delight, wide, gt","peter, forgiveness, come, sheep, unchanging","love, honour, god, clear, knowledge","israel, david, child, lord, saul","wrath, place, hater, resting, lord"
0,0.0185,0.0185,0.0185,0.0185,0.0185,0.0185,0.0185,0.0185,0.0185,0.0185,0.0185,0.0185,0.0185,0.64844,0.0185,0.0185,0.0185,0.0185,0.0185,0.0185
1,0.75304,0.013,0.013,0.013,0.013,0.013,0.013,0.013,0.013,0.013,0.013,0.013,0.013,0.013,0.013,0.013,0.013,0.013,0.013,0.013
2,0.02024,0.02024,0.02024,0.02024,0.02024,0.61536,0.02024,0.02024,0.02024,0.02024,0.02024,0.02024,0.02024,0.02024,0.02024,0.02024,0.02024,0.02024,0.02024,0.02024
3,0.01504,0.01504,0.01504,0.01504,0.01504,0.71415,0.01504,0.01504,0.01504,0.01504,0.01504,0.01504,0.01504,0.01504,0.01504,0.01504,0.01504,0.01504,0.01504,0.01504
4,0.01456,0.01456,0.01456,0.01456,0.01456,0.01456,0.01456,0.01456,0.01456,0.01456,0.01456,0.01456,0.01456,0.01456,0.72338,0.01456,0.01456,0.01456,0.01456,0.01456


In [74]:
# Try metric='euclidean' and metric='cosine', and see what changes!
# Row position of the query verse (Matthew 6:11, per the lookup cell below).
target = 23312

# Distance from the query row to every row, then row positions ordered from
# nearest to farthest.
target_topics = lda_topics.iloc[[target]]
neighbour_order = pairwise_distances(target_topics, lda_topics, metric='cosine').argsort()

# Drop the query itself so the first entry is the closest *other* row.
dists = [idx for idx in neighbour_order[0].tolist() if idx != target]
recommend = dists[0]

# NOTE(review): assumes lda_topics rows line up positionally with bbe rows.
bbe.iloc[recommend].text

'Give us every day bread for our needs.'

In [59]:
# Side-by-side check of two rows by position: 25415 is printed first, and
# 23312 (the query row) is shown as the cell's result.
other_verse_text = bbe.iloc[25415].text
print(other_verse_text)
bbe.iloc[23312].text

Give us every day bread for our needs.


'Give us this day bread for our needs.'

In [53]:
# Look up Matthew 6:11 by book / chapter / verse to find its row position.
in_matthew = bbe.book.eq('Matthew')
in_chapter_six = bbe.chapter.eq(6)
is_verse_eleven = bbe.verse.eq(11)
bbe[in_matthew & in_chapter_six & is_verse_eleven]

Unnamed: 0,field,testiment,book,section,chapter,verse,text,cleaner,compound_sent,neg_sent,neu_sent,pos_sent,id
23312,40006011,new,Matthew,Gospels,6,11,Give us this day bread for our needs.,day bread need,0.0,0.0,1.0,0.0,Matthew 6 11


In [44]:
# Display the full verse table (pandas truncates the middle rows).
# NOTE(review): `bbe.head()` would keep the saved notebook output smaller.
bbe

Unnamed: 0,field,testiment,book,section,chapter,verse,text,cleaner,compound_sent,neg_sent,neu_sent,pos_sent,id
0,1001001,old,Genesis,Law,1,1,At the first God made the heaven and the earth.,god heaven earth,0.6597,0.000,0.597,0.403,Genesis 1 1
1,1001002,old,Genesis,Law,1,2,And the earth was waste and without form; and ...,earth waste form dark face deep spirit god fac...,0.0000,0.081,0.809,0.110,Genesis 1 2
2,1001003,old,Genesis,Law,1,3,"And God said, Let there be light: and there wa...",god let light light,0.2732,0.000,0.826,0.174,Genesis 1 3
3,1001004,old,Genesis,Law,1,4,"And God, looking on the light, saw that it was...",god look light good god division light dark,0.7269,0.000,0.717,0.283,Genesis 1 4
4,1001005,old,Genesis,Law,1,5,"Naming the light, Day, and the dark, Night. An...",light day dark night evening morning day,0.0000,0.000,1.000,0.000,Genesis 1 5
...,...,...,...,...,...,...,...,...,...,...,...,...,...
31098,66022017,new,Revelation,Prophecy,22,17,"And the Spirit and the bride say, Come. And le...",spirit bride come let ear come let need come l...,0.5574,0.000,0.881,0.119,Revelation 22 17
31099,66022018,new,Revelation,Prophecy,22,18,For I say to every man to whose ears have come...,man ear come word prophet book man addition go...,-0.1779,0.072,0.874,0.054,Revelation 22 18
31100,66022019,new,Revelation,Prophecy,22,19,And if any man takes away from the words of th...,man away word book god away tree life holy tow...,0.2732,0.000,0.945,0.055,Revelation 22 19
31101,66022020,new,Revelation,Prophecy,22,20,"He who gives witness to these things says, Tru...",witness thing truly come quickly come lord jesus,0.4404,0.000,0.838,0.162,Revelation 22 20
