In [19]:
from Programs import Program, get_semantic_embedding, get_tone_embedding
from WebScrapeText import split_sent
import pickle
import numpy as np
import tensorflow as tf

[nltk_data] Downloading package punkt_tab to
[nltk_data]     C:\Users\wmmjk\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!


In [95]:
# Load the previously saved list of programs (presumably Program objects - confirm against
# whatever script wrote program_list.pkl).
# NOTE: unpickling can execute arbitrary code, so only load a pickle file you created yourself.
with open('program_list.pkl', mode='rb') as pkl_file:
    program_list = pickle.load(pkl_file)

In [83]:
# Placeholder statements standing in for what the user will eventually type in.
UserText = [
    'I want to be healthy and happy',
    "I feel stressed, but I don't know what to do with it",
    'I care deeply about growing as a person',
]

In [99]:
def _top_similarities(user_embedding, prog_embeddings, n_sentences, n_top):
    """Score one user embedding against a program's sentences and keep the best n_top.

    Returns (indices, scores): positions of the best-scoring program sentences,
    best first, and their cosine similarities in the same order.
    """
    similarities = np.zeros(n_sentences)
    for j in range(n_sentences): #for the jth program sentence
        # tf.keras.losses.cosine_similarity is defined as a loss (negative of the
        # similarity), so flip the sign to get "larger is more similar".
        similarities[j] = -tf.keras.losses.cosine_similarity(
            user_embedding, prog_embeddings[j])
    ranked = np.asarray(tf.argsort(similarities, direction='DESCENDING')) #most similar first
    best = ranked[:n_top]
    return best, similarities[best]


def compare_user_prog(user_text,prog_list,n_similarities=5):
    """Generate embeddings for user text and compare them to each program's text.

    Every entry of ``user_text`` is split into sentences with ``split_sent`` and
    embedded once with ``get_semantic_embedding`` and once with
    ``get_tone_embedding``. For each program and each user embedding, the cosine
    similarity to every program sentence embedding is computed and the
    ``n_similarities`` highest-scoring program sentences are kept.

    Parameters
    ----------
    user_text : list of str
        The user's statements; one embedding of each kind is produced per entry.
    prog_list : sequence
        Programs to compare against. Each item must expose ``text`` (its sentences)
        plus ``semantic_embeddings`` and ``tone_embeddings`` indexable by sentence
        position.
    n_similarities : int, optional
        How many of the best-matching program sentences to keep per user text
        (default 5, the value that used to be hard-coded here).

    Returns
    -------
    tuple of four numpy arrays, each of shape
    ``(len(prog_list), len(user_text), n_similarities)``:
        semantic sentence indices, semantic scores, tone sentence indices,
        tone scores - best match first along the last axis.

    Notes
    -----
    * The index arrays are created with ``np.zeros`` and are therefore float;
      cast with ``int(...)`` before using an entry to index ``prog.text``.
    * NOTE(review): a program with fewer than ``n_similarities`` sentences cannot
      fill a row, so the slice assignment below is expected to raise ValueError
      (a single-sentence program would instead be broadcast across the row).
      Confirm whether such programs can occur and how they should be handled.
    """
    # Split each user statement into sentences, then embed each statement twice.
    user_text_split = [split_sent(text) for text in user_text]
    user_sem_embeddings = [get_semantic_embedding(text) for text in user_text_split]
    user_tone_embeddings = [get_tone_embedding(text) for text in user_text_split]

    result_shape = (len(prog_list), len(user_sem_embeddings), n_similarities)
    top_sem_similarities_indxs = np.zeros(result_shape)   # index of the top N most similar program sentences
    top_sem_similarities_scores = np.zeros(result_shape)  # score of the top N most similar program sentences
    top_tone_similarities_indxs = np.zeros(result_shape)
    top_tone_similarities_scores = np.zeros(result_shape)

    for p, prog in enumerate(prog_list):
        n_sentences = len(prog.text)
        for i in range(result_shape[1]): #for the ith user text
            sem_best, sem_scores = _top_similarities(
                user_sem_embeddings[i], prog.semantic_embeddings, n_sentences, n_similarities)
            tone_best, tone_scores = _top_similarities(
                user_tone_embeddings[i], prog.tone_embeddings, n_sentences, n_similarities)
            top_sem_similarities_indxs[p,i,:] = sem_best
            top_sem_similarities_scores[p,i,:] = sem_scores
            top_tone_similarities_indxs[p,i,:] = tone_best
            top_tone_similarities_scores[p,i,:] = tone_scores
    return top_sem_similarities_indxs, top_sem_similarities_scores,top_tone_similarities_indxs, top_tone_similarities_scores
            

In [101]:
# Run the comparison once; each result is indexed [program, user text, rank].
(topsem_indx, topsem_score,
 toptone_indx, toptone_score) = compare_user_prog(UserText, program_list)

In [177]:
# For every program and every user text, list the best semantic matches and then
# the best tone matches found by compare_user_prog.
N_progs = len(program_list)
# Read both sizes off the result array rather than repeating the number of
# matches that compare_user_prog was configured with.
N_user_embeddings, N_similarities = np.shape(topsem_indx)[1:3]
for p, prog in enumerate(program_list):
    print(prog.name)
    for i in range(N_user_embeddings):
        print(UserText[i])
        for n in range(N_similarities):
            sem_indx = int(topsem_indx[p,i,n]) #indices are stored as floats, so cast before indexing
            print(f"Program Semantic Sentence #{n+1}: {prog.text[sem_indx]} Semantic Score: {topsem_score[p,i,n]: .4f}")
        print('')
        for n in range(N_similarities):
            tone_indx = int(toptone_indx[p,i,n])
            print(f"Program Tone Sentence #{n+1}: {prog.text[tone_indx]} Tone Score: {toptone_score[p,i,n]: .4f}")
        print('\n')
    print('\n\n\n')

Primal Trust
I want to be healthy and happy
Program Semantic Sentence #1: Are you tired of feeling unheard and unsupported in your quest for health and vitality? Semantic Score:  0.3873
Program Semantic Sentence #2: Here, you’ll engage with like-minded individuals in a supportive space, sharing weekly classes and interactive sessions with Dr. Cat and leading mentors. Semantic Score:  0.2453
Program Semantic Sentence #3: Find Your Community. Find Your Strength Semantic Score:  0.2363
Program Semantic Sentence #4: If you’re navigating a labyrinth of lab tests, pills, and platitudes, Primal Trust resonates with your struggle. Semantic Score:  0.2337
Program Semantic Sentence #5: What truly sets Primal Trust apart is more than just its groundbreaking approach to healing; it’s the creation of a deeply connected community. Semantic Score:  0.1861

Program Tone Sentence #1: Here, you’ll engage with like-minded individuals in a supportive space, sharing weekly classes and interactive sessions 

In [181]:
#Now let's pull out the top ten matches for each program, regardless of user sentence

TOP_K = 10        #how many of the best (user text, program sentence) pairs to keep per program
SEM_WEIGHT = 0.5  #for now, equal weights to semantic and tone scores
TONE_WEIGHT = 0.5


def _best_matches(scores, sent_indx, prog_text, user_text, top_k):
    """Pick the top_k highest-scoring pairs for one program.

    scores and sent_indx are that program's (user text, rank) slices of the arrays
    returned by compare_user_prog. Returns three parallel lists, best first:
    program sentences, the user texts they matched, and the scores.
    """
    flat_scores = scores.flatten()        #flatten the scores for sorting
    flat_sent_indx = sent_indx.flatten()  #flatten the sentence indices for reference after sorting
    n_per_user = scores.shape[1]          #row-major flattening: flat position // n_per_user == user text index
    best = np.asarray(tf.argsort(flat_scores, direction='DESCENDING'))[:top_k]
    best_scores = [flat_scores[k] for k in best]
    prog_sentences = [prog_text[int(flat_sent_indx[k])] for k in best]
    user_sentences = [user_text[k // n_per_user] for k in best]
    return prog_sentences, user_sentences, best_scores


summary = {'Name': [],
           'Semantic Sentences': [], 'User Semantic Sentences':[], 'Semantic Scores':[], 'Avg Semantic Score': [],
           'Tone Sentences':[], 'User Tone Sentences':[], 'Tone Scores':[], 'Avg Tone Score': [],
           'Overall Score':[]}
for p, prog in enumerate(program_list): #for each program
    # Use names distinct from topsem_indx / toptone_indx: rebinding those 3-D result
    # arrays inside this loop would corrupt every iteration after the first and break
    # any earlier cell that is re-run afterwards.
    sem_sentences, user_sem_sentences, best_sem_scores = _best_matches(
        topsem_score[p], topsem_indx[p], prog.text, UserText, TOP_K)
    tone_sentences, user_tone_sentences, best_tone_scores = _best_matches(
        toptone_score[p], toptone_indx[p], prog.text, UserText, TOP_K)

    avg_sem_score = np.mean(best_sem_scores)
    avg_tone_score = np.mean(best_tone_scores)
    overall_score = SEM_WEIGHT*avg_sem_score + TONE_WEIGHT*avg_tone_score

    summary['Name'].append(prog.name)

    summary['Semantic Sentences'].append(sem_sentences)
    summary['User Semantic Sentences'].append(user_sem_sentences)
    summary['Semantic Scores'].append(best_sem_scores)
    summary['Avg Semantic Score'].append(avg_sem_score)

    summary['Tone Sentences'].append(tone_sentences)
    summary['User Tone Sentences'].append(user_tone_sentences)
    summary['Tone Scores'].append(best_tone_scores)
    summary['Avg Tone Score'].append(avg_tone_score)

    summary['Overall Score'].append(overall_score)

In [193]:
# Report the three programs with the highest overall score, best first, together
# with the sentence pairs behind their semantic and tone averages.
ranking = tf.argsort(summary['Overall Score'],direction='DESCENDING')
top3_indx = ranking[:3]
divider = "-"*50
for winner in top3_indx:
    prog_name = summary['Name'][winner]
    overall = summary['Overall Score'][winner]
    print(f"{prog_name}, (Overall Score {overall:.4f})")
    print(divider)

    avg_sem = summary['Avg Semantic Score'][winner]
    print(f"Average Semantic Score: {avg_sem:.4f}")
    sem_rows = zip(summary['Semantic Sentences'][winner],
                   summary['User Semantic Sentences'][winner],
                   summary['Semantic Scores'][winner])
    for prog_sentence, user_sentence, score in sem_rows:
        print(f"PROGRAM SENTENCE: {prog_sentence}")
        print(f"USER SENTENCE: {user_sentence}")
        print(f"SEMANTIC SCORE: {score:.4f}")

    avg_tone = summary['Avg Tone Score'][winner]
    print(f"\nAverage Tone Score: {avg_tone:.4f}")
    tone_rows = zip(summary['Tone Sentences'][winner],
                    summary['User Tone Sentences'][winner],
                    summary['Tone Scores'][winner])
    for prog_sentence, user_sentence, score in tone_rows:
        print(f"PROGRAM SENTENCE: {prog_sentence}")
        print(f"USER SENTENCE: {user_sentence}")
        print(f"TONE SCORE: {score:.4f}")
    print('\n\n')
    

Vital Side: Rewire, (Overall Score 0.6678)
--------------------------------------------------
Average Semantic Score: 0.4579
PROGRAM SENTENCE: By getting deep into your thoughts, you can spot patterns or links that affect how calm or stressed you feel.
USER SENTENCE: I feel stressed, but I don't know what to do with it
SEMANTIC SCORE: 0.5391
PROGRAM SENTENCE: When your nervous system is out of whack, you might notice things like your heart beating fast, your hands getting sweaty, feeling really stressed all the time or dealing with stress that just doesn't go away.
USER SENTENCE: I feel stressed, but I don't know what to do with it
SEMANTIC SCORE: 0.5262
PROGRAM SENTENCE: Alt text: Girl doing Deep breath exerciseSource When you're feeling stressed, one of the quickest and most effective nervous system regulation exercises is taking deep breaths.
USER SENTENCE: I feel stressed, but I don't know what to do with it
SEMANTIC SCORE: 0.5135
PROGRAM SENTENCE: If things get out of balance, chr

In [151]:
# Scratch cell: appends to `xtest`, which is not defined in any cell visible here
# (presumably left in the kernel by a deleted cell - TODO confirm, or remove this cell).
# Not idempotent: every run of this cell appends another 'Jack' to xtest['name'].
xtest['name'].append('Jack')

In [153]:
# Scratch check: prints the `xtest` dict that the preceding cell appends to.
# NOTE(review): nothing else in the visible notebook uses `xtest` - candidate for deletion.
print(xtest)

{'name': ['Jack', 'Jack'], 'scores': [], 'avg': []}


In [None]:
#Early pseudocode sketch of the comparison loop (see compare_user_prog for the working version)
#Initialize semantic_similarities = []*N_UserEmbeddings
#Initialize tone_similarities = []*N_UserEmbeddings
#for prog in programlist:
# prog_semantic_similarities = np.zeros((np.shape(prog.semantic_embeddings)[0],N_UserEmbeddings))
# prog_tone_similarities = np.zeros((np.shape(prog.tone_embeddings)[0],N_UserEmbeddings))
# for i in range(np.shape(UserSemanticEmbeddings)[0]):
#  for emb in prog.semantic_embeddings:
#   cosinesimilarity(UserSemanticEmbeddings[i],emb)
#  for emb in prog.tone_embeddings:
#   cosinesimilarity(UserToneEmbeddings[i],emb)
#