# Component 3 :: Analyze from Cache

Goal of notebook ::
- Access the cache used previously
- Iteratively step through designated list of subreddits
- Aggregate metrics for each entity known to that subreddit


References ::
- <a href="https://docs.python.org/3/library/collections.html#collections.Counter">Collections :: Counter()</a>
- <a href="https://cloud.google.com/natural-language/docs/analyzing-entity-sentiment">Analyzing Entity Sentiment </a>

Notes Regarding Sentiment Metrics
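- `score` ranges from -1.0 (negative) to +1.0 (positive) and describes the overall emotional leaning toward the entity.
- `magnitude` is a non-negative number describing how much emotional content (positive and negative combined) is present, regardless of direction.
- In the cached responses an entity's `sentiment` dict may be empty or may lack `score`; the analysis below skips empty dicts and treats a missing `score` as 0.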

## Setup File Access & Import Cache

In [None]:
from google.colab import drive
drive.mount('/content/drive')

import json
# Load the entity cache produced earlier in the project from the mounted drive.
# Only a genuinely missing file is reported as "NOT FOUND"; any other failure
# (malformed JSON, permission error, interrupt) surfaces with its real traceback
# instead of being mislabelled and silently leaving `entity_cache` undefined.
try:
    with open('/content/drive/Shareddrives/LHS712 Project [NLP]/data/data_hoard.json', 'r') as infile:
        entity_cache = json.load(infile)
    print("entity_cache :: data_hoard.json found and loaded")
except FileNotFoundError:
    print("\033[91mdata_hoard.json NOT FOUND\x1b[0m")

Mounted at /content/drive
entity_cache :: data_hoard.json found and loaded


In [None]:
# Peek at a handful of item ids rather than dumping every key of the cache
# (tens of thousands of ids) into the cell output.
list(entity_cache)[:10]

dict_keys(['msaugy', 'guriwq1', 'gurghwo', 'gurspvm', 'guru33p', 'gurx791', 'gus2412', 'gurgb1l', 'gus5ntm', 'gfd094', 'g3nr8u6', 'ftoljd0', 'fzszav3', 'fwsuhfx', 'g9ivzdg', 'g0jgbdj', 'g2k437h', 'g4id11t', 'fqzuhnx', 'g22g755', 'fsv45hu', 'fzkxsem', 'g0qw4ka', 'g9064z9', 'fq59spg', 'fvaeeyc', 'fuo2iim', 'g15udzx', 'g1tdyys', 'gawduut', 'gay55z4', 'g8qpezn', 'g0funbl', 'g9r2o7a', 'g0odroc', 'g3k11vt', 'fr9an9t', 'fr7r7hi', 'g2gcrfw', 'g3o0oj5', 'g4se2ww', 'g7dx52r', 'ftb8qaq', 'g06l6hc', 'g0qx6fs', 'gawb93h', 'fq8bg2t', 'fuo4esf', 'g1eec5y', 'g1xzoa0', 'gay92wh', 'g8qtgfs', 'fr9au5u', 'fq8t7t6', 'fsz68x9', 'fsz7pgi', 'e4nl8o', 'f9ei4d6', 'f9gnosl', 'f9lfwc2', 'f9lw483', 'fae7y4q', 'faa12b1', 'f9jeek9', 'f9gh4uo', 'f9hztbi', 'f9l524e', 'faov6t9', 'fa8c0v1', 'f9ew4c0', 'f9epmhy', 'f9f6a3g', 'f9nex2k', 'f9el3tm', 'f9jlsyy', 'f9zrnzi', 'fc3ivs5', 'fcn3cqi', 'fd60lxc', 'f9dl6ax', 'f9gm6wd', 'f9l6xfv', 'f9ssfn2', 'f9vna8k', 'fa9jtb8', 'fb6tv86', 'fb72q9j', 'fbth3dy', 'fbw7t0s', 'fcqupk5', 'f

In [None]:
from collections import Counter, defaultdict

def summarize_cache(cache):
    """Print the cache size and how its items split by subreddit and by item type.

    Parameters
    ----------
    cache : dict
        Maps an item id to a record holding at least the keys 'subreddit'
        and 'type'.

    Prints three lines (total key count, subreddit Counter, type Counter)
    and returns None.
    """
    by_subreddit, by_type = Counter(), Counter()

    print(f"{len(cache)} total keys in cache")
    for row in cache.values():
        by_subreddit[row['subreddit']] += 1
        by_type[row['type']] += 1

    print(by_subreddit)
    print(by_type)

# Print the overall cache size plus per-subreddit and per-item-type counts.
summarize_cache(cache=entity_cache)

24174 total keys in cache
Counter({'trees': 2440, 'depression': 2111, 'bipolar': 2026, 'Anxiety': 1909, 'Drugs': 1459, 'REDDITORSINRECOVERY': 1349, 'Petioles': 1299, 'LSD': 1097, 'Psychonaut': 1055, 'leaves': 950, 'addiction': 911, 'opiates': 867, 'EatingDisorders': 845, 'microdosing': 843, 'meth': 798, 'Stims': 775, 'heroin': 775, 'MDMA': 766, 'DMT': 752, 'ketamine': 603, 'cocaine': 544})
Counter({'comment': 16588, 'post': 7586})


In [None]:
def cache_comparisons(cache):
    """Tally posts, comments and entity detections per subreddit and print a report.

    Parameters
    ----------
    cache : dict
        Maps an item id to a record with at least 'subreddit' and 'type'
        ('post' or 'comment'). Entities are read from
        record['response']['entities'] when that path exists.

    Returns
    -------
    dict
        Lower-cased subreddit name -> dict with the keys 'postCount',
        'commentCount', 'all_entity_instances' (flat list of every entity dict),
        'dead_keys' (item ids whose response holds no entity list),
        'entitySentimentCount' and 'entityNoSentimentCount'.

    Raises
    ------
    AssertionError
        If a record's 'type' is neither 'post' nor 'comment'.
    """
    track_dict = {}
    for item_id, row in cache.items():
        subreddit = row['subreddit'].lower()
        if subreddit not in track_dict:
            track_dict[subreddit] = {
                "postCount": 0,
                "commentCount": 0,
                "all_entity_instances": [],
                "dead_keys": [],  # a dead key means there are no entities associated with the query call
                "entitySentimentCount": 0,
                "entityNoSentimentCount": 0
            }
        stats = track_dict[subreddit]

        item_type = row['type']
        if item_type == 'post':
            stats['postCount'] += 1
        elif item_type == 'comment':
            stats['commentCount'] += 1
        else:
            # Explicit raise (same exception type as the former `assert(0)`) so the
            # check survives `python -O` and names the offending record.
            raise AssertionError(f"unexpected item type {item_type!r} for item {item_id!r}")

        #### Sequester any entity-free ids. This is an explicit structural check
        #### rather than a blanket try/except, so unrelated errors are not hidden.
        response = row.get('response')
        entities = response.get('entities') if isinstance(response, dict) else None
        if not isinstance(entities, (list, tuple)):
            stats['dead_keys'].append(item_id)
            continue

        #### Produce a single "flattened" list of all entities for the subreddit.
        for entity_instance in entities:
            stats['all_entity_instances'].append(entity_instance)
            # A missing 'sentiment' key is treated like an empty one, so a single
            # incomplete entity no longer aborts the item halfway and marks it dead.
            if entity_instance.get('sentiment', {}) == {}:
                stats['entityNoSentimentCount'] += 1
            else:
                stats['entitySentimentCount'] += 1

    #### Comparison
    for key, stats in track_dict.items():
        print(f"r/{key}")
        print(f"    {stats['postCount']} original posts")
        print(f"    {stats['commentCount']} top-level comments")

        print(f"    {len(stats['dead_keys'])} total dead-keys (no entities identified)")
        print(f"    {len(stats['all_entity_instances'])} total entity mentions")

        #### Note that these are an instance of a post containing an instance mention ###
        # One post with 18 mentions of "piano"? Here, counted as one.                   #
        # Seven posts with two mentions of "piano" each? Here, counted as seven.        #
        #################################################################################

        print(f"\t{stats['entitySentimentCount']} entities with recognized sentiment")
        print(f"\t{stats['entityNoSentimentCount']} entities lacking recognized sentiment")
        print("="*80)

    return track_dict

# Per-subreddit tallies (keyed by lower-cased subreddit name) plus the flattened entity lists.
ents_by_reddit = cache_comparisons(cache=entity_cache)

r/leaves
    557 original posts
    393 top-level comments
    54 total dead-keys (no entities identified)
    15385 total entity mentions
	10793 entities with recognized sentiment
	4592 entities lacking recognized sentiment
r/petioles
    507 original posts
    792 top-level comments
    155 total dead-keys (no entities identified)
    16468 total entity mentions
	10762 entities with recognized sentiment
	5706 entities lacking recognized sentiment
r/trees
    260 original posts
    2180 top-level comments
    463 total dead-keys (no entities identified)
    8763 total entity mentions
	4814 entities with recognized sentiment
	3949 entities lacking recognized sentiment
r/anxiety
    747 original posts
    1162 top-level comments
    127 total dead-keys (no entities identified)
    26093 total entity mentions
	17233 entities with recognized sentiment
	8860 entities lacking recognized sentiment
r/depression
    748 original posts
    1363 top-level comments
    155 total dead-keys (no ent

In [None]:
#ents_by_reddit.keys()
#len(ents_by_reddit.keys())
#ents_by_reddit['leaves'].keys()
# [(i['name'], i['sentiment']) for i in ents_by_reddit['leaves']['all_entity_instances']]

In [None]:
#### New Structure
# entityDict
# - subreddit
# - - term
# - - - [{magnitude, score} ... {magnitude, score}]

def getEntityDict(entByReddit):
    """Group every non-empty sentiment dict by subreddit and entity name.

    Parameters
    ----------
    entByReddit : dict
        subreddit -> dict holding an 'all_entity_instances' list of entity
        dicts, each with 'name' and 'sentiment' keys.

    Returns
    -------
    dict
        subreddit -> entity name -> list of that entity's sentiment dicts.
        Entities whose sentiment is an empty dict are left out; every
        subreddit of the input appears, even with no entities.
    """
    entityDict = {}
    for subreddit, stats in entByReddit.items():
        per_term = entityDict[subreddit] = {}
        for doc_entity in stats['all_entity_instances']:
            sentiment = doc_entity['sentiment']
            if sentiment == {}:
                continue  # nothing to aggregate for this detection
            per_term.setdefault(doc_entity['name'], []).append(sentiment)
    return entityDict

# subreddit -> entity name -> list of that entity's non-empty sentiment dicts.
entityDict = getEntityDict(entByReddit=ents_by_reddit)

In [None]:
# Spot-check the three levels of entityDict: subreddit names, entity names, stored sentiment dicts.
# NOTE: 'leaves' and 'Delta 8' are hard-coded example keys; a KeyError is raised if the cache lacks them.
print(list(entityDict.keys())[:10])
print('\t',list(entityDict['leaves'].keys())[:10])
print('\t\t',entityDict['leaves']['Delta 8'])

['leaves', 'petioles', 'trees', 'anxiety', 'depression', 'drugs', 'stims', 'psychonaut', 'microdosing', 'lsd']
	 ['flower', 'idea', 'challenge', 'risk', 'coping skills', 'Delta 8', 'skills', 'Leaves', 'problems', 'something']
		 [{'magnitude': 0.8}, {'magnitude': 0.3, 'score': 0.3}, {'magnitude': 0.8, 'score': 0.4}]


# Grouping Entities (Contracting Variations) & Sentiment Aggregation

Current Goals
* Allow for the use of RegEx to contract variations // group up matching for aggregation
* Handles filtering out unwanted cases via substrings
* Encode logic for averaging sentiment scores for entities

Future Goals
* Add support for alternative reduction approaches like dictionary-based mapping, lemmer/stemmer
* Experiment with Levenshtein edit distance as a toggleable parameter for reducing, with a complementary "ignore" list/dict to override cases of visibly incorrect behavior


In [None]:
def reducer(pattern, entDict, subreddit_list, doesNotContain=[], v1=0, v2=0, v3=0):
    """Collect, per subreddit, the entity terms matching ``pattern`` and their sentiments.

    Parameters
    ----------
    pattern : str
        Regular expression tried against each entity term with ``re.search``,
        so it may match anywhere inside the term.
    entDict : dict
        subreddit -> term -> list of sentiment entries.
    subreddit_list : iterable of str
        Keys of ``entDict`` to scan. It is walked twice, so pass a re-iterable
        collection (list, dict keys view) rather than a one-shot generator.
    doesNotContain : list of str
        Substrings, compared case-insensitively, that disqualify a matching term.
    v1, v2, v3 : truthy flags
        v1 prints every accepted term, v2 prints every term rejected for
        containing a banned substring, v3 prints the set of unique accepted
        terms for each subreddit.

    Returns
    -------
    dict
        subreddit -> {'unique': set of accepted terms,
                      'entries': flat list of those terms' sentiment entries}.
    """
    import re

    matches = {}
    for subreddit in subreddit_list:
        matches[subreddit] = {'unique': set(), 'entries': []}

    for subreddit in subreddit_list:
        bucket = matches[subreddit]
        known_terms = entDict[subreddit]
        for term in known_terms:
            if re.search(pattern, term) is None:
                continue

            #### Any occurrence of a forbidden substring disqualifies the term
            lowered_term = term.lower()
            banned_hits = [banned.lower() in lowered_term for banned in doesNotContain]
            if any(banned_hits):
                if v2:
                    print(f"    Violation Caught :: '{term}' contains illegal substring")
                continue

            if v1:
                print(f'Term Matched in r/{subreddit} :: "{term}"')
            bucket['unique'].add(term)
            #### Unpack however many entries are stored under this term
            bucket['entries'].extend(known_terms[term])
    print("="*120)

    print(f"Matching pattern :: '{pattern}'")
    if v3:
        for subreddit in matches:
            if matches[subreddit]['unique']:
                print(f"r/{subreddit.title()} unique expressions matched:\n    {matches[subreddit]['unique']}")
            else:
                print(f"r/{subreddit.title()} unique expressions matched:\n    None Found")
        print("="*120)
    return matches

#### okay so implementing a does-not-contain filter... this is legal:
# sum([False,False,True,True,False]) --> 2, sum of affirmative boolean cases
# we can sum( [i in currentTerm for i in ForbiddenTerms] )
# and conditionalize to only execute when sum isn't higher than zero
# and report illegal substrings otherwise

def AverageSentiments(reduced_dict):
    """Print the mean sentiment score of the matched entries for each subreddit.

    Parameters
    ----------
    reduced_dict : dict
        Output of ``reducer``: subreddit -> dict whose 'entries' value is a
        list of sentiment dicts (each may carry 'score' and/or 'magnitude').

    For every subreddit one block is printed with the average score, rounded
    to 6 decimals, and the number of detections; the average is reported as
    None when there are no entries. Returns None.
    """
    def handleScore(instance):
        # When magnitude is present but score seems missing, it's a 0 that didn't properly
        # store during the serializations of the protobuf google response api object
        return instance.get('score', 0)

    # Iterate the argument itself; the previous body read the notebook-global
    # `reduced`, which only worked when the caller happened to use that name.
    for subreddit, matched in reduced_dict.items():
        scores = [handleScore(i) for i in matched['entries']]
        # No detections -> no meaningful average (avoids dividing by zero).
        avgSentiment = round(sum(scores) / len(scores), 6) if scores else None
        print(f"r/{subreddit}\n    Average Sentiment: {avgSentiment} from {len(scores)} detections")

# Run Analysis

In [None]:
# List the available subreddit keys, eight per printed row, as a reference for the subreddit_list arguments used below.
for each in range(0, len(entityDict.keys()), 8):
    print(list(entityDict.keys())[each:each+8])

['leaves', 'petioles', 'trees', 'anxiety', 'depression', 'drugs', 'stims', 'psychonaut']
['microdosing', 'lsd', 'dmt', 'redditorsinrecovery', 'addiction', 'opiates', 'cocaine', 'mdma']
['ketamine', 'heroin', 'meth', 'bipolar', 'eatingdisorders']


***
<b>Averaged Entity-Level Sentiment Per Subreddit</b></br>
* Running the pattern `""` just results in all entities being passed through in the `reducer` function, so passing `entityDict.keys()` in as the subreddit-list designation gets averaged entity-sentiment for all entities across all subreddits:

In [None]:
# Baseline: the empty pattern matches every entity name, so this averages all
# sentiment-bearing entities for each subreddit.
reduced = reducer(pattern="",
                  entDict=entityDict, 
                  subreddit_list=entityDict.keys(),
                  doesNotContain=[],
                  v1=0, v2=0, v3=0)
AverageSentiments(reduced_dict=reduced)

Matching pattern :: ''
r/leaves
    Average Sentiment: 0.04527 from 10793 detections
r/petioles
    Average Sentiment: 0.096673 from 10762 detections
r/trees
    Average Sentiment: 0.07983 from 4814 detections
r/anxiety
    Average Sentiment: -0.035043 from 17233 detections
r/depression
    Average Sentiment: -0.110397 from 19333 detections
r/drugs
    Average Sentiment: 0.049192 from 7367 detections
r/stims
    Average Sentiment: 0.040279 from 3734 detections
r/psychonaut
    Average Sentiment: 0.08349 from 9443 detections
r/microdosing
    Average Sentiment: 0.121822 from 4853 detections
r/lsd
    Average Sentiment: 0.098478 from 3811 detections
r/dmt
    Average Sentiment: 0.081226 from 3132 detections
r/redditorsinrecovery
    Average Sentiment: 0.040945 from 11508 detections
r/addiction
    Average Sentiment: -0.032482 from 10843 detections
r/opiates
    Average Sentiment: -0.014541 from 6891 detections
r/cocaine
    Average Sentiment: 0.053377 from 1229 detections
r/mdma
    Aver

***
<b>Analysis of Family</b></br>

In [None]:
# The negative lookahead rejects "family" followed by one of the listed words;
# doesNotContain then drops remaining matches containing any listed substring.
# NOTE(review): the `.` after `amily` requires one more character, so the bare
# entity "family" can never match (only e.g. "family.", "familys", "family member").
# Confirm that excluding plain "family" is intended.
reduced = reducer(pattern="[fF]amily.(?!gathering|lineage|breakup|shit|vacation|friend|cause|messages|business|support)", entDict=entityDict, 
                  subreddit_list=["anxiety","depression","addiction"],
                  doesNotContain=['physician','doctor', 'medicine','services',
                                  'nothing','trips','history','breakfast'], v1=0, v2=0, v3=1)
AverageSentiments(reduced_dict=reduced)

Matching pattern :: '[fF]amily.(?!gathering|lineage|breakup|shit|vacation|friend|cause|messages|business|support)'
r/Anxiety unique expressions matched:
    {'family topics', 'family member', 'family members'}
r/Depression unique expressions matched:
    {'family conversations', 'family.', 'family member', 'family members', 'family issues', 'familys'}
r/Addiction unique expressions matched:
    {'family member', 'family pics', 'family members'}
r/anxiety
    Average Sentiment: 0.044444 from 9 detections
r/depression
    Average Sentiment: 0.01 from 10 detections
r/addiction
    Average Sentiment: 0.0 from 3 detections


***
<b>Analysis of Smoking</b></br>

Some good variation here, worth including in report

In [None]:
# Matches terms containing "smoke"/"Smoke" (so also "smoker", "smoked", ...) unless the
# text right after it is one of the look-ahead alternatives.
# NOTE(review): "smoking" does not contain the literal "smoke" and is therefore not matched.
# Confirm whether it should be included.
reduced = reducer(pattern="[Ss]moke(?! [Ss]pot| shop| damage| generator| break| box|.+device|.+everyday)",
                  entDict=entityDict, 
                  subreddit_list=['trees','petioles','leaves','addiction','redditorsinrecovery','psychonaut','anxiety','depression'],
                  doesNotContain=['campus','chain','fag','ex-','ex ',"boyfriend"], v1=0, v2=0, v3=0)
AverageSentiments(reduced_dict=reduced)

Matching pattern :: '[Ss]moke(?! [Ss]pot| shop| damage| generator| break| box|.+device|.+everyday)'
r/trees
    Average Sentiment: 0.125 from 16 detections
r/petioles
    Average Sentiment: 0.020833 from 96 detections
r/leaves
    Average Sentiment: -0.001351 from 74 detections
r/addiction
    Average Sentiment: 0.042857 from 14 detections
r/redditorsinrecovery
    Average Sentiment: -0.06 from 5 detections
r/psychonaut
    Average Sentiment: -0.071429 from 7 detections
r/anxiety
    Average Sentiment: -0.05 from 2 detections
r/depression
    Average Sentiment: None from 0 detections


***
<b>Analysis of Relapse</b></br>
* good "sanity-check" of results in the sense that generally negative outcome expected

In [None]:
# Case-sensitive literal: a capitalised "Relapse" entity would not be matched, unlike the
# [Xx]-style patterns used in other cells. TODO: confirm this is intended.
# v2=1 has no visible effect here because doesNotContain is empty.
reduced = reducer(pattern="relapse",
                  entDict=entityDict, 
                  subreddit_list=['eatingdisorders','addiction','redditorsinrecovery','leaves','opiates'],
                  doesNotContain=[],
                  v1=0, v2=1, v3=1)
AverageSentiments(reduced_dict=reduced)

Matching pattern :: 'relapse'
r/Eatingdisorders unique expressions matched:
    {'restriction relapse', 'relapse', 'relapses', 'relapse cycle'}
r/Addiction unique expressions matched:
    {'relapse', 'relapses', 'https://old.reddit.com/r/addiction/comments/j1v442/i_want_this_to_have_been_my_last_ever_relapse_to/', 'relapser', 'relapsed', 'relapse specialist', 'binge relapse', 'relapse avoidance abilities', 'drug relapse'}
r/Redditorsinrecovery unique expressions matched:
    {'relapse', 'relapse cycles', 'relapses', 'relapser', 'people relapse', 'relapsed'}
r/Leaves unique expressions matched:
    {'relapse plan', 'relapse im living', 'relapse', 'relapses'}
r/Opiates unique expressions matched:
    {'relapse'}
r/eatingdisorders
    Average Sentiment: -0.235 from 20 detections
r/addiction
    Average Sentiment: -0.093333 from 45 detections
r/redditorsinrecovery
    Average Sentiment: -0.04375 from 64 detections
r/leaves
    Average Sentiment: -0.12381 from 21 detections
r/opiates
    Av

***
<b>Analysis of Future</b></br>

* not exactly the strongest output but it's something

In [None]:
# doesNotContain entries are compared case-insensitively by reducer, so "Back to"
# also filters terms containing "back to".
reduced = reducer(pattern="[Ff]uture",
                  entDict=entityDict, 
                  subreddit_list=['leaves','anxiety','depression','psychonaut'],
                  doesNotContain=["cyborg","Twack","Back to"],
                  v1=0, v2=0, v3=0)
AverageSentiments(reduced_dict=reduced)

Matching pattern :: '[Ff]uture'
r/leaves
    Average Sentiment: 0.1 from 4 detections
r/anxiety
    Average Sentiment: -0.1 from 2 detections
r/depression
    Average Sentiment: -0.290909 from 11 detections
r/psychonaut
    Average Sentiment: 0.066667 from 3 detections


*** 
<b>Analysis of Sleep</b></br>
In this case, toggling on v3 shows that many different terms are matched. If we want the broader spectrum of how sleep is represented in these communities' discussions, then facets of sleep beyond the term itself (apnea, medication, lack thereof) are arguably valid to include, so that the aggregated sentiment more holistically represents the discussions of sleep with their broader context.

In [None]:
# Deliberately broad: every entity containing "sleep"/"Sleep" is pooled, which also
# brings in compounds such as 'sleepover' (visible in the recorded output).
reduced = reducer(pattern="[Ss]leep",
                  entDict=entityDict, 
                  subreddit_list=['anxiety','bipolar','depression','eatingdisorders',
                  'trees','leaves','petioles','stims','meth','addiction','redditorsinrecovery',
                  'drugs'],
                  doesNotContain=['SleeplessInSeattle'],
                  v1=0, v2=0, v3=1)
AverageSentiments(reduced_dict=reduced)

Matching pattern :: '[Ss]leep'
r/Anxiety unique expressions matched:
    {'sleep deprivation', 'sleeping schedule', 'sleep aids', 'sleep', 'sleep apena', 'sleeping pill', 'sleep paralysis', 'Sleep disturbance', 'sleep health', 'sleep apnea', 'sleep aid', 'Sleep', 'sleeping', 'sleep disturbances', 'sleep schedule', 'sleeping pills'}
r/Bipolar unique expressions matched:
    {'sleep issues', 'sleep quality', 'sleeping schedule', 'sleep', 'sleep apnea', 'sleep posture', 'sleep med', 'sleeping disturbances', 'phase sleep', 'sleeping meds', 'sleep routine', 'sleep hygiene', 'Cant sleep', 'Relax and Sleep Well', 'sleep schedule', 'sleeping pills'}
r/Depression unique expressions matched:
    {'sleep', 'sleep apnea', 'sleep paralysis', 'sleeping routine', 'sleep dept', 'sleeping', 'sleep schedule', 'Sleep deprivation'}
r/Eatingdisorders unique expressions matched:
    {'sleepover', 'sleep', 'sleep aid', 'sleep schedules', 'sleep specialist', 'sleep schedule', 'sleep eating'}
r/Trees unique ex

***
<b>Analysis of Habits</b></br>
Again, under v3=1, we can see an array of habit-inclusive-and-oriented terms being aggregated here, but all appear related to topical variants relevant to the peculiar habits of the respective communities.

In [None]:
# Unanchored substring match: besides habit/habits compounds it also captures
# unrelated words such as 'habitats' (visible in the recorded r/petioles output).
reduced = reducer(pattern="[Hh]abit",
                  entDict=entityDict, 
                  subreddit_list=['leaves','petioles','anxiety','depression','drugs',
                                  'redditorsinrecovery','addiction','eatingdisorders'],
                  doesNotContain=[],
                  v1=0, v2=0, v3=1)
AverageSentiments(reduced_dict=reduced)

Matching pattern :: '[Hh]abit'
r/Leaves unique expressions matched:
    {'marijuana habit cold-turkey', '30yr habit', 'smoking habit', 'smoking habits', 'habits', 'habit'}
r/Petioles unique expressions matched:
    {'weed habits', 'sleep habits', 'habitats', 'weed habit', 'smoking habit', 'habits', 'habit'}
r/Anxiety unique expressions matched:
    {'habits', 'habit', 'thought habit'}
r/Depression unique expressions matched:
    {'habits', 'habit'}
r/Drugs unique expressions matched:
    {'habit', 'drug habits'}
r/Redditorsinrecovery unique expressions matched:
    {'coke habit', '16yr opioid habit', 'party habit', 'eating habits', 'habits', 'habit'}
r/Addiction unique expressions matched:
    {'cocaine habit', 'masturbation habits', 'eating habits', 'habits', 'habit'}
r/Eatingdisorders unique expressions matched:
    {'food habits', 'compulsion eating habit', 'eating habit', 'binging habits', 'eating habits', 'exercise habits', 'lil habit', 'habits', 'habit'}
r/leaves
    Average Sent

***
<b>Analysis of Weed/Pot/Marijuana/MJ/etc in r/trees, r/petioles, r/weed, r/anxiety, r/bipolar, r/depression, r/eatingdisorders</b></br>
- specifically compare nature of these communities


In [None]:
# NOTE(review): the "[Pp]ot " alternative ends with a space, so a bare "pot" entity never
# matches while e.g. "pot belly" does. Confirm that is intended.
# The other alternatives are unanchored, so plurals and compounds ("weeds", "smoking weed") are pooled.
reduced = reducer(pattern="[Cc]annabis|[Mm]arijuana|[Pp]ot |[Ww]eed",
                  entDict=entityDict, 
                  subreddit_list=['anxiety','bipolar','depression','eatingdisorders',
                                  'trees','petioles','leaves',
                                  'addiction','redditorsinrecovery'],
                  doesNotContain=['http', 'protocol', 'dispensary', 'club', 'gang', 'spot'],
                  v1=0, v2=0, v3=1)
AverageSentiments(reduced_dict=reduced)

Matching pattern :: '[Cc]annabis|[Mm]arijuana|[Pp]ot |[Ww]eed'
r/Anxiety unique expressions matched:
    {'marijuana', 'weed dick', 'pot belly', 'weed'}
r/Bipolar unique expressions matched:
    {'smoking weed', 'weekends - weed', 'cannabis', 'smoke weed', 'weed'}
r/Depression unique expressions matched:
    {'cannabis', 'weed'}
r/Eatingdisorders unique expressions matched:
    {'weed girl', 'weed'}
r/Trees unique expressions matched:
    {'weed tree', 'blackmarket weed carts', 'cannabis industry', 'weed cherry', 'tasting weeds', 'delivery weed', 'weed.', 'Volcannabis', 'weed smokers', 'cannabis concentrate', 'weed gods', 'Cannabis laws', 'weed gummies', 'tea w weed butter', 'Marijuana', 'ditch weed', 'cannabis', 'weed caramels', 'weeds', 'cash selling weed', 'cannabis possession', 'smoking weed', 'Cannabis LOLZ', '#legalizeCannabis', 'cannabis delivery', 'Weed', 'Weeds', 'cannabis caution system', 'Dude weed', 'weed', 'weed cookies', 'grade weed', 'weed snobs', 'cannabis vapes', 'mari

***
<b>Analysis of Feelings</b></br>

* V3 on, we can see that this combines a blend of physical and possibly-emotional feelings
* talk about highest and lowest


In [None]:
# Lower-case literal "feel": matches any term containing it ("feeling", "feelings", ...),
# but capitalised "Feel..." entities are skipped. TODO: confirm intended.
# Runs over every subreddit; URL-like terms are dropped via 'http'.
reduced = reducer(pattern="feel",
                  entDict=entityDict, 
                  subreddit_list=entityDict.keys(),
                  doesNotContain=['http'],
                  v1=0, v2=0, v3=0)
AverageSentiments(reduced_dict=reduced)

Matching pattern :: 'feel'
r/leaves
    Average Sentiment: -0.045588 from 68 detections
r/petioles
    Average Sentiment: 0.011429 from 70 detections
r/trees
    Average Sentiment: 0.2 from 9 detections
r/anxiety
    Average Sentiment: -0.094737 from 133 detections
r/depression
    Average Sentiment: -0.164655 from 116 detections
r/drugs
    Average Sentiment: 0.075676 from 37 detections
r/stims
    Average Sentiment: -0.1875 from 8 detections
r/psychonaut
    Average Sentiment: -0.007273 from 55 detections
r/microdosing
    Average Sentiment: 0.14375 from 16 detections
r/lsd
    Average Sentiment: 0.096429 from 28 detections
r/dmt
    Average Sentiment: 0.1875 from 16 detections
r/redditorsinrecovery
    Average Sentiment: 0.097222 from 36 detections
r/addiction
    Average Sentiment: -0.046667 from 45 detections
r/opiates
    Average Sentiment: 0.081818 from 22 detections
r/cocaine
    Average Sentiment: 0.133333 from 3 detections
r/mdma
    Average Sentiment: -0.042857 from 14 detec

***
<b>Analysis of Food</b></br>

* can maybe infer something about eating behaviors, dieting


In [None]:
# NOTE(review): doesNotContain entries are plain case-insensitive substrings, so short ones
# such as 'cat' or 'pet' also discard any term that merely contains those letters
# (for instance a term with "education" in it). Verify the filter is not over-broad.
reduced = reducer(pattern="[Ff]ood",
                  entDict=entityDict, 
                  subreddit_list=entityDict.keys(),
                  doesNotContain=['http','alcohol','dehydrator','candles','cat','dog','pet',
                                  'stamps','delivery idea','processor','store'],
                  v1=0, v2=0, v3=1)
AverageSentiments(reduced_dict=reduced)

Matching pattern :: '[Ff]ood'
r/Leaves unique expressions matched:
    {'junk food', 'foods', 'food', 'junkfood', 'fatty foods', 'binge food', 'order food'}
r/Petioles unique expressions matched:
    {'junk food', 'delivery food', 'foods', 'fast food', 'food'}
r/Trees unique expressions matched:
    {'foods'}
r/Anxiety unique expressions matched:
    {'junk food', 'foods', 'food place', 'food', 'food poisoning'}
r/Depression unique expressions matched:
    {'food shopping', 'food stuff', 'fast food', 'food banks', 'food charities', 'food', 'food poisoning'}
r/Drugs unique expressions matched:
    {'Food', 'foods', 'food taste', 'xD Food', 'food'}
r/Stims unique expressions matched:
    {'food'}
r/Psychonaut unique expressions matched:
    {'food addiction', 'foods', 'plant foods', 'food', 'food poisoning'}
r/Microdosing unique expressions matched:
    {'food'}
r/Lsd unique expressions matched:
    {'food'}
r/Dmt unique expressions matched:
    {'food pantries', 'food'}
r/Redditorsinrec

In [None]:
# Companion to the food analysis: terms containing "appetite"/"Appetite", limited to six subreddits.
reduced = reducer(pattern="[Aa]ppetite",
                  entDict=entityDict, 
                  subreddit_list=['leaves','petioles','anxiety','depression','eatingdisorders','stims'],
                  doesNotContain=['http'],
                  v1=0, v2=0, v3=0)
AverageSentiments(reduced_dict=reduced)

Matching pattern :: '[Aa]ppetite'
r/leaves
    Average Sentiment: -0.442105 from 19 detections
r/petioles
    Average Sentiment: -0.066667 from 30 detections
r/anxiety
    Average Sentiment: -0.5125 from 8 detections
r/depression
    Average Sentiment: -0.46 from 5 detections
r/eatingdisorders
    Average Sentiment: -0.188889 from 36 detections
r/stims
    Average Sentiment: -0.311111 from 9 detections


***
<b>Analysis of Sentiment Towards Doctors & Therapy/Therapists</b></br>

* pretty topical


In [None]:
# Any entity containing "doctor"/"Doctor", across all subreddits; URL-like terms are dropped via 'http'.
reduced = reducer(pattern="[Dd]octor",
                  entDict=entityDict, 
                  subreddit_list=entityDict.keys(),
                  doesNotContain=['http'],
                  v1=0, v2=0, v3=0)
AverageSentiments(reduced_dict=reduced)

Matching pattern :: '[Dd]octor'
r/leaves
    Average Sentiment: -0.136364 from 11 detections
r/petioles
    Average Sentiment: 0.116667 from 6 detections
r/trees
    Average Sentiment: -0.075 from 4 detections
r/anxiety
    Average Sentiment: 0.021053 from 95 detections
r/depression
    Average Sentiment: -0.026087 from 23 detections
r/drugs
    Average Sentiment: -0.05 from 6 detections
r/stims
    Average Sentiment: -0.26 from 5 detections
r/psychonaut
    Average Sentiment: 0.1 from 2 detections
r/microdosing
    Average Sentiment: 0.06 from 10 detections
r/lsd
    Average Sentiment: -0.24375 from 16 detections
r/dmt
    Average Sentiment: 0.0 from 2 detections
r/redditorsinrecovery
    Average Sentiment: 0.106452 from 31 detections
r/addiction
    Average Sentiment: -0.047826 from 23 detections
r/opiates
    Average Sentiment: -0.027273 from 22 detections
r/cocaine
    Average Sentiment: -0.033333 from 3 detections
r/mdma
    Average Sentiment: -0.05 from 2 detections
r/ketamine
  

In [None]:
# NOTE(review): `[yist]` is a character class (exactly one of y/i/s/t), not the alternation
# (y|ist). It still hits "therapy", "therapist" and "therapies" through their first letter
# after "therap", and the unanchored match also pulls in e.g. 'chemotherapy'.
# v3 is only tested for truthiness by reducer, so 3 behaves like 1.
reduced = reducer(pattern="[Tt]herap[yist]",
                  entDict=entityDict, 
                  subreddit_list=entityDict.keys(),
                  doesNotContain=['http'],
                  v1=0, v2=0, v3=3)
AverageSentiments(reduced_dict=reduced)

Matching pattern :: '[Tt]herap[yist]'
r/Leaves unique expressions matched:
    {'therapist', 'therapy', 'therapists', 'therapy session', 'group therapy session', 'therapist session', 'chemotherapy'}
r/Petioles unique expressions matched:
    {'therapist', 'therapy costs', 'therapy', 'Therapist appointment', 'therapy session', 'cognitive behavioural therapy', 'childhood educator/therapist', 'feelings therapy'}
r/Trees unique expressions matched:
    None Found
r/Anxiety unique expressions matched:
    {'therapy trick', 'therapies', 'psychotherapy', 'therapy', 'therapist', 'case therapy', 'therapy sessions', 'Rhythm Therapy', 'psychologist / therapist', 'hypnotherapy', 'therapists', 'Hypnotherapy', 'Therapy'}
r/Depression unique expressions matched:
    {'therapies', 'therapist friend', 'video therapy', 'therapist', 'therapy', 'Rhythm Therapy', 'therapist alltogether', 'texting therapist', 'therapy needs', 'therapist ahould', 'therapists', 'Cognitive Behavioural Therapy', 'vid therapy se

***
<b>Analysis of Sentiment Towards College/University</b></br>



In [None]:
# Terms containing "college"/"university" (either capitalisation of the first letter), across all subreddits.
# v3 is only tested for truthiness by reducer, so 3 behaves like 1.
reduced = reducer(pattern="[Cc]ollege|[Uu]niversity",
                  entDict=entityDict, 
                  subreddit_list=entityDict.keys(),
                  doesNotContain=['http'],
                  v1=0, v2=0, v3=3)
AverageSentiments(reduced_dict=reduced)

Matching pattern :: '[Cc]ollege|[Uu]niversity'
r/Leaves unique expressions matched:
    {'college roommates', 'college friend', 'university work', 'college student', 'university', 'community college', 'college', 'college counselor', 'college town'}
r/Petioles unique expressions matched:
    {'college student', 'college campus', 'college', 'University', 'college classes', 'university stuff', 'college students'}
r/Trees unique expressions matched:
    {'college'}
r/Anxiety unique expressions matched:
    {'college subjects', 'college life', 'college routine', 'college likes', 'college student', 'university', 'university lecture', 'College', 'college course', 'college', 'college group homeoffice project', 'college environment', 'colleges', 'university student'}
r/Depression unique expressions matched:
    {'college life', 'university preparation', 'college stress', 'college admissions', 'college friends', 'university classes', 'college student', 'college graduate', 'college cause', 'unive

***
<b>Analysis of Sentiment Towards Jobs</b></br>

In [None]:
# Any entity containing "job"/"Job" across all subreddits; terms containing 'http' or 'rim' are discarded.
reduced = reducer(pattern="[Jj]ob",
                  entDict=entityDict, 
                  subreddit_list=entityDict.keys(),
                  doesNotContain=['http','rim'],
                  v1=0, v2=0, v3=0)
AverageSentiments(reduced_dict=reduced)

Matching pattern :: '[Jj]ob'
r/leaves
    Average Sentiment: 0.098437 from 64 detections
r/petioles
    Average Sentiment: 0.106818 from 44 detections
r/trees
    Average Sentiment: 0.354545 from 11 detections
r/anxiety
    Average Sentiment: 0.072358 from 123 detections
r/depression
    Average Sentiment: -0.122727 from 198 detections
r/drugs
    Average Sentiment: 0.482353 from 17 detections
r/stims
    Average Sentiment: 0.216667 from 6 detections
r/psychonaut
    Average Sentiment: 0.3 from 9 detections
r/microdosing
    Average Sentiment: 0.192308 from 13 detections
r/lsd
    Average Sentiment: 0.136364 from 11 detections
r/dmt
    Average Sentiment: 0.65 from 2 detections
r/redditorsinrecovery
    Average Sentiment: 0.129167 from 72 detections
r/addiction
    Average Sentiment: 0.028571 from 49 detections
r/opiates
    Average Sentiment: -0.0125 from 16 detections
r/cocaine
    Average Sentiment: 0.0 from 3 detections
r/mdma
    Average Sentiment: 0.4 from 2 detections
r/ketamine

***
<b>Analysis of Sentiment Towards Friend(/s)</b></br>

In [None]:
# Unanchored match, so 'boyfriend', 'girlfriend', 'friendship' etc. are pooled together
# with 'friend' (visible in the recorded output).
# v3 is only tested for truthiness by reducer, so 3 behaves like 1.
reduced = reducer(pattern="[Ff]riend",
                  entDict=entityDict, 
                  subreddit_list=entityDict.keys(),
                  doesNotContain=['http'],
                  v1=0, v2=0, v3=3)
AverageSentiments(reduced_dict=reduced)

Matching pattern :: '[Ff]riend'
r/Leaves unique expressions matched:
    {'internet friend', 'friends group', 'friendships', 'boyfriend', 'stoner friends', 'boyfriend cause', 'college friend', 'narcissist ex girlfriend', 'friend', 'friendship', 'smoker boyfriend', 'best friend', 'Friends', 'friends house', 'girlfriend', 'fight friends', 'growth friends', 'friends'}
r/Petioles unique expressions matched:
    {'friend group', 'boyfriend', 'stoner friends', 'family friend', 'friend', 'ex girlfriend', 'girlfriends', 'break friend', 'best friend', 'stoner friend', 'friends place', 'help friends', 'accountability friend', 'smoker friends', 'girlfriend', 'friends'}
r/Trees unique expressions matched:
    {'advice friends', 'friend', 'friend lol', 'friends'}
r/Anxiety unique expressions matched:
    {'boyfriend', 'friend group', 'friend', 'friendship', 'bestfriend', 'way friend', 'girlfriends', 'friends wedding', 'best friend', 'Friends', 'friends house', 'friend kids', 'friend issues', 'Frien

***
<b>Analysis of Sentiment Towards Support</b></br>

In [None]:
# Unanchored match: pools terms such as 'child support', 'tech support' and 'supporter'
# (visible in the recorded output) with emotional-support terms.
# NOTE(review): consider extending doesNotContain if only the latter sense is wanted.
# v3 is only tested for truthiness by reducer, so 3 behaves like 1.
reduced = reducer(pattern="[Ss]upport",
                  entDict=entityDict, 
                  subreddit_list=entityDict.keys(),
                  doesNotContain=['http'],
                  v1=0, v2=0, v3=3)
AverageSentiments(reduced_dict=reduced)

Matching pattern :: '[Ss]upport'
r/Leaves unique expressions matched:
    {'support network', 'support systems', 'support system', 'support guys', 'support'}
r/Petioles unique expressions matched:
    {'support groups', 'programming support', 'support'}
r/Trees unique expressions matched:
    {'supporter'}
r/Anxiety unique expressions matched:
    {'co support teacher', 'child support', 'Support', 'support network', 'support'}
r/Depression unique expressions matched:
    {'support request', 'family support', 'peer support', 'child support', 'support pillar', 'support group', 'Support', 'support groups', 'support space', 'support system', 'support'}
r/Drugs unique expressions matched:
    {'support mate', 'support', 'support system'}
r/Stims unique expressions matched:
    {'mental health support', 'support'}
r/Psychonaut unique expressions matched:
    {'Tech Support', 'tech support', 'support', 'support system'}
r/Microdosing unique expressions matched:
    {'support'}
r/Lsd unique ex