In [1]:
import sys
import time
import requests
from datetime import datetime, timedelta
import json
import numpy as np
from sentence_transformers import SentenceTransformer
import pandas as pd
import statsmodels.api as sm
from sklearn.metrics import r2_score
import nltk
from nltk.tokenize import sent_tokenize

I0505 18:30:17.485144 19264 file_utils.py:41] PyTorch version 1.1.0 available.


In [2]:
model = SentenceTransformer('bert-large-nli-mean-tokens')

I0505 18:30:38.375733 19264 SentenceTransformer.py:29] Load pretrained SentenceTransformer: bert-large-nli-mean-tokens
I0505 18:30:38.380663 19264 SentenceTransformer.py:32] Did not find a '/' or '\' in the name. Assume to download model from server.
I0505 18:30:38.388950 19264 SentenceTransformer.py:67] Load SentenceTransformer from folder: C:\Users\Jai/.cache\torch\sentence_transformers\public.ukp.informatik.tu-darmstadt.de_reimers_sentence-transformers_v0.2_bert-large-nli-mean-tokens.zip
I0505 18:30:38.489317 19264 configuration_utils.py:281] loading configuration file C:\Users\Jai/.cache\torch\sentence_transformers\public.ukp.informatik.tu-darmstadt.de_reimers_sentence-transformers_v0.2_bert-large-nli-mean-tokens.zip\0_BERT\config.json
I0505 18:30:38.493524 19264 configuration_utils.py:319] Model config BertConfig {
  "_num_labels": 2,
  "architectures": null,
  "attention_probs_dropout_prob": 0.1,
  "bad_words_ids": null,
  "bos_token_id": null,
  "decoder_start_token_id": null,
 

In [3]:
def fit_beta_reg(y, X):
    binom_glm = sm.GLM(y, X, family=sm.families.Binomial())
    binom_results = binom_glm.fit()
    return binom_results
# plt.plot(y, binom_results.predict(X), 'o', alpha=0.2);

In [4]:
def goodness_of_fit(model, true, X):
    y_predicted = model.get_prediction(X)
    pred_vals = y_predicted.summary_frame()['mean']
    print(r2_score(true, pred_vals))

In [5]:
def sentence_tokenize(text):
    return sent_tokenize(text)

In [6]:
def none_or_empty(text):
    return text is None or len(text) == 0 or text == "[removed]" or text == '[deleted]'

# SETUP AND TEST

In [7]:
df_vad = pd.read_csv('Vad Lexicon/lexicon.txt', delimiter='\t', header = 0)
df_vad = df_vad.dropna()
df_vad.index = df_vad['Word']
df_vad = df_vad[['Valence', 'Arousal', 'Dominance']]

In [8]:
vad_words = list(df_vad.index)
vad_embeddings = model.encode(vad_words)


Batches: 100%|██████████| 2501/2501 [22:08<00:00,  1.98it/s]


In [9]:
vad_embeddings = np.array(vad_embeddings)

In [10]:
valence = np.array(df_vad['Valence'].tolist())
arousal  = np.array(df_vad['Arousal'].tolist())
dominance = np.array(df_vad['Dominance'].tolist())

# GOODNESS OF FIT

In [11]:
import pickle

In [12]:
titles = ['valence', 'arousal', 'dominance']
dims = [valence, arousal, dominance]
for i in range(len(titles)):
    reg_model = fit_beta_reg(dims[i], vad_embeddings)
    goodness_of_fit(reg_model, dims[i], vad_embeddings)
    with open(f"{titles[i]}.glm.pkl", 'wb') as file:
        reg_model.remove_data()
        pickle.dump(reg_model, file)
        goodness_of_fit(reg_model, dims[i], vad_embeddings)


0.719292563382529




0.719292563382529
0.6015808128609319




0.6015808128609319
0.6420082478782179




0.6420082478782179


# INTUITION TEST

In [156]:
text = 'this is a happy text, be glad! this is extremely sad. this is very sad. this is quite sad. this is fantastic. please make the pain go away. I cannot stand how terrible this is. what would the valence of this be?'
x = sentence_tokenize(text)
xem = model.encode(x)
print(len(xem))
binom_results.get_prediction(xem).summary_frame()


Batches:   0%|          | 0/1 [00:00<?, ?it/s]
Batches: 100%|██████████| 1/1 [00:00<00:00,  2.12it/s]


8


Unnamed: 0,mean,mean_se,mean_ci_lower,mean_ci_upper
0,0.955938,0.044147,0.869412,1.042464
1,0.049444,0.029379,-0.008138,0.107026
2,0.068133,0.029428,0.010455,0.12581
3,0.101341,0.029293,0.043927,0.158755
4,0.89872,0.026898,0.846001,0.951438
5,0.487999,0.050589,0.388847,0.587151
6,0.007473,0.034649,-0.060437,0.075383
7,0.705989,0.036747,0.633966,0.778012


In [86]:
x = binom_results.get_prediction(xem)
x.predicted_mean

array([0.95593824, 0.04944408, 0.06813251, 0.10134126, 0.89871954,
       0.48799918, 0.007473  , 0.70598921])

# GET DIMENSION SCORE

In [91]:
DIRNAME = 'data_r2/'

In [112]:
def get_subreddit_data(subreddit):
    with open(f'{DIRNAME}r.{subreddit}.submission.json', 'r') as file:
        data = file.readlines()
        return [json.loads(line) for line in data] 

In [113]:
get_subreddit_data('belgium')

[{'author': 'RookieBambooXJ9',
  'created_utc': 1579969312,
  'id': 'etsr25',
  'permalink': '/r/belgium/comments/etsr25/should_we_be_worried_about_the_corona_virus_here/',
  'selftext': 'Thoughts? What about preventative measures?',
  'title': 'Should we be worried about the Corona virus here in Belgium? Since there are 3 confirmed cases in Francs?'},
 {'author': 'GundamNewType',
  'created_utc': 1580806716,
  'id': 'eynb98',
  'permalink': '/r/belgium/comments/eynb98/news_first_coronavirus_case_in_belgium/',
  'selftext': '   BRUSSELS (Reuters) - A Belgian person, one of nine repatriated from Wuhan in China on Sunday, has tested positive for the coronavirus, Belgium’s health agency said on Tuesday.  The agency said all nine had undergone a series of tests in a military hospital in the capital Brussels. Eight of them tested negative.  No details were given about the person who tested positive, but the agency said they were in good health and not currently showing any symptoms of the v

In [167]:
def get_subreddit_score(subreddit, reg_model):
    # Get data stored for subreddit
    
    data = get_subreddit_data(subreddit)
    all_means = np.array([])
    def get_post_score(text):
#         sen_split = sentence_tokenize(text)
        encoded_sen = model.encode(text)
        pred_results = reg_model.get_prediction(encoded_sen)
        pred_means = pred_results.predicted_mean
        return pred_means
    
    valid_text = []
    for element in data:
        try:
            text = element['body']
        except KeyError:
            text = element['selftext'] if not none_or_empty(element['selftext']) else element['title']
        valid_text.extend(sentence_tokenize(text))
    print(len(valid_text))
    scores = get_post_score(valid_text)
    print(len(scores))
    return np.mean(scores) 
        

In [None]:
for dim in [valence, arousal, dominance]:
    reg_model = fit_beta_reg(dim, vad_embeddings)
    print(goodness_of_fit(reg_model, dim, vad_embeddings))
    for subreddit in subreddits:
        print(get_subreddit_score(subreddit, reg_model))

In [168]:
get_subreddit_score('belgium', binom_results)

1314



Batches:   0%|          | 0/165 [00:00<?, ?it/s]
Batches:   1%|          | 1/165 [00:01<02:47,  1.02s/it]
Batches:   1%|          | 2/165 [00:01<02:25,  1.12it/s]
Batches:   2%|▏         | 3/165 [00:02<02:09,  1.25it/s]
Batches:   2%|▏         | 4/165 [00:02<01:57,  1.37it/s]
Batches:   3%|▎         | 5/165 [00:03<02:11,  1.22it/s]
Batches:   4%|▎         | 6/165 [00:04<02:17,  1.16it/s]
Batches:   4%|▍         | 7/165 [00:05<02:14,  1.18it/s]
Batches:   5%|▍         | 8/165 [00:06<02:18,  1.13it/s]
Batches:   5%|▌         | 9/165 [00:07<02:17,  1.14it/s]
Batches:   6%|▌         | 10/165 [00:08<02:13,  1.16it/s]
Batches:   7%|▋         | 11/165 [00:09<02:18,  1.11it/s]
Batches:   7%|▋         | 12/165 [00:10<02:24,  1.06it/s]
Batches:   8%|▊         | 13/165 [00:11<02:20,  1.08it/s]
Batches:   8%|▊         | 14/165 [00:11<02:14,  1.12it/s]
Batches:   9%|▉         | 15/165 [00:12<02:17,  1.09it/s]
Batches:  10%|▉         | 16/165 [00:13<02:20,  1.06it/s]
Batches:  10%|█         | 17/16

1314


0.461150318249819

# DETERMINING ENGLISH THRESHOLD

In [130]:
import fasttext
PRETRAINED_MODEL_PATH = 'lid.176.bin'
ft_model = fasttext.load_model(PRETRAINED_MODEL_PATH)




In [139]:
data = get_subreddit_data('belgium')
for post in data:
    print(post['selftext'])
    print(ft_model.predict(post['selftext']))

Thoughts? What about preventative measures?
(('__label__en',), array([0.94688642]))
   BRUSSELS (Reuters) - A Belgian person, one of nine repatriated from Wuhan in China on Sunday, has tested positive for the coronavirus, Belgium’s health agency said on Tuesday.  The agency said all nine had undergone a series of tests in a military hospital in the capital Brussels. Eight of them tested negative.  No details were given about the person who tested positive, but the agency said they were in good health and not currently showing any symptoms of the virus.  A further person, from Denmark, who had not been able to return home on Sunday, had also tested negative, the agency said.  Wuhan is the center of the newly identified coronavirus outbreak in China that has killed over 400 people, with more than 20,000 infected.  Reporting by Philip Blenkinsop  *Our Standards:*[*The Thomson Reuters Trust Principles.*](http://thomsonreuters.com/en/about-us/trust-principles.html)
(('__label__en',), array(

Hello guys,  I have a question regarding a health insurance in Belgium.  Short Info:  I was planning to come in Brussels with work. The contract I have is for work in my country, but I am allowed to come to client, in another country and work from there, so I would not pay any taxes in Belgium, as I pay them in my country and with that in mind I was not planing to have a medical insurance in Belgium.  But since WHO declared that COVID19 is pandemic, the health insurance for travelers that I planned to have would become useless in case of me getting sick with the virus.  So now, I try to get some information about getting a medical insurance in Belgium that would cover my expenses in case of getting sick with the COVID19, so that in worst case scenario of me getting sick, I would not have to pay a bill of thousands of euros.  Again, no local work contract, as I would keep my contract in my country, but I imagine I could pay money in order to get the insurance.  So what would be the proc

New Megathread for all Covid-19 discussion, new cases,...  Major news like new nationwide measures is allowed as a separate post, all else goes here.   **We will crack down on false or unconfirmed information. If you post things that have major implications (closures,...): add a source. Other posts will be deleted. Regular discussion is ofc no problem.**  **VRT**: https://www.vrt.be/vrtnws/nl/dossiers/2020/01/coronavirus-china/  **De Morgen**: https://www.demorgen.be/nieuws/live-coronavirus-dodentol-in-italie-loopt-op-naar-79-15de-geval-in-belgie-bevestigd~b638c04f/  **LN24**: https://www.ln24.be/2020-03-13/covid-19-en-direct  (feel free to post other liveblogs, I'll add them)  **Official Belgian info**: https://www.info-coronavirus.be/nl/news/ (Thanks /u/GiveMeFalseHope)  **Flemish school guidelines**: https://www.onderwijs.vlaanderen.be/nl/nl/coronavirus  **Sciensano Stats**: https://epidemio.wiv-isp.be/ID/Pages/2019-nCoV_epidemiological_situation.aspx (Thanks /u/igor_sk)  **Global S

 *TL;DR: Don't hoard more then you need, be mindful of going out multiple times for groceries stock up what you need to prevent going outside. We need to contain the pandemic not normalize it. For your safety, it is best to be careful and make lesser trips outside and especially to supermarkets that are now high infection zone areas.*    Let me first start by saying I do not condone unnecessary hoarding. People who buy 20x more than they actually need do not help the overall situation. There is a clear difference between overbuying and storing in for a pandemic. [It is officially advised by the WHO to AT LEAST store lastable food and even medication for 2 weeks.](https://www.who.int/docs/default-source/coronaviruse/mental-health-considerations.pdf?sfvrsn=6d3578af_8) People who condemn ALL kinds of storing of food are a huge problem as they encourage the behavior of going to the supermarket more than needed.   Even if I get downvoted for saying this, I am going to defend this behavior a

So, I promised an update on various Reddit threads on the Belgian telecomdata being shared with the government, to combat the Coronavirus ([https://www.standaard.be/cnt/dmf20200320\_04896707](https://www.standaard.be/cnt/dmf20200320_04896707)).   We (Ministry of Privacy) were in contact this morning with Philippe De Backer, who (next to Maggie De Block) is leading the 'Data Corona Taskforce'. In this taskforce, there are various stakeholders (some more troubling than others).   They ensured us all data would be aggregated on postcode level, so individuals can't be identified. When asked if they want to pursue identification in a later stage, they insisted identification is a no-go for them. It is possible - if things get worse - they might want to activate 'districts' level instead of postcode-level.   Data is coming from all Belgian operators, 'anonimised', and then interpreted by Data Dahlberg Institue. They insured us they are an European entity (which is still to be determined) and

So i'm a student who under circumstances live at a studenthouse (op kot). So far that I know of, there aren't many students staying here anymore. Except my neighbour (girl, age: ??? +18 I presume) who lives at the end of the hallway. I've never met her, but this is the second time now that I can hear, that she invites her friends over (sounds like 4 or 5 people in total) and all have a big party at her dorm.    I thought that it was prohibited now in Belgium to not only organise private parties but also to be in the presence with +3 people.     Now I don't know what to do, should I call the police the next time this happens again?   Or is that a bit extreme and I should just go to her frontdoor and tell her?   But I also don't what to get the possibility to get the corona virus (if she hasn't already got it lol).   Part of me wants to do this in a diplomatic way and talk with her, but then I think how selfish and no respect she has to do something like that. It's not only prohibited, i

I am hoping somebody can give me any information? Due too the Covid-19 lockdown, I am without work and I emailed my huisbaas asking if I could possibly pay my rent 2 or 3 days late. Also I asked him about a letter my partner had last year (which he already received a copy of) stating that as she is registered gehandicapte due to an illness, she can claim nearly €170 euro back from the huisbaas. Problem is every time I ask him about it he completely ignores my email.. So again when I asked him about paying the rent 2 or 3 days late I asked him again about the money we could claim off him. Again he totally ignored this bit and just answered that the rent needs to be paid on time according to the huurcontract.  My partner, who is Dutch and I have searched online and cannot find how this money is paid.  The letter she  received started so  Geachte mevrouw, mijnheer, U heeft ais huurder een geldige aanvraag gedaan om een vermindering van de onroerende voormetffing te bekomen
(('__label__en'

I'm interested in philosophy and ethics but never really studied it, however I am interested in what other people see/know/think about the specific topic of 'guidelines' for doctors in case the pandemic gets really bad and doctors have to choose who to save and who to let die.   First of all I want to say that it is not an unicum as I can imagine people working in war zones are making similar decisions on a daily basis and there the 'enemy' is another human being so I can imagine the psychology being a bit different than in these times.   Secondly, I'm not following all the news about all the different countries but it would be interesting to see how other countries deal with it as I believe the ethics are an important indicator of how a society thinks. For example more 'liberal' countries vs. more catholic/conservative ones or more capitalistic vs. more communistic etc. And to see if the differences in how they deal with this, is in line with one of the other divisions or if it's a co

(RANT)  TL; DR: Husband's employer knows my husband suffers from HIV and is strongly advised by his specialist doctor to work from home and self quarantine. Boss gives him a hard time about not coming to the office.   So, my husband has had a diagnosis for HIV since jan 2015.  He was very, very ill since nov 2014 . Had a double pneumonia, constant fever, thrush, etc. Didn't respond to any treatment.  At last in jan 2015 we had the devastating diagnose of AIDS. CD4 were almost zero. His body was on the verge of giving up and had only weeks left if he didn't get treatment quickly and responded to it. Luckily he did and slowly recovered. 5y later he is very healthy, but is ofcourse 100% dependend on his HIV medication and is at risk for several health issues.  And so Corona happened.... Husband works in a shared private practice for mental health workers in Brussels since 2017 (psychologists, psychiatrists, social workers, ...for mostly patients with foreign roots), but in administration:

I have a ticket for a concert this summer in Brussels, they moved it to summer 2021 due to covid. I went to get a refund of my ticket but they say:    " If you are unable to attend the event on the new date due to of personal reasons, you can submit a refund request within 30 days. In that case you fill in the [**refund request form**](https://help.ticketmaster.be/hc/en-us/requests/new?ticket_form_id=360000140178) and provide us with with valid supporting documents - you have to demonstrate why you cannot attend the new date. A marriage, a planned medical procedure or a planned holiday are examples of reasons that can be accepted. "  Is this even legal? What if I'm planning to go on holidays but have no documentation yet? What if I have a birthday or whatever? Or I simply don't want to save that date for that concert?   Link:    [https://help.ticketmaster.be/hc/en-us/articles/360006602358-What-if-my-event-is-cancelled-or-rescheduled-due-to-Coronavirus-COVID-19-](https://help.ticketmast

In [145]:
text = 'hello this is a awful day'
encoded_sen = model.encode(text)
pred_results = binom_results.get_prediction(encoded_sen)
pred_means = pred_results.predicted_mean
np.mean(pred_means)


Batches:   0%|          | 0/4 [00:00<?, ?it/s]
Batches:  25%|██▌       | 1/4 [00:00<00:00,  5.17it/s]
Batches:  50%|█████     | 2/4 [00:00<00:00,  5.26it/s]
Batches:  75%|███████▌  | 3/4 [00:00<00:00,  5.29it/s]
Batches: 100%|██████████| 4/4 [00:00<00:00,  5.83it/s]


0.5391321256414822

In [149]:
x = binom_results.get_prediction(xem)
x.predicted_mean

array([0.95593824, 0.04944408, 0.06813251, 0.10134126, 0.89871954,
       0.48799918, 0.007473  , 0.70598921])

In [160]:
nem = model.encode(['this is an awful day'])
print(len(nem))
x = binom_results.get_prediction(nem)



Batches:   0%|          | 0/1 [00:00<?, ?it/s]
Batches: 100%|██████████| 1/1 [00:00<00:00,  6.99it/s]


1


In [161]:
x.predicted_mean

array([0.13010689])