In [None]:
import openai
import pandas as pd
import numpy as np
import math

# Are informal comments more toxic?

In this notebook we'll use Marianna Apidianaki's method of calculating interpretable dimensions in semantic vector space on the fly using seed pairs. To start, we want to look at the same dimensions: formality and complexity. But we want to look at the sentence level rather than the word level. 

## Step 1: Generating formality seed pairs

We want sevenish pairs of sentences, or really two symmetrical groups of sentences, that can be used to calculate a dimension. 

In [11]:
sentences = """Last week I got into a car accident.
She had some amazing news to share but nobody to share it with.
Sometime you just have to give up and win by cheating.
They desperately needed another drummer since the current one only knew how to play bongos.
The bread dough reminded her of Santa Clause’s belly.
He realized there had been several deaths on this road, but his concern rose when he saw the exact number.
Trash covered the landscape like sprinkles do a birthday cake."""
sentences = sentences.split("\n")
sentences

['Last week I got into a car accident.',
 'She had some amazing news to share but nobody to share it with.',
 'Sometime you just have to give up and win by cheating.',
 'They desperately needed another drummer since the current one only knew how to play bongos.',
 'The bread dough reminded her of Santa Clause’s belly.',
 'He realized there had been several deaths on this road, but his concern rose when he saw the exact number.',
 'Trash covered the landscape like sprinkles do a birthday cake.']

### Step 1: Load and use GPT to generate sentences

In [12]:
from openai import OpenAI
client = OpenAI() # OPENAI_API_KEY environment variable must be set. see quickstart tutorial here: https://platform.openai.com/docs/quickstart?context=python



Try an example completion

In [13]:
sentence = sentences[0]

messages=[
    {"role": "system", "content": "You are a rewording assistant, skilled in transforming a statement to express more or less of a given quality or property."},
    {"role": "user", "content": "Rephrase the following statement to use language that is more complex: \"{}\" .".format(sentence)}
  ]


In [14]:
completion = client.chat.completions.create(
  model="gpt-3.5-turbo",
  messages=messages
)

print(completion.choices[0].message)

ChatCompletionMessage(content='I experienced a vehicular collision the previous week.', role='assistant', function_call=None, tool_calls=None)


In [15]:
completion.choices[0]

Choice(finish_reason='stop', index=0, message=ChatCompletionMessage(content='I experienced a vehicular collision the previous week.', role='assistant', function_call=None, tool_calls=None), logprobs=None)

We'll feed this output back to the api 

In [16]:
messages.append({'role': 'system', 'content': completion.choices[0].message.content})
messages.append({"role": "user", "content": "Good. Rephrase the sentence again to use language that is even more complex."})
messages

[{'role': 'system',
  'content': 'You are a rewording assistant, skilled in transforming a statement to express more or less of a given quality or property.'},
 {'role': 'user',
  'content': 'Rephrase the following statement to use language that is more complex: "Last week I got into a car accident." .'},
 {'role': 'system',
  'content': 'I experienced a vehicular collision the previous week.'},
 {'role': 'user',
  'content': 'Good. Rephrase the sentence again to use language that is even more complex.'}]

In [17]:
def complete(messages):
    completion = client.chat.completions.create(
      model="gpt-3.5-turbo",
      messages=messages,
      seed=42
    )
    return completion.choices[0].message.content

complete(messages)

'During the course of the preceding week, I found myself involved in a motor vehicle collision.'

### prompt templates

In [18]:
# dictionary of the adjectives we use (property adjective and antonym) to create prompts

property_dict = {
    'complexity':   ('complex', 'simple'),
    'emotion':      ('emotional', 'emotionless')
}


We will generate sentences from a series of templates. For each sentence, we want to generate 'more x', 'even more x', as well as 'less x' and 'even less x'. Because the model often produces longer sentences for 'more' prompts, we also prompt for rephrasings using an antonymous adjective. So, for example, we ask for rephrasings that are "more complex" as well as rephrasings that are "less simple". We then use all of these rephrasings to calculate the complexity dimension.

In [19]:
# TODO save 5 responses instead of 1


adj, antonym = property_dict['complexity']

data = []

for sent in sentences:

    for i, x in enumerate([adj, antonym]):
        print(i)
        print(x)
        messages=[
            {"role": "system", "content": "You are a rewording assistant, skilled in transforming a statement to express more or less of a given quality or property."},
        ]

        
        # more
        more_messages = messages + [{"role": "user", "content": "Rephrase the following statement to use language that is more {}: \"{}\" .".format(x,sent)}]
        more = complete(more_messages)
        row = {
             'sentence': sent,
             'text': more,
             'more': 1,
             'even_more': 0,
             'less': 0,
             'even_less':  0,
             'property': 'complexity',
             'adjective': x,
             'antonym?': 0 if i == 0 else 1 # the second in the pair is the antonym
        }
        data.append(row)
        print(more)
                         
        # even more
        even_more_messages = more_messages + [{"role": "system", "content": more}] + [{"role": "user", "content": "Good. Rephrase the sentence again to use language that is even more {}.".format(x)}]
        even_more = complete(even_more_messages)
        row = {
             'sentence': sent,
             'text': even_more,
             'more': 0,
             'even_more': 1,
             'less': 0,
             'even_less':  0,
             'property': 'complexity',
             'adjective': x,
             'antonym?': 0 if i == 0 else 1 # the second in the pair is the antonym
        }
        data.append(row)
        print(even_more)

        # TODO even even more

        # less
        less_messages = messages + [{"role": "user", "content": "Rephrase the following statement to use language that is less {}: \"{}\" .".format(x,sent)}]
        less = complete(less_messages)
        row = {
             'sentence': sent,
             'text': less,
             'more': 0,
             'even_more': 0,
             'less': 1,
             'even_less':  0,
             'property': 'complexity',
             'adjective': x,
             'antonym?': 0 if i == 0 else 1 # the second in the pair is the antonym
        }
        data.append(row)
        print(less)

        # even less
        even_less_messages = less_messages + [{"role": "system", "content": less}] + [{"role": "user", "content": "Good. Rephrase the sentence again to use language that is even less {}.".format(x)}]
        even_less = complete(even_less_messages)
        row = {
             'sentence': sent,
             'text': even_less,
             'more': 0,
             'even_more': 0,
             'less': 0,
             'even_less':  1,
             'property': 'complexity',
             'adjective': x,
             'antonym?': 0 if i == 0 else 1 # the second in the pair is the antonym
        }
        data.append(row)
        print(even_less)

    
df = pd.DataFrame.from_records(data)
df

0
complex
The previous week, I was involved in a vehicular collision.
During the preceding week, I found myself embroiled in a motor vehicle collision.
I had a car accident last week.
I had a crash with my car last week.
1
simple
Last week, I was in a car crash.
Last week, my car crashed.
Last week I was involved in a collision while operating a motor vehicle.
During the course of the previous week, I was engaged in a vehicular collision resulting in damage to my automobile.
0
complex
She was in possession of astounding news, yet there was a dearth of individuals with whom she could disseminate it.
She found herself in the possession of awe-inspiring tidings that yearned to be shared, however, she was met with the unfortunate circumstance of not having anyone in her proximity with whom she could partake in the act of disseminating the aforementioned news.
She had incredible news to tell, but no one to tell it to.
She had really great news, but no one to tell it to.
1
simple
She had som

Unnamed: 0,sentence,text,more,even_more,less,even_less,property,adjective,antonym?
0,Last week I got into a car accident.,"The previous week, I was involved in a vehicul...",1,0,0,0,complexity,complex,0
1,Last week I got into a car accident.,"During the preceding week, I found myself embr...",0,1,0,0,complexity,complex,0
2,Last week I got into a car accident.,I had a car accident last week.,0,0,1,0,complexity,complex,0
3,Last week I got into a car accident.,I had a crash with my car last week.,0,0,0,1,complexity,complex,0
4,Last week I got into a car accident.,"Last week, I was in a car crash.",1,0,0,0,complexity,simple,1
5,Last week I got into a car accident.,"Last week, my car crashed.",0,1,0,0,complexity,simple,1
6,Last week I got into a car accident.,Last week I was involved in a collision while ...,0,0,1,0,complexity,simple,1
7,Last week I got into a car accident.,"During the course of the previous week, I was ...",0,0,0,1,complexity,simple,1
8,She had some amazing news to share but nobody ...,"She was in possession of astounding news, yet ...",1,0,0,0,complexity,complex,0
9,She had some amazing news to share but nobody ...,She found herself in the possession of awe-ins...,0,1,0,0,complexity,complex,0


Save so we don't have to query the api every time

In [23]:
df.to_csv('make_it_more_complexity_pilot_seed_sentences.csv')

## Step 2: Calculating the formality dimension



In [24]:
df = pd.read_csv('make_it_more_complexity_pilot_seed_sentences.csv')

Now that we have our seed sentences for the complexity dimension, we need to get the vector differences for the seed pairs.

We generated 8 sentences for each original seed sentence, meaning we have four seed pairs.

The formulas for the four seed pairs are as follows:

- ( adjective + more ) - (adjective + less)
- ( adjective + even more ) - (adjective + even less)
- ( antonym + less ) - (antonym + more )
- ( antonym + even less ) - (antonym + even more )

First we get an embedding for each sentence. Then, for each seed sentence we calculate these four formulae to get the vector differences, storing those in a separate list. And then we average those together. 

In [25]:
df.head(5)

Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,sentence,text,more,even_more,less,even_less,property,adjective,antonym?
0,0,0,Last week I got into a car accident.,"The previous week, I was involved in a vehicul...",1,0,0,0,complexity,complex,0
1,1,1,Last week I got into a car accident.,"During the preceding week, I found myself embr...",0,1,0,0,complexity,complex,0
2,2,2,Last week I got into a car accident.,I had a car accident last week.,0,0,1,0,complexity,complex,0
3,3,3,Last week I got into a car accident.,I had a crash with my car last week.,0,0,0,1,complexity,complex,0
4,4,4,Last week I got into a car accident.,"Last week, I was in a car crash.",1,0,0,0,complexity,simple,1


--NOPE__So now that we have our seed sentences for the complexity dimension, we need to split them into negative and positive sentences. The generated sentences should be divided as follows.

Positive
- adjective + more
- adjective + even more
- antonym + less
- antonym + even less

Negative
- adjective + less
- adjective + even less
- antonym + more
- antonym + even more

After we split them into positive and negative examples, we embed them using SBERT--

In [17]:
# positive = df[df['antonym?']==0][df['more']==1]['text'].to_list() + df[df['antonym?']==0][df['even_more']==1]['text'].to_list() + df[df['antonym?']==1][df['less']==1]['text'].to_list() + df[df['antonym?']==1][df['even_less']==1]['text'].to_list() 
# negative = df[df['antonym?']==0][df['less']==1]['text'].to_list() + df[df['antonym?']==0][df['even_less']==1]['text'].to_list() + df[df['antonym?']==1][df['more']==1]['text'].to_list() + df[df['antonym?']==1][df['even_more']==1]['text'].to_list() 

# print(positive)
# print()
# print(negative)

Obviously we run into the problem where vectors are word level and we want sentence-level representations. The absolute simplest thing I can think of to do here is to use SentenceBERT, which we will download from huggingface.

After initializing the model, we generate vector representations for each sentence in the informal list and for each corresponding sentence in the formal list. We subtract the vectors from one another and then take the average, leaving us with a vector that represents the formality dimension. We can rate any sentence vector(s) on the formality dimension by giving them (as a list) to the function predict_scalarproj along with the dimension itself. 

In [None]:
# load sbert
!pip install -U sentence-transformers

In [48]:
from sentence_transformers import SentenceTransformer
model = SentenceTransformer('all-MiniLM-L6-v2')

sentences = df['text'].to_list()

#Sentences are encoded by calling model.encode()
embeddings = model.encode(sentences)


df = df.assign(embedding=embeddings.tolist())

df.head(5)

# #Print the embeddings
# for sentence, embedding in zip(positive[:5], pos_embeddings[:5]):
#     print("Sentence:", sentence)
#     print("Embedding:", embedding[:100])
#     print("")

Unnamed: 0.1,Unnamed: 0,sentence,text,more,even_more,less,even_less,property,adjective,antonym?,embedding
0,0,Last week I got into a car accident.,"The previous week, I was involved in a vehicul...",1,0,0,0,complexity,complex,0,"[0.05463976413011551, 0.04207247123122215, 0.0..."
1,1,Last week I got into a car accident.,"During the preceding week, I found myself embr...",0,1,0,0,complexity,complex,0,"[0.016205446794629097, 0.01861356757581234, 0...."
2,2,Last week I got into a car accident.,I had a car accident last week.,0,0,1,0,complexity,complex,0,"[-0.01675170101225376, 0.004364470951259136, 0..."
3,3,Last week I got into a car accident.,I had a crash with my car last week.,0,0,0,1,complexity,complex,0,"[0.024704160168766975, 0.015588822774589062, 0..."
4,4,Last week I got into a car accident.,"Last week, I was in a car crash.",1,0,0,0,complexity,simple,1,"[0.029179640114307404, -0.011056442745029926, ..."


In [71]:
difference_vecs = []

def more(df):
    df = df[df['adjective']=='complex'] 
    df = df[df['more']==1]
    return df

def less(df):
    df = df[df['adjective']=='complex'] 
    df = df[df['more']==1]
    return df['embedding'].values[0]

for sentence in df['sentence'].unique():
    # there are 8 seeds with this sentence
    print(len(df[df['sentence']==sentence]))
    
    seeds = df[df['sentence']==sentence]
    
    # now we want to calculate the four different formulae
    
    #( adjective + more ) - (adjective + less)
    a = seeds[seeds['adjective']=='complex'] 
    a = seeds[seeds['more']==1] ['embedding'].values[0]
    b = seeds[seeds['adjective']=='complex'] 
    b = seeds[seeds['less']==1] ['embedding'].values[0]
    diff_vec = np.asarray(a) - np.asarray(b)
    difference_vecs.append(diff_vec)
    
    #( adjective + even more ) - (adjective + even less)
    a = seeds[seeds['adjective']=='complex'] 
    a = seeds[seeds['even_more']==1] ['embedding'].values[0]
    b = seeds[seeds['adjective']=='complex'] 
    b = seeds[seeds['even_less']==1] ['embedding'].values[0]
    diff_vec = np.asarray(a) - np.asarray(b)
    difference_vecs.append(diff_vec)
    
    #( antonym + less ) - (antonym + more )
    a = seeds[seeds['adjective']=='simple'] 
    a = seeds[seeds['less']==1] ['embedding'].values[0]
    b = seeds[seeds['adjective']=='simple'] 
    b = seeds[seeds['more']==1] ['embedding'].values[0]
    diff_vec = np.asarray(a) - np.asarray(b)
    difference_vecs.append(diff_vec)
    
    #( antonym + even less ) - (antonym + even more )
    a = seeds[seeds['adjective']=='simple'] 
    a = seeds[seeds['even_less']==1] ['embedding'].values[0]
    b = seeds[seeds['adjective']=='simple'] 
    b = seeds[seeds['even_more']==1] ['embedding'].values[0]
    diff_vec = np.asarray(a) - np.asarray(b)
    difference_vecs.append(diff_vec)
    
print(len(difference_vecs))



8
8
8
8
8
8
8
28


In [94]:
print(difference_vecs[23])

[ 4.03994247e-02 -6.06127921e-02  9.14853811e-03 -1.28718689e-02
  4.68865782e-03 -3.19791464e-02 -1.44007057e-02  4.25428115e-02
 -4.08821441e-02  3.97744404e-02 -2.72350453e-02  4.07437040e-02
  2.26934589e-02  3.26484535e-02 -6.18782695e-02  4.65744315e-02
 -4.86825928e-02 -5.79366945e-02 -4.11888286e-02 -7.31274039e-02
 -2.64015459e-02  1.27541311e-02  7.77818928e-02 -5.24954870e-04
 -4.39719101e-02  9.29965219e-03  4.17338414e-02  6.45466708e-02
  6.60448149e-03 -1.24711413e-02 -1.03509054e-03  3.88942440e-02
  5.11108879e-02 -1.08129010e-02 -6.55224267e-02 -1.74900133e-01
 -5.44179307e-03 -4.79404740e-02 -3.60480137e-03  3.70678650e-02
  4.74482216e-03  3.01680929e-03  1.50575889e-02 -6.54721260e-03
 -2.34416593e-02 -1.27597973e-02  2.91121751e-03 -4.61295992e-02
  1.25788107e-01 -3.33999991e-02  1.88923515e-02 -8.66836868e-03
  4.77682175e-02 -3.29941884e-02  7.08529465e-02 -1.27413504e-01
 -5.19959852e-02 -1.33465917e-03 -3.66910920e-03  1.19598340e-01
  3.02098254e-02 -4.43065

In [103]:
dimvec = np.mean(difference_vecs[:8], axis = 0)
dimvec

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0.

In [None]:
#### from marianna + katrin
# seed-based method
# averaging over seed pair vectors
# def dimension_seedbased(seeds_pos, seeds_neg, space, paired = False):
#     diffvectors = [ ]
    
#     for negword, posword in _make_seedpairs(seeds_pos, seeds_neg, paired = paired):
#         diffvectors.append(space[posword] - space[negword])

#     # average
#     dimvec = np.mean(diffvectors, axis = 0)
#     return dimvec


In [75]:
def dimension_seedbased():
    return dimvec

In [77]:
complexity_dimension = dimension_seedbased()

In [78]:
# vector scalar projection (from marianna + katrin)
def predict_scalarproj(veclist, dimension):
    dir_veclen = math.sqrt(np.dot(dimension, dimension))
    return [np.dot(v, dimension) / dir_veclen for v in veclist]

# Step 3: validating the formality dimension

does it behave the same way as a standard classifier?


We load a regular classifier

We run this prediction method and the formality classifier on the formality dataset. 

We compare. Is the dimension-based method that much worse?

We load a formality dataset - perhaps the word-based one that Marianna uses.

We order the entries by their complexity rating and look at where they fall on our complexity axis.

## Step 4: Rating Toxicity Datasets for formality

We'll start with the 1000-length parallel dataset from the text detoxification paper. 

We load it in

We SBERTize the sentences

We pass them to the prediction method. 

We observe: do toxic and nontoxic comments differ wrt formality?

In [None]:
!pip install datasets

In [79]:
from datasets import load_dataset

dataset = load_dataset("civil_comments")

Downloading readme: 100%|██████████████████████████████████████████████████████████| 7.73k/7.73k [00:00<00:00, 30.3MB/s]
Downloading data: 100%|██████████████████████████████████████████████████████████████| 194M/194M [00:15<00:00, 12.8MB/s]
Downloading data: 100%|██████████████████████████████████████████████████████████████| 187M/187M [00:15<00:00, 12.4MB/s]
Downloading data: 100%|████████████████████████████████████████████████████████████| 21.0M/21.0M [00:02<00:00, 10.4MB/s]
Downloading data: 100%|████████████████████████████████████████████████████████████| 20.8M/20.8M [00:02<00:00, 9.10MB/s]
Generating train split: 100%|█████████████████████████████████████| 1804874/1804874 [00:01<00:00, 1175672.09 examples/s]
Generating validation split: 100%|█████████████████████████████████████| 97320/97320 [00:00<00:00, 991451.89 examples/s]
Generating test split: 100%|██████████████████████████████████████████| 97320/97320 [00:00<00:00, 1106339.97 examples/s]


In [80]:
dataset["train"][0]

{'text': "This is so cool. It's like, 'would you want your mother to read this??' Really great idea, well done!",
 'toxicity': 0.0,
 'severe_toxicity': 0.0,
 'obscene': 0.0,
 'threat': 0.0,
 'insult': 0.0,
 'identity_attack': 0.0,
 'sexual_explicit': 0.0}

In [81]:
dataset["train"][:10]['text']

["This is so cool. It's like, 'would you want your mother to read this??' Really great idea, well done!",
 "Thank you!! This would make my life a lot less anxiety-inducing. Keep it up, and don't let anyone get in your way!",
 'This is such an urgent design problem; kudos to you for taking it on. Very impressive!',
 "Is this something I'll be able to install on my site? When will you be releasing it?",
 'haha you guys are a bunch of losers.',
 'ur a sh*tty comment.',
 'hahahahahahahahhha suck it.',
 'FFFFUUUUUUUUUUUUUUU',
 'The ranchers seem motivated by mostly by greed; no one should have the right to allow their animals destroy public land.',
 "It was a great show. Not a combo I'd of expected to be good together but it was."]

In [82]:
###################################
#########
# predicting ratings on a dimension

# ...
# when we only have the dimension:
# vector scalar projection
def predict_scalarproj(veclist, dimension):
    dir_veclen = math.sqrt(np.dot(dimension, dimension))
    return [np.dot(v, dimension) / dir_veclen for v in veclist]

SBERtize the Comments

In [86]:
sentence_embs = [model.encode(row) for row in dataset["train"][:100]['text']]


In [None]:
Calculate complexity 

In [90]:
complexity_dimension

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0.

In [88]:
complexities = predict_scalarproj(sentence_embs, dimvec)

# for i, emb in enumerate(sentence_embs):
#     dataset["train"][i]['complexity_computed'] = sentence_embs[i]
#     complexities.append( sentence_embs[i] )

#dataset["train"][:5]
complexities[:5]

  return [np.dot(v, dimension) / dir_veclen for v in veclist]


[nan, nan, nan, nan, nan]

In [None]:
dataset["train"][:10]['text']

In [None]:
import numpy as np
import scipy.stats

scipy.stats.pearsonr(complexities, scores)    # Pearson's r

In [None]:
complexities