In [1]:
import tensorflow
from tqdm import tqdm

In [2]:
from datasets import load_dataset

In [3]:
# Importing IMDB Dataset

train_dataset = load_dataset('imdb', split='train')
test_dataset = load_dataset('imdb', split='test')

Reusing dataset imdb (/home/token/.cache/huggingface/datasets/imdb/plain_text/1.0.0/e3c66f1788a67a89c7058d97ff62b6c30531e05b549de56d3ab91891f0561f9a)
Reusing dataset imdb (/home/token/.cache/huggingface/datasets/imdb/plain_text/1.0.0/e3c66f1788a67a89c7058d97ff62b6c30531e05b549de56d3ab91891f0561f9a)


In [None]:
print(train_dataset)

In [None]:
print(train_dataset[0])

In [None]:
import contractions
from bs4 import BeautifulSoup
import unicodedata
import numpy as np
import re

def strip_html_tags(text):
    soup = BeautifulSoup(text, "html.parser")
    [s.extract() for s in soup(['iframe', 'script'])]
    stripped_text = soup.get_text()
    stripped_text = re.sub(r'[\r|\n|\r\n]+', '\n', stripped_text)
    return stripped_text

def remove_accented_chars(text):
    text = unicodedata.normalize('NFKD', text).encode('ascii', 'ignore').decode('utf-8', 'ignore')
    return text

def expand_contractions(text):
    return contractions.fix(text)

def remove_special_characters(text, remove_digits=False):
    pattern = r'[^a-zA-Z0-9\s]' if not remove_digits else r'[^a-zA-Z\s]'
    text = re.sub(pattern, '', text)
    return text

def pre_process_document(document):
    # strip HTML
    document = strip_html_tags(document)
    # lower case
    document = document.lower()
    # remove extra newlines (often might be present in really noisy text)
    document = document.translate(document.maketrans("\n\t\r", "   "))
    # remove accented characters
    document = remove_accented_chars(document)
    # expand contractions    
    document = expand_contractions(document)  
    # remove special characters and\or digits    
    # insert spaces between special characters to isolate them    
    #special_char_pattern = re.compile(r'([{.(-)}])')
    #document = special_char_pattern.sub(" \\1 ", document)
    #document = remove_special_characters(document, remove_digits=True)  
    # remove extra whitespace
    document = re.sub(' +', ' ', document)
    document = document.strip()
    
    return document


pre_process_corpus = np.vectorize(pre_process_document)

In [None]:
x_train = pre_process_corpus(train_dataset['text'])
x_train_preprocessed = []

for elt in x_train:
    x_train_preprocessed.append(np.array(elt.split(" ")))
    
y_train = train_dataset['label']



x_test_preprocessed = []

x_test = pre_process_corpus(test_dataset['text'])
y_test = test_dataset['label']

for elt in x_test:
    x_test_preprocessed.append(np.array(elt.split(" ")))

In [None]:
print(x_train_preprocessed[0])
print(x_test_preprocessed[0])

In [9]:
import math
from tqdm import tqdm

def naive_bayes_classifier_train(x_train, y_train):
    """
    Naive bayes Classifier training function

    Args:
        x train being the numerical values of words in the imdb dataset
        and y train its respective class, also noted as a numerical value

    Output: 
        An array of class respective weights, 
        A dict of probability for the key to be in each class, 
        The vocabulary
    """
    
    # tuple value/label in each element from imdb
    ndoc = len(x_train)
    nbclasses = len(np.unique(y_train))
    logprior = [0] * nbclasses
    bigdoc = [[]] * nbclasses
    voca = list(keras.datasets.imdb.get_word_index().values())
    loglikelyhood = np.ndarray((len(voca), nbclasses))
    for c in range(nbclasses):
        nlogdoc = math.log(ndoc)
        # masking the array to filter it
        c_class_train = x_train[y_train == c]
        nc = len(c_class_train)
        logprior[c] =  math.log(nc) / nlogdoc
        # getting the number of occurences in the vocabulary
        occu_voca = np.zeros(len(voca) + 1)
        for docu in tqdm(c_class_train):
            docu_hist = np.histogram(docu, bins=np.arange(len(voca) + 2))[0]
            occu_voca = np.add(occu_voca, docu_hist)
        nb_word_total  = get_total_length(c_class_train)
        for index in tqdm(range(len(voca))):
            value = math.log((occu_voca[index] + 1) /
                                      (nb_word_total + 1))
            loglikelyhood[index, c] = value
    return logprior, loglikelyhood, voca

In [10]:
logprior, loglikelyhood, voca = naive_bayes_classifier_train(x_train, y_train)

100%|██████████| 12500/12500 [00:11<00:00, 1066.76it/s]
100%|██████████| 88584/88584 [00:00<00:00, 426547.22it/s]
100%|██████████| 12500/12500 [00:11<00:00, 1099.29it/s]
100%|██████████| 88584/88584 [00:00<00:00, 421951.61it/s]


In [None]:
def test_naive_bayes(testdoc, logprior, loglikelyhood, C, V):
    """
    Naive bayes Classifier test function
    
    Inputs: The document on which we test
            The logprior vector
            The loglikelyhood vector
            The classes
            The vocabulary
    
    Output: The document class 
    """

    _sum = [0] * len(C)
    for c in range(len(C)):
        _sum[c] = logprior[c]
        for word in testdoc:
            try:
                #voca starts with a minimal value of 1
                #thus we match voca and loglikelyhood
                _sum[c] += loglikelyhood[word, c]
            except IndexError:
                #unrecognized word, thus we are not knowing in which class it belongs
                pass
    return np.argmax(_sum)


In [None]:
import pandas as pd

def crosstab(x_test):
    test_card = x_test.shape[0]
    res = np.zeros(test_card)
    for index in range(test_card):
        y_pred = test_naive_bayes(x_test[index], logprior, loglikelyhood, [0, 1], word_index)
        res[index] = y_pred

    return res

res = crosstab(x_test)
pd.crosstab(res ,y_test)

col_0,0,1
row_0,Unnamed: 1_level_1,Unnamed: 2_level_1
0.0,11002,3186
1.0,1498,9314


In [None]:
from sklearn.metrics import classification_report

target_names = ['negative review', 'positive review']
print(classification_report(y_test, res, target_names=target_names))

                 precision    recall  f1-score   support

negative review       0.78      0.88      0.82     12500
positive review       0.86      0.75      0.80     12500

       accuracy                           0.81     25000
      macro avg       0.82      0.81      0.81     25000
   weighted avg       0.82      0.81      0.81     25000



#### Pretreatment method

###### Stop Words

In [None]:
import nltk
nltk.download('stopwords')
from nltk.corpus import stopwords

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


In [None]:
stop_words = set(stopwords.words('english'))

In [None]:
def encode_set_of_words(set_words):
    """
    Encode a set of words

    Inputs: The set of words to encode
    

    Output: The encoded set of words
    """
    L = []

    for word in set_words:
        try:
            L.append(word_index[word])

        except KeyError:
            pass

    return L

In [None]:
stop_words_encoded = encode_set_of_words(stop_words)

In [None]:
def delete_stop_words(document):
    """
    Delete stop words function
    
    Inputs: The document to filter
    
    Output: The filtered document 
    """
    delete_res = document.copy()
    for stop_word in stop_words_encoded:
        try :
            while (True):
                delete_res.remove(stop_word)
        
        except ValueError:
            pass
    
    return delete_res

In [None]:
# With Stop Words
print(decode_sentence(x_test[0]))

# Without Stop Words
print(decode_sentence(delete_stop_words(x_test[0])))

? please give this one a miss br br kristy swanson and the rest of the cast rendered terrible performances the show is flat flat flat br br i don't know how michael madison could have allowed this one on his plate he almost seemed to know this wasn't going to work out and his performance was quite lacklustre so all you madison fans give this a miss
please miss kristy swanson rest of rendered terrible performances show flat flat flat michael madison have allowed plate almost seemed wasn't going work performance quite lacklustre madison fans miss


In [None]:
x_test_without_sw = [delete_stop_words(el) for el in tqdm(x_test)]

100%|██████████| 25000/25000 [00:28<00:00, 891.81it/s]


In [None]:
x_test_without_sw = np.array(x_test_without_sw, dtype='object')
x_test_without_sw.shape

(25000,)

In [None]:
res = crosstab(x_test_without_sw)
pd.crosstab(res ,y_test)

col_0,0,1
row_0,Unnamed: 1_level_1,Unnamed: 2_level_1
0.0,10628,2471
1.0,1872,10029


In [None]:
print(classification_report(y_test, res, target_names=target_names))

                 precision    recall  f1-score   support

negative review       0.81      0.85      0.83     12500
positive review       0.84      0.80      0.82     12500

       accuracy                           0.83     25000
      macro avg       0.83      0.83      0.83     25000
   weighted avg       0.83      0.83      0.83     25000



###### Stemming

In [None]:
from nltk.stem.snowball import SnowballStemmer
from nltk.tokenize import word_tokenize 
nltk.download('punkt')

import re

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


In [None]:
x_test_decoded_list = decode_all_review(x_test, False, offset=0)
x_test_decoded_str = decode_all_review(x_test, offset=0)

 11%|█▏        | 2856/25000 [00:26<01:38, 225.82it/s]

In [None]:
def stemming(document, re_upper, stemmer):
    stemmed = [stemmer.stem(word).lower() for word in document]

    return encode_set_of_words(stemmed)

In [None]:
# Regex for the capital letters
re_upper = re.compile(r"^\w+$")

# Creation and application of the Stemmer
stemmer = SnowballStemmer("english")

In [None]:
# Without Stemming
print(decode_sentence(x_test[0]))

# With Stemming
print(decode_sentence(stemming(x_test_decoded_list[0], re_upper, stemmer), offset = 0))

? please give this one a miss br br kristy swanson and the rest of the cast rendered terrible performances the show is flat flat flat br br i don't know how michael madison could have allowed this one on his plate he almost seemed to know this wasn't going to work out and his performance was quite lacklustre so all you madison fans give this a miss
the wonder own as by is i i jar rose to of hollywood br of down shout get bore of ever it sad sad sad i i was then doe don't close faint after one as by are be all turn in doe as three part in some to be with world her an have faint begin own as is


In [None]:
## Applying the Stemming on the entire x_test 
x_test_stemmed = [stemming(el, re_upper, stemmer) for el in tqdm(x_test_decoded_list)]

  9%|▊         | 2135/25000 [00:12<02:01, 188.32it/s][A
  9%|▊         | 2155/25000 [00:12<02:08, 177.23it/s][A
  9%|▊         | 2174/25000 [00:12<02:22, 160.71it/s][A
  9%|▉         | 2191/25000 [00:12<02:32, 149.62it/s][A
  9%|▉         | 2207/25000 [00:13<02:40, 142.25it/s][A
  9%|▉         | 2224/25000 [00:13<02:34, 147.66it/s][A
  9%|▉         | 2240/25000 [00:13<02:35, 146.76it/s][A
  9%|▉         | 2257/25000 [00:13<02:31, 150.32it/s][A
  9%|▉         | 2273/25000 [00:13<02:44, 138.22it/s][A
  9%|▉         | 2289/25000 [00:13<02:39, 142.07it/s][A
  9%|▉         | 2308/25000 [00:13<02:27, 153.96it/s][A
  9%|▉         | 2324/25000 [00:13<02:30, 151.00it/s][A
  9%|▉         | 2340/25000 [00:13<02:34, 146.44it/s][A
  9%|▉         | 2355/25000 [00:14<02:38, 143.16it/s][A
  9%|▉         | 2370/25000 [00:14<02:54, 129.91it/s][A
 10%|▉         | 2385/25000 [00:14<02:49, 133.52it/s][A
 10%|▉         | 2403/25000 [00:14<02:36, 144.66it/s][A
 10%|▉         | 2422/25000 [00

In [None]:
decode_sentence(x_test_stemmed[0], offset=0)

"the wonder own as by is i i jar rose to of hollywood br of down shout get bore of ever it sad sad sad i i was then doe don't close faint after one as by are be all turn in doe as three part in some to be with world her an have faint begin own as is"

In [None]:
x_test_stemmed = np.array(x_test_stemmed, dtype='object')
x_test_stemmed.shape

(25000,)

In [None]:
res = crosstab(x_test_stemmed)
pd.crosstab(res ,y_test)

col_0,0,1
row_0,Unnamed: 1_level_1,Unnamed: 2_level_1
0.0,11056,4821
1.0,1444,7679


In [None]:
print(classification_report(y_test, res, target_names=target_names))

                 precision    recall  f1-score   support

negative review       0.70      0.88      0.78     12500
positive review       0.84      0.61      0.71     12500

       accuracy                           0.75     25000
      macro avg       0.77      0.75      0.74     25000
   weighted avg       0.77      0.75      0.74     25000



###### Lemming

In [None]:
import spacy
!pip install -U tensorflow==2.3.0

Collecting tensorflow==2.3.0
  Downloading tensorflow-2.3.0-cp37-cp37m-manylinux2010_x86_64.whl (320.4 MB)
[K     |████████████████████████████████| 320.4 MB 46 kB/s 
Collecting scipy==1.4.1
  Downloading scipy-1.4.1-cp37-cp37m-manylinux1_x86_64.whl (26.1 MB)
[K     |████████████████████████████████| 26.1 MB 42.1 MB/s 
Collecting numpy<1.19.0,>=1.16.0
  Downloading numpy-1.18.5-cp37-cp37m-manylinux1_x86_64.whl (20.1 MB)
[K     |████████████████████████████████| 20.1 MB 39.3 MB/s 
[?25hCollecting tensorflow-estimator<2.4.0,>=2.3.0
  Downloading tensorflow_estimator-2.3.0-py2.py3-none-any.whl (459 kB)
[K     |████████████████████████████████| 459 kB 47.3 MB/s 
Installing collected packages: numpy, tensorflow-estimator, scipy, tensorflow
  Attempting uninstall: numpy
    Found existing installation: numpy 1.19.5
    Not uninstalling numpy at /shared-libs/python3.7/py/lib/python3.7/site-packages, outside environment /root/venv
    Can't uninstall 'numpy'. No files were found to uninst

In [None]:
!python -m spacy download en_core_web_sm

2021-10-02 12:40:18.882213: W tensorflow/stream_executor/platform/default/dso_loader.cc:59] Could not load dynamic library 'libcudart.so.10.1'; dlerror: libcudart.so.10.1: cannot open shared object file: No such file or directory
2021-10-02 12:40:18.882275: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
Collecting en-core-web-sm==3.1.0
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.1.0/en_core_web_sm-3.1.0-py3-none-any.whl (13.6 MB)
[K     |████████████████████████████████| 13.6 MB 14.1 MB/s 
Installing collected packages: en-core-web-sm
Successfully installed en-core-web-sm-3.1.0
[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_sm')


In [None]:
nlp = spacy.load("en_core_web_sm")

In [None]:
def lemming(document):
    # Decode the document
    #document_decoded = decode_sentence(document)
    
    lemmas = [token.lemma_.lower() for token in nlp(document)]

    return encode_set_of_words(lemmas)

In [None]:
decode_sentence(lemming(x_test_decoded_str[0]))

"? please give this one his miss br br whore eight and the rest of the cast impressions much performances the show is flat flat flat br br his don't bad bad moores's michael madison could have allowed this one his his picked he almost seemed to bad this wasn't going to work out and his performance was quite up all you madison fans give this his miss"

In [None]:
x_test_lemmed = [lemming(el) for el in tqdm(x_test_decoded_str)]


  0%|          | 0/25000 [00:00<?, ?it/s][A
  0%|          | 3/25000 [00:00<35:13, 11.82it/s][A
  0%|          | 6/25000 [00:00<23:45, 17.54it/s][A
  0%|          | 9/25000 [00:00<32:50, 12.68it/s][A
  0%|          | 11/25000 [00:00<33:04, 12.59it/s][A
  0%|          | 13/25000 [00:01<32:55, 12.65it/s][A
  0%|          | 15/25000 [00:01<30:59, 13.43it/s][A
  0%|          | 18/25000 [00:01<26:01, 16.00it/s][A
  0%|          | 21/25000 [00:01<23:37, 17.62it/s][A
  0%|          | 25/25000 [00:01<20:10, 20.63it/s][A
  0%|          | 28/25000 [00:01<22:41, 18.35it/s][A
  0%|          | 30/25000 [00:01<23:23, 17.79it/s][A
  0%|          | 33/25000 [00:01<20:35, 20.21it/s][A
  0%|          | 36/25000 [00:02<22:28, 18.51it/s][A
  0%|          | 38/25000 [00:02<24:49, 16.76it/s][A
  0%|          | 41/25000 [00:02<24:19, 17.10it/s][A
  0%|          | 43/25000 [00:02<28:09, 14.77it/s][A
  0%|          | 45/25000 [00:02<32:29, 12.80it/s][A
  0%|          | 48/25000 [00:03<27:32,

KeyboardInterrupt: 

In [None]:
x_test_lemmed = np.array(x_test_lemmed, dtype='object')
x_test_lemmed.shape

NameError: name 'np' is not defined

In [None]:
res = crosstab(x_test_stemmed)
pd.crosstab(res ,y_test)

In [None]:
print(classification_report(y_test, res, target_names=target_names))

## Regression Logistique

In [None]:
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from sklearn.linear_model import LogisticRegression
import pandas as pd

In [None]:
def get_reviews_from_class(x_train, y_train, _class, V):
    reviews = x_train[y_train == _class]
    
    for i in range (len(reviews)):
        decode_sentence = reviews[i]
        reviews[i] = decode_sentence
    return reviews

In [None]:
def import_lexicon(path):
    data = pd.read_csv(path, sep='\t', names=[0, 1, 2, 3])
    df = pd.DataFrame()
    df['token'] = data[0]
    df['sentiment'] = data[1]
    return df

In [None]:
print(import_lexicon("vader_lexicon.txt"))

     token  sentiment
0       $:       -1.5
1       %)       -0.4
2      %-)       -1.5
3      &-:       -0.4
4       &:       -0.7
...    ...        ...
7514    }:       -2.1
7515   }:(       -2.0
7516   }:)        0.4
7517  }:-(       -2.1
7518  }:-)        0.3

[7519 rows x 2 columns]


In [None]:
def does_no_appear(review) -> int:
    if "no" in review:
        return 1
    return 0

In [None]:
def count_first_and_second_pro(review) -> int:
    count = 0
    for char in review:
        if char in ["I", "i", "you", "yours"]:
            count += 1
    return count

In [None]:
def exclamation_in_doc(review):
    if "!" in review:
            return 1
    return 0

In [None]:
def log_word_count_in_doc(review):
    return np.log(len(review))

In [None]:
def number_of_words_pos(review, lexicon):
    count = 0
    columns = ['token']
    tmp = lexicon[lexicon.sentiment > 0]
    positive_words = tmp[columns].to_numpy().tolist()
    number_of_pos = np.in1d(positive_words, review)
    return sum(number_of_pos)

In [None]:
def number_of_words_neg(review, lexicon):
    count = 0
    columns = ['token']
    tmp = lexicon[lexicon.sentiment < 0]
    negative_words = tmp[columns].to_numpy().tolist()
    number_of_neg = np.in1d(negative_words, review)
    return sum(number_of_neg)

In [None]:
V = keras.datasets.imdb.get_word_index()
reviews = get_reviews_from_class(x_train, y_train, 0, V)
print(reviews)
lexicon = import_lexicon("vader_lexicon.txt")

[list([1, 194, 1153, 194, 8255, 78, 228, 5, 6, 1463, 4369, 5012, 134, 26, 4, 715, 8, 118, 1634, 14, 394, 20, 13, 119, 954, 189, 102, 5, 207, 110, 3103, 21, 14, 69, 188, 8, 30, 23, 7, 4, 249, 126, 93, 4, 114, 9, 2300, 1523, 5, 647, 4, 116, 9, 35, 8163, 4, 229, 9, 340, 1322, 4, 118, 9, 4, 130, 4901, 19, 4, 1002, 5, 89, 29, 952, 46, 37, 4, 455, 9, 45, 43, 38, 1543, 1905, 398, 4, 1649, 26, 6853, 5, 163, 11, 3215, 10156, 4, 1153, 9, 194, 775, 7, 8255, 11596, 349, 2637, 148, 605, 15358, 8003, 15, 123, 125, 68, 23141, 6853, 15, 349, 165, 4362, 98, 5, 4, 228, 9, 43, 36893, 1157, 15, 299, 120, 5, 120, 174, 11, 220, 175, 136, 50, 9, 4373, 228, 8255, 5, 25249, 656, 245, 2350, 5, 4, 9837, 131, 152, 491, 18, 46151, 32, 7464, 1212, 14, 9, 6, 371, 78, 22, 625, 64, 1382, 9, 8, 168, 145, 23, 4, 1690, 15, 16, 4, 1355, 5, 28, 6, 52, 154, 462, 33, 89, 78, 285, 16, 145, 95])
 list([1, 14, 47, 8, 30, 31, 7, 4, 249, 108, 7, 4, 5974, 54, 61, 369, 13, 71, 149, 14, 22, 112, 4, 2401, 311, 12, 16, 3711, 33, 75, 4

In [None]:
def LoRegression(X_train, y_train):
    nb_class = 2
    lexicon = import_lexicon("vader_lexicon.txt")
    X_features_of_all_the_class = []
    V = keras.datasets.imdb.get_word_index()

    # Preprocessing

    for _class in range (nb_class):
        reviews = get_reviews_from_class(X_train, y_train, _class, V)
        features = []

        for review in tqdm(reviews):
            feature = []
            feature.append(does_no_appear(review))
            feature.append(count_first_and_second_pro(review))
            feature.append(exclamation_in_doc(review))
            feature.append(log_word_count_in_doc(review))
            feature.append(number_of_words_neg(review, lexicon))
            feature.append(number_of_words_pos(review, lexicon))
            features.append(feature)

        X_features_of_all_the_class.append(features)
    return np.asarray(X_features_of_all_the_class)

In [None]:
X_features_of_all_the_class = LoRegression(x_train, y_train)

NameError: name 'x_train' is not defined

In [None]:
clf = LogisticRegression(random_state=0).fit(X_features_of_all_the_class, y_train)
y_pred = clf.predict(y_train)
target_names = ['class 0', 'class 1']
print(classification_report(y_train, y_pred, target_names=target_names))

In [None]:
y_pred = LoRegression(x_train, y_train)
target_names = ['class 0', 'class 1']
print(classification_report(y_true, y_pred, target_names=target_names))

 96%|█████████▌| 12003/12500 [14:00<00:38, 13.02it/s][A[A[A[A



 96%|█████████▌| 12005/12500 [14:00<00:39, 12.67it/s][A[A[A[A



 96%|█████████▌| 12007/12500 [14:00<00:37, 13.27it/s][A[A[A[A



 96%|█████████▌| 12009/12500 [14:00<00:37, 12.95it/s][A[A[A[A



 96%|█████████▌| 12011/12500 [14:01<00:36, 13.41it/s][A[A[A[A



 96%|█████████▌| 12013/12500 [14:01<00:37, 13.02it/s][A[A[A[A



 96%|█████████▌| 12015/12500 [14:01<00:35, 13.64it/s][A[A[A[A



 96%|█████████▌| 12017/12500 [14:01<00:32, 14.87it/s][A[A[A[A



 96%|█████████▌| 12019/12500 [14:01<00:29, 16.09it/s][A[A[A[A



 96%|█████████▌| 12022/12500 [14:01<00:25, 18.70it/s][A[A[A[A



 96%|█████████▌| 12025/12500 [14:01<00:24, 19.12it/s][A[A[A[A



 96%|█████████▌| 12028/12500 [14:01<00:23, 20.32it/s][A[A[A[A



 96%|█████████▌| 12031/12500 [14:02<00:23, 19.79it/s][A[A[A[A



 96%|█████████▋| 12034/12500 [14:02<00:22, 20.76it/s][A[A[A[A



 96%|█████████▋| 12037/12500 [14:0

ValueError: Expected 2D array, got 1D array instead:
array=[2.0e+00 2.5e+04].
Reshape your data either using array.reshape(-1, 1) if your data has a single feature or array.reshape(1, -1) if it contains a single sample.

<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=cbc1336d-08c2-4a02-80be-88ba31af3d32' target="_blank">
 </img>
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>