# load data

In [1]:
import sqlite3
import pandas as pd
import re
from urllib.parse import urlparse
import requests

In [2]:
from datetime import date
from datetime import timedelta

## load tweets older then two weeks

In [3]:
def load_tweets(db_path, days):
    time_delta = date.today() - timedelta(days=days)
    cnx = sqlite3.connect(db_path)
    query = f"SELECT id,user, full_text, created_at, lang, quoted_status, in_reply_to_status_id FROM tweets WHERE created_at < '{str(time_delta)}'"
    #TODO add restrain, to remove tweets I liked, but for that I need to setup another cron job too.
    df = pd.read_sql_query(
        query,
        cnx,
    )
    return df


In [4]:
df_tweets = load_tweets("../home.db", days=21)

# utils

In [5]:
df_tweets.tail()

Unnamed: 0,id,user,full_text,created_at,lang,quoted_status,in_reply_to_status_id
42168,1347682164830216195,4086730154,I am LOSING IT https://twitter.com/vianerds/st...,2021-01-08T23:10:41+00:00,en,,
42169,1347684498243260417,24673980,UK covid levels were roughly equivalent to Aus...,2021-01-08T23:19:57+00:00,en,,
42170,1347688943073058818,742415719,I spend my life trying to *show* people Maths....,2021-01-08T23:37:37+00:00,en,,
42171,1347689778762936335,1037087125966151682,good thing I just wrote about the intersection...,2021-01-08T23:40:56+00:00,en,,
42172,1347694352923992064,3231016735,"new to twitter, what's up everybody?",2021-01-08T23:59:07+00:00,en,,


In [6]:
def find_url(tweet):
    url = re.findall(r"http\S+", tweet)
    return url


def clean_links(tweet):
    tweet = re.sub(r"bit.ly/\S+", "", tweet)
    tweet = re.sub(r"t.co/\S+", "", tweet)
    tweet = re.sub(r"buff.ly/\S+", "", tweet)
    tweet = re.sub(r"https://twitter.com/\S+", "", tweet)
    tweet = re.sub(r"http://twitter.com/\S+", "", tweet)
    return tweet

def remove_tw_urls(tweet):
    tweet = re.sub(r"https://twitter.com/\S+", "", tweet)
    tweet = re.sub(r"http://twitter.com/\S+", "", tweet)
    return tweet

def expand_link(url):
    if url in ["https://", "http://"]:
        return url
    else:
        try:
            session = requests.Session()
            resp = session.head(url, allow_redirects=True)
            long_url = resp.url
            return long_url
        except:
            return url

def get_domain(url):
    domain = urlparse(url).netloc
    dot_split = domain.split(".")
    if domain == "twitter.com":
        pass
    if len(dot_split) > 2:
        return ".".join(dot_split[1:])
    else:
        return domain


def remove_empty_str(l):
    for i in l:
        if len(i) == 0:
            l.remove(i)
    return l

In [7]:
def find_news(df, news_domains_list):
    df["clean_text"] = df["full_text"].apply(remove_tw_urls)
    df["urls"] = df["clean_text"].apply(find_url)
    df["urls"] = df.urls.apply(lambda x: [clean_links(d) for d in x])
    df["domains"] = df.urls.apply(lambda x: [get_domain(d) for d in x])
    df["domains"] = df.domains.apply(remove_empty_str)

    new_columns_list = []
    max_nr_dom = df.domains.str.len().max()
    for i in range(max_nr_dom):
        new_columns_list.append(f"domain{i+1}")
    df[new_columns_list] = pd.DataFrame(df.domains.tolist())

    for col in new_columns_list:
        df[col] = df[col].isin(news_domains_list)

    df["contains_news"] = df[new_columns_list].sum(axis=1)
    df["contains_news"] = df.contains_news.apply(lambda x: x if x == 0 else 1)
    df.drop(new_columns_list, axis=1, inplace=True)

    return df

def drop_contains(df, column_name, word_list):
    for string in word_list:
        df["lower"] = df["full_text"].str.lower()
        df = df[df["lower"].str.contains(string)]
        df.drop(["lower"], axis=1, inplace=True)
    return df

# searching for not-expanded news urls

In [8]:
dfz = df_tweets.copy()

In [9]:
with open("news_domains.txt", "r") as f:
    news_domains = json.loads(f.read())
dfz = find_news(dfz, news_domains)

In [10]:
dfz["has_domain"] = dfz.domains.apply(lambda x: len(x))

In [11]:
dfz.head()

Unnamed: 0,id,user,full_text,created_at,lang,quoted_status,in_reply_to_status_id,clean_text,urls,domains,contains_news,has_domain
0,922321981,16298441,no,2008-09-15T17:25:20+00:00,und,,,no,[],[],0,0
1,2627602600,21454322,"Went on a USO trip to Guantanamo Bay, Cuba a f...",2009-07-14T05:15:27+00:00,en,,,"Went on a USO trip to Guantanamo Bay, Cuba a f...",[],[],0,0
2,70261648811761665,5416652,"I wish I had kept my 1,700 BTC @ $0.06 instead...",2011-05-16T22:57:37+00:00,en,,,"I wish I had kept my 1,700 BTC @ $0.06 instead...",[],[],0,0
3,177008089394970624,5110861,"In 1996 a man took a NZ radio station hostage,...",2012-03-06T12:29:51+00:00,en,,,"In 1996 a man took a NZ radio station hostage,...",[http://edition.cnn.com/WORLD/fringe/9603/03-2...,[cnn.com],1,1
4,193480622533120001,175624200,I can send an IP packet to Europe faster than ...,2012-04-20T23:25:49+00:00,en,,,I can send an IP packet to Europe faster than ...,[],[],0,0


In [12]:
# dfz["has_domain"] = dfz.domains.apply(lambda x: 1 if len(x) > 0 else 0)
dfz["has_domain"] = dfz.domains.apply(lambda x: len(x))

In [13]:
dfz = dfz[dfz.has_domain > 0]
dfz.shape

(9176, 12)

In [14]:
dfz = dfz[dfz.contains_news == 0]
dfz.shape

(7378, 12)

In [15]:
dfz.head()

Unnamed: 0,id,user,full_text,created_at,lang,quoted_status,in_reply_to_status_id,clean_text,urls,domains,contains_news,has_domain
17,451159870306549761,159169312,Course slides from @Stanford and @stanfordsyms...,2014-04-02T00:51:09+00:00,en,,,Course slides from @Stanford and @stanfordsyms...,[http://stanford.edu/~zdar/week1.pdf],[stanford.edu],0,1
24,574518676575162369,68132773,Robert Capa's Omaha Beach by Dominique Bertail...,2015-03-08T10:35:01+00:00,en,,,Robert Capa's Omaha Beach by Dominique Bertail...,[http://bandedessinee.blog.lemonde.fr/2014/06/...,[blog.lemonde.fr],0,1
28,627427921624481792,794010396,600cals per day for 7 days reverses T2 diabete...,2015-08-01T10:37:28+00:00,en,,,600cals per day for 7 days reverses T2 diabete...,[http://link.springer.com/article/10.1007/s001...,[springer.com],0,1
39,681513454931345408,2895499182,Found out there is a Fake Kanji creation conte...,2015-12-28T16:34:04+00:00,en,,,Found out there is a Fake Kanji creation conte...,[http://www.sankeisquare.com/event/kanjicontes...,[sankeisquare.com],0,1
49,720706866934710272,15804774,Kite looks pretty cool: machine-learning appli...,2016-04-14T20:14:41+00:00,en,,,Kite looks pretty cool: machine-learning appli...,[https://kite.com/],[kite.com],0,1


In [16]:
domain_list = dfz.domains.tolist()

In [17]:
domains = []
for i in domain_list:
    for d in i:
        if d in domains:
            pass
        else:
            domains.append(d)

In [18]:
dfz["urls_exp"] = 0

In [19]:
dfz.head()

Unnamed: 0,id,user,full_text,created_at,lang,quoted_status,in_reply_to_status_id,clean_text,urls,domains,contains_news,has_domain,urls_exp
17,451159870306549761,159169312,Course slides from @Stanford and @stanfordsyms...,2014-04-02T00:51:09+00:00,en,,,Course slides from @Stanford and @stanfordsyms...,[http://stanford.edu/~zdar/week1.pdf],[stanford.edu],0,1,0
24,574518676575162369,68132773,Robert Capa's Omaha Beach by Dominique Bertail...,2015-03-08T10:35:01+00:00,en,,,Robert Capa's Omaha Beach by Dominique Bertail...,[http://bandedessinee.blog.lemonde.fr/2014/06/...,[blog.lemonde.fr],0,1,0
28,627427921624481792,794010396,600cals per day for 7 days reverses T2 diabete...,2015-08-01T10:37:28+00:00,en,,,600cals per day for 7 days reverses T2 diabete...,[http://link.springer.com/article/10.1007/s001...,[springer.com],0,1,0
39,681513454931345408,2895499182,Found out there is a Fake Kanji creation conte...,2015-12-28T16:34:04+00:00,en,,,Found out there is a Fake Kanji creation conte...,[http://www.sankeisquare.com/event/kanjicontes...,[sankeisquare.com],0,1,0
49,720706866934710272,15804774,Kite looks pretty cool: machine-learning appli...,2016-04-14T20:14:41+00:00,en,,,Kite looks pretty cool: machine-learning appli...,[https://kite.com/],[kite.com],0,1,0


In [20]:
# import json
# short_url =[]
# for c, i in enumerate(dfz.urls):
#     print(c)
#     if (c + 1) % 100 == 0:
#         print(f"{(c+1)} / {len(dfz.urls)}")
#     for d in i:
#         dd = get_domain(d)
#         e = expand_link(d)
#         de = get_domain(e)
#         if not (de == dd):
#             short_url.append({"short_url": dd, "long_url": de, "is_news": (de in news_domains)})
#     with open('short_url', 'w') as file:
#         file.write(json.dumps(short_url, indent=4))



In [21]:
with open('short_url') as json_file:
    short_url = json.load(json_file)

In [22]:
short_url[1]

{'short_url': 'bloom.bg', 'long_url': 'bloomberg.com', 'is_news': True}

In [23]:
# expand_link(url)

In [24]:
dfz.urls[921:922]

4335    [https://st.news/2ImPHpV]
Name: urls, dtype: object

In [25]:
# x = 922
# for c, i in enumerate(dfz.urls[x:]):
#     print(c+x, i)
#     if (c + x + 1) % 100 == 0:
#         print(f"{(c+1)} / {len(dfz.urls)}")
#     for d in i:
#         dd = get_domain(d)
#         e = expand_link(d)
#         de = get_domain(e)
#         if not (de == dd):
#             short_url.append({"short_url": dd, "long_url": de, "is_news": (de in news_domains)})
#     with open('short_url', 'w') as file:
#         file.write(json.dumps(short_url, indent=4))

In [26]:
dfz.urls[1100:1101]

5227    [http://www.fdle.state.fl.us/News/2020/Decembe...
Name: urls, dtype: object

In [27]:
# x = 1101
# for c, i in enumerate(dfz.urls[x:]):
#     print(c+x, i)
#     if (c + x + 1) % 100 == 0:
#         print(f"{(c+1)} / {len(dfz.urls)}")
#     for d in i:
#         dd = get_domain(d)
#         e = expand_link(d)
#         de = get_domain(e)
#         if not (de == dd):
#             short_url.append({"short_url": dd, "long_url": de, "is_news": (de in news_domains)})
#     with open('short_url', 'w') as file:
#         file.write(json.dumps(short_url, indent=4))

In [28]:
dfz.urls[3295:3296]

16617    [https://blogs.oracle.com/cloud-infrastructure...
Name: urls, dtype: object

In [29]:
# x = 3296
# for c, i in enumerate(dfz.urls[x:]):
#     print(c+x, i)
#     if (c+x+1) % 100 == 0:
#         print(f"{(c+x+1)} / {len(dfz.urls)}")
#     for d in i:
#         dd = get_domain(d)
#         e = expand_link(d)
#         de = get_domain(e)
#         if not (de == dd):
#             short_url.append({"short_url": dd, "long_url": de, "is_news": (de in news_domains)})
#     with open('short_url', 'w') as file:
#         file.write(json.dumps(short_url, indent=4))

In [30]:
dfz.urls[5652:5653]

31074    [https://blogs.oracle.com/javamagazine/modern-...
Name: urls, dtype: object

In [31]:
x = 5653
for c, i in enumerate(dfz.urls[x:]):
    print(c+x, i)
    if (c+x+1) % 100 == 0:
        print(f"{(c+x+1)} / {len(dfz.urls)}")
    for d in i:
        dd = get_domain(d)
        e = expand_link(d)
        de = get_domain(e)
        if not (de == dd):
            short_url.append({"short_url": dd, "long_url": de, "is_news": (de in news_domains)})
    with open('short_url', 'w') as file:
        file.write(json.dumps(short_url, indent=4))

5653 ['https://link.medium.com/skTaa59mCcb']
5654 ['https://github.com/DeepGraphLearning/LiteratureDL4Graph']
5655 ['https://blog.repl.it/tabbed-shell']
5656 ['https://www.metaeyes.com']
5657 ['http://courses.openmined.org']
5658 ['http://bostonreview.net/science-nature-race/ned-block-race-genes-and-iq']
5659 ['https://mitsloan.co/2GDvWcy']
5660 ['https://forms.gle/N2hAJQPJBNAvdMBB7']
5661 ['https://zitniklab.hms.harvard.edu/TDC/benchmark/admet_group/', 'https://github.com/mims-harvard/TDC']
5662 ['https://www.youtube.com/watch?v=f-JdMt056jA&t=616s']
5663 ['https://ourworldindata.org/covid-vaccinations]']
5664 ['https://www.harvardartmuseums.org/collections/object/194840']
5665 ['https://ourworldindata.org/grapher/covid-vaccination-doses-per-capita']
5666 ['http://samoburja.com/competition-for-power/']
5667 ['https://youtu.be/9ajwtKWH8ng']
5668 ['https://youtu.be/fn3KWM1kuAw']
5669 ['https://marginalrevolution.com/marginalrevolution/2020/12/vaccinate-24-7.html']
5670 ['https://elifesci

5788 ['http://Patreon.com/ShiflettBros.', 'https://www.instagram.com/p/CJaUwpRpbge/?igshid=dspkkqx0l638']
5789 ['https://www.pscp.tv/w/crrHEDE0NDkyNzk4fDF5cEtkZ2pwUVZqeFepICV0VjfRZlB4nCXA_idhHI6zQ4BEWR9npz5C-vUBmg==']
5790 ['https://changingtimes.media/2020/12/28/covid-19-new-report-says-cognitive-dysfunction-strikes-long-haulers-in-all-age-groups/']
5791 ['https://youtu.be/bhg2Xt_hNJA']
5792 ['https://commons.wikimedia.org/wiki/File:EBM_small.gif', 'https://github.com/hadyelsahar/EBM-visualizations']
5793 ['https://onesignal.com/blog/onesignal-code-freeze-experience/']
5794 ['https://www.whoownsmyavailability.com/']
5795 ['https://shouldideploy.today/']
5796 ['http://shouldideploytoday.com']
5797 ['https://nyti.ms/34WfmOj']
5798 ['https://grapheneos.org/articles/sitewide-advertising-industry-opt-out.']
5799 ['https://sites.google.com/view/neurips2020rwrl)']
5800 / 7378
5800 ['https://www.manning.com/books/rust-in-action']
5801 ['http://pragmaticurl.com/standardresume', 'http://pragmat

5894 ['https://www.susanrosechina.co.uk/']
5895 ['https://www.youtube.com/watch?v=6SWpN64Ivb4&t=166s']
5896 ['https://www.econtalk.org/katherine-levine-einstein-on-neighborhood-defenders/']
5897 ['https://www.housingforcalifornia.org/proposals/incentives']
5898 ['https://www.lastweekinaws.com/blog/counting-twitter-followers-over-time-the-corey-quinn-way/']
5899 ['https://www.harvardartmuseums.org/collections/object/310608']
5900 / 7378
5900 ['https://www.psychnewsdaily.com/study-links-mindfulness-meditation-to-narcissism-and-spiritual-superiority/']
5901 ['https://antonhowes.substack.com/p/age-of-invention-the-paradox-of-progress', 'https://fivebooks.com/best-books/the-best-nature-books-of-2020-charles-foster/', 'https://arxiv.org/abs/2012.14244']
5902 ['http://halcyonrealms.com/books/ghibli-museum-mitaka-guide-book-review/']
5903 ['https://www.housingforcalifornia.org/proposals/incentives']
5904 ['https://allenai.org/newsletters/archive/2020-12-newsletter.html']
5905 ['https://www.emp

6005 ['https://miami.vcstarterkit.com/']
6006 ['https://www.atlasobscura.com/articles/lemon-pigs-new-year']
6007 ['https://blog.circleboom.com/how-can-i-delete-all-my-tweets-that-i-cant-see/']
6008 ['http://book.bionumbers.org']
6009 ['https://open.spotify.com/track/5q6GyhYQUAgCWDuMZDXXUG?si=pHRBJ4RsT0akkjWDivf7tA']
6010 ['https://www.harvardartmuseums.org/collections/object/195402']
6011 ['https://www.amazon.com/Where-Mathematics-Come-Embodied-Brings/dp/0465037712']
6012 ['http://tinyurl.com/y7qoj3ba']
6013 ['http://www.metmuseum.org/art/collection/search/15463']
6014 ['https://t.…']
6015 ['https://andymatuschak.org/prompts']
6016 ['https://www.patreon.com/quantumcountry).']
6017 ['https://www.pscp.tv/w/cr0awzFEWktveUxZcmRFYXl8MWRqR1hxa3prRVZKWu1L9QHBJ1x2Fr6CJwS0pDBx9L5mBAvnF_xX5EZyJ5SW']
6018 ['https://greenwald.substack.com/p/the-kafkaesque-imprisonment-of-julian']
6019 ['https://slothwerks.medium.com/slothwerks-reflections-on-2020-8d304f8340dd']
6020 ['https://arxiv.org/abs/2012.14

6112 ['https://www.mobileatscale.com/,']
6113 ['https://danwang.co/2020-letter/']
6114 ['https://github.com/jaceklaskowski/trino-meetups/tree/main/minikube#trino-on-minikube']
6115 ['https://noahpinion.substack.com/p/shots-into-arms-now']
6116 ['https://www.econlib.org/when-may-we-be-happy/']
6117 ['https://danwang.co/2020-letter/']
6118 ['https://masknetwork.medium.com/a-new-chapter-of-mask-network-ito-aa8182a0badc']
6119 ['https://www.patreon.com/posts/new-release-2-1-45673700']
6120 ['https://gumroad.com/l/BLwER']
6121 ['http://www.metmuseum.org/art/collection/search/15463']
6122 ['https://www.reddit.com/r/MachineLearning/comments/kod9ze/p_probabilistic_machine_learning_an_introduction/']
6123 ['https://www.ribaj.com/intelligence/rethink-2025-themes-reclaim-street-workplace-villages-suburbia-living']
6124 ['https://clevelandart.org/art/1927.397']
6125 ['https://www.reddit.com/r/MachineLearning/comments/kod9ze/p_probabilistic_machine_learning_an_introduction/']
6126 ['https://link.me

6242 ['http://halcyonrealms.com/books/icon-of-europe-mythologylegendfairy-tales-art-book-review/']
6243 ['https://www.digikey.com/schemeit/project/']
6244 ['https://www.digikey.com/schemeit/project/']
6245 ['https://www.ebay.com/itm/IBM-1992-OS-2-OFFICIAL-AMBASSADOR-22K-GOLD-COIN-EXTENDED-SERVICE-LAN-SERVER/264998413277']
6246 ['https://www.youtube.com/watch?v=f05AfVjjYc8']
6247 ['https://www.ebay.com/itm/METACREATIONS-Bryce-3D-CD-ROM-Software-MAC-PC-NEW-OLD-STOCK-NOS-1997/203236029781']
6248 ['https://www.flickr.com/gp/90487619@N04/He23Mb']
6249 ['https://www.ebay.com/itm/373410547587']
6250 ['https://www.ebay.com/itm/373410550452']
6251 ['https://arxiv.org/abs/2012.15856)']
6252 ['https://www.mobileatscale.com/,']
6253 ['https://en.wikipedia.org/wiki/Robert_Sheckley']
6254 ['https://www.zeit.de/politik/ausland/2020-12/brexit-great-britain-economy-covid-19-european-union-nationalism/komplettansicht']
6255 ['http://deepskyanchor.com/three-laws/']
6256 ['http://faceoff.eegedu.com']
6257

6354 ['https://www.harvardartmuseums.org/collections/object/197620']
6355 ['https://dev.to/jingjing142/from-politics-to-programming-how-i-changed-careers-during-a-pandemic-bap']
6356 ['https://www.harvardartmuseums.org/collections/object/198216']
6357 ['https://www.harvardartmuseums.org/collections/object/198216']
6358 ['https://www.twitch.tv/videos/840896832']
6359 ['https://boomfestival.org/boom2021/news/boom-news/liminal-podquest-3-nerd-immunity-collective-wisdom-and-conspiracy-theories/']
6360 ['https://www.mobileatscale.com/)']
6361 ['https://www.youtube.com/watch?v=frfNxOyFQhg&feature=youtu.be']
6362 ['https://conta.cc/2JoJHgV']
6363 ['https://colab.research.google.com/drive/1f3GOVrhwBrtltnwKrafzaKL3fKtIKqgb?usp=sharing']
6364 ['https://www.quantamagazine.org/how-close-are-computers-to-automating-mathematical-reasoning-20200827/']
6365 ['https://elidourado.com/blog/notes-on-technology-2020s']
6366 ['https://www.confluent.io/blog/soa-vs-eda-is-not-life-simply-a-series-of-events/']

6443 ['https://gumroad.com/l/profit-and-loss']
6444 ['https://livejapan.com/en/in-tokyo/in-pref-tokyo/in-tokyo_train_station/article-a0004524/']
6445 ['https://youtu.be/ampmQg33JtY']
6446 ['https://biwin.co.uk/ghibli-papercraft/ghibli-papercraft-studio-ghibli-my-neighbor-totoro-kusakabe-house-paper-craft-kit-1#']
6447 ['https://instagram.com/rnmoffatt']
6448 ['https://balajis.com/the-purpose-of-technology/)']
6449 ['https://app.box.com/s/4dc0p12fn01lqvahr0v7tzjt05ek1tza']
6450 ['https://www.lefigaro.fr/medias/le-patron-de-cnews-affirme-qu-olivier-veran-boycotte-sa-chaine-20210103']
6451 ['https://www.wish.com/c/5bcf3d93652a572c559240e6']
6452 ['https://www.gov.uk/government/publications/tfc-children-and-transmission-update-paper-17-december-2020']
6453 ['https://youtube.com/watch?v=raLQWq_PRIE']
6454 ['https://trib.al/WF7wqQS']
6455 ['https://www.imj.org.il/en/collections/400781']
6456 ['https://flic.kr/p/2jYYwe9']
6457 ['https://www.cubeecraft.com/cubees/moebius']
6458 ['http://artand

6561 ['https://news.ycombinator.com/item?id=25623858']
6562 ['https://www.wikiart.org/en/john-singer-sargent/jupiter-beseeching-eros']
6563 ['https://akirathedon.bandcamp.com/album/meditations-vol-1']
6564 ['https://www.swyx.io/psychology-of-money/']
6565 ['http://bible.com']
6566 ['https://longnow.org/seminars/02012/nov/28/enchanted-sun-coevolution-light-life-and-color-earth/']
6567 ['https://en.wikipedia.org/wiki/Bessel_function?wprov=sfla1']
6568 ['https://kaminsky.rocks/2021/01/advice-for-getting-into-data/']
6569 ['http://ssalewski.de/gtkprogramming.html#_gaction']
6570 ['http://www.julienrivoire.com/']
6571 ['https://matthewdf10.medium.com/how-to-enable-logging-on-every-aws-service-in-existence-circa-2021-5b9105b87c9']
6572 ['https://blog.circleboom.com/how-can-i-get-more-followers-with-twitter-analytics/']
6573 ['https://www.gingrich360.com/2021/01/conservative-georgians-must-turn-out-for-americas-sake/']
6574 ['https://www.instagram.com/p/CJmkNzUJmYg/?igshid=126e1uupa8q96']
657

6675 ['https://www.gov.pl/web/gis/ostrzezenie-publiczne-dotyczace-zywnosci-obecnosci-bakterii-listeria-monocytogenes-w-partii-produktu-pn-ser-goralski-mala-galka-wedzona-balser-180-g?fbclid=IwAR2CrXXZHgGCemYaY1RCrMXIu7FhAx6U9xC2NtiHBqQ6ExC-i0gcfCFNDms']
6676 ['https://dabeaz.com/courses.html']
6677 ['https://marginalrevolution.com/marginalrevolution/2021/01/new-evidence-that-spices-fruits-from-asia-had-reached-the-mediterranean-earlier-than-thought.html']
6678 ['https://www.gov.pl/web/gis/ostrzezenie-publiczne-dotyczace-wyrobu-do-kontaktu-z-zywnoscia-migracja-pierwszorzedowych-amin-aromatycznych-z-produktu-pn-chochla']
6679 ['https://github.com/getsentry/sentry/pull/22889']
6680 ['https://link.medium.com/cGqUUBsBKcb']
6681 ['https://lambdalabs.com/blog/nvidia-rtx-a6000-benchmarks/']
6682 ['http://lrb.me/8jh']
6683 ['https://github.com/open-mmlab/mmtracking']
6684 ['https://youtu.be/-nH4OSyjwSI']
6685 ['http://reut.rs/3b7tZSM']
6686 ['https://joshuahhh.github.io/noise-draw/']
6687 ['htt

6784 ['https://openai.com/blog/dall-e/']
6785 ['https://chuckecheese.com/delivery-to-go/pasquallys-pizza-wings/']
6786 ['https://link.medium.com/Z5rJHZlgOcb']
6787 ['https://www.harvardartmuseums.org/collections/object/307747']
6788 ['https://psyteachr.github.io/msc-data-skills/']
6789 ['https://openai.com/blog/dall-e/']
6790 ['https://www.youtube.com/watch?v=LdOe18KhtT4']
6791 ['https://www.buymeacoffee.com/janel/c/660796']
6792 ['https://ia800602.us.archive.org/view_archive.php?archive=/21/items/AllFritz/All%20Fritz.zip']
6793 ['https://en.wikipedia.org/wiki/Three_Governors_controversy']
6794 ['https://www.youtube.com/watch?v=tN_Svv4LzOI&feature=youtu.be']
6795 ['http://sfdharmacollective.eventbrite.com/']
6796 ['https://www.amazon.com/exec/obidos/ISBN=0231184379/essentialreadingA/']
6797 ['https://www.youtube.com/watch?v=0JQ0xnJyb0A']
6798 ['https://goodliving.com/podcasts/time-off-building-a-rest-ethic-with-max-frenzel/']
6799 ['https://lobste.rs/s/t9sz67/fostering_culture_values_s

6907 ['https://youtu.be/Lt4Z5oOAeEY', 'https://']
6908 ['https://www.harvardartmuseums.org/collections/object/194887']
6909 ['https://biorxiv.org/cgi/content/short/2021.01.05.425426v1']
6910 ['https://www.sciencedirect.com/science/article/pii/S0012369220342410']
6911 ['https://hotosm.bamboohr.com/jobs/view.php?id=24']
6912 ['https://github.com/xssfox/nr-basic']
6913 ['https://github.com/jina-ai/jina/releases/tag/v0.9.0']
6914 ['https://datatalks.club/books/20210111-reinforcement-learning.html']
6915 ['http://celsius.network:']
6916 ['https://www.slowboring.com/p/making-policy-for-a-low-trust-world']
6917 ['https://www.harvardartmuseums.org/collections/object/198026']
6918 ['https://www.goodreads.com/book/show/480002.Historical_Dynamics?ac=1&from_search=true&qid=vKzp0PiF83&rank=1']
6919 ['https://www.lloydkahn.com/2021/01/crackerjack-half-milers/']
6920 ['https://anchor.fm/datatalksclub/episodes/Processes-in-a-Data-Science-Project---Alexey-Grigorev-encdlg']
6921 ['https://dev.to/jbszcze

7021 ['https://', 'https://vimeo.com/398813930]']
7022 ['http://bor.com/n/oahmfy']
7023 ['https://blog.circleboom.com/why-is-twitter-visual-vital-for-effective-twitter-marketing/']
7024 ['https://www.lastweekinaws.com/blog/terrible-ideas-for-avoiding-aws-data-transfer-costs/']
7025 ['https://www.youtube.com/c/beforesafters/videos']
7026 ['https://sahillavingia.com/work']
7027 ['https://anchor.fm/chaitimedatascience/episodes/Emil-Wallner-Art--ML--Being-Internet-Taught--Creating-ML-Content-133-eo6fld', 'https://youtu.be/ENbKecYgITA']
7028 ['https://www.complexityexplorer.org/courses/116-foundations-applications-of-humanities-analytics']
7029 ['https://trib.al/oL3hExF']
7030 ['https://santafe.edu/engage/learn/alumni/jenny-huang']
7031 ['https://todoist.com/review/2020/']
7032 ['http://tmz.me/tbSvrgK']
7033 ['https://publicdomainreview.org/essay/revolutionary-colossus']
7034 ['https://', 'https://']
7035 ['https://www.theemotionmachine.com/how-to-create-psychological-distance-between-you-a

7139 ['https://arxiv.org/pdf/2101.02663.pdf', 'https://arxiv.org/abs/2101.02663']
7140 ['https://arxiv.org/pdf/2101.02672.pdf', 'https://arxiv.org/abs/2101.02672', 'https://github.com/AutoVision-cloud/SA-Det3D']
7141 ['https://www.instagram.com/tv/CJxKMArpN0_/?igshid=1gieqy5188vry']
7142 ['https://www.harvardartmuseums.org/collections/object/304649']
7143 ['https://arxiv.org/pdf/2101.02691.pdf', 'https://arxiv.org/abs/2101.02691']
7144 ['https://arxiv.org/pdf/2101.02692.pdf', 'https://arxiv.org/abs/2101.02692', 'https://cs.stanford.edu/~kaichun/where2act/']
7145 ['https://github.com/tgrosinger/advanced-tables-obsidian']
7146 ['https://www.harvardartmuseums.org/collections/object/230618']
7147 ['https://vogue-try-on.github.io/']
7148 ['https://alphabetworkersunion.org/']
7149 ['https://reut.rs/2XocsxA']
7150 ['https://youtu.be/lnRxqrouTUE?t=10']
7151 ['https://reut.rs/2XocsxA']
7152 ['https://github.com/YatingMusic/compound-word-transformer', 'https://ailabs.tw/human-interaction/compoun

7250 ['https://www.thetythebarn.co.uk/whats-on/blog/virtual-venue-tour/']
7251 ['https://www.kaggle.com/c/riiid-test-answer-prediction/discussion/209581']
7252 ['https://podcasts.apple.com/us/podcast/econtalk/id135066958?i=1000503684242']
7253 ['https://www.copmadrid.org/web/formacion/actividades/20201028115954133249/sdo2101-curso-experto-psicologia-esports']
7254 ['https://rmoff.net/2020/04/16/a-quick-and-dirty-way-to-monitor-data-arriving-on-kafka/']
7255 ['https://groups.google.com/a/tensorflow.org/g/swift/c/x1Spv9b97w0']
7256 ['https://sahillavingia.com/work']
7257 ['https://podcasts.apple.com/us/podcast/econtalk/id135066958?i=1000503684242']
7258 ['https://tinyurl.com/y2y4uxgf', 'https://tinyurl.com/yy7ecaz6']
7259 ['https://krillbitestudio.itch.io/sunlight', 'https://']
7260 ['http://store.mountaindew.com']
7261 ['http://youtu.be/TzXBB3Uf19E']
7262 ['https://mobile.twitter.com/NuPopulism/status/1347522536146427904']
7263 ['https://t.c…']
7264 ['https://is.gd/cgcmJA']
7265 ['https

7367 ['https://link.medium.com/kwEBu6r6Scb']
7368 ['https://longnow.org/seminars/02017/oct/30/engineering-gene-safety/']
7369 ['https://torrentfreak.com/sci-hub-founder-criticises-sudden-twitter-ban-over-over-counterfeit-content-210108/']
7370 ['https://www.harvardartmuseums.org/collections/object/198272']
7371 ['https://superrare.co/artwork-v2/altered-carbon-18007']
7372 ['http://jessicajburton.co.uk/assets/videos/DavidChapman20201221.mp4']
7373 ['https://warontherocks.com/2020/07/thrones-wreathed-in-shadow-tacitus-and-the-psychology-of-authoritarianism/']
7374 ['https://www.psymposia.com/magazine/jake-angeli-psychedelic-shaman-capitol-insurrection-trump/']
7375 ['https://soundcloud.com/agora_politics/22-cultivating-a-world-we-can-live-in-with-jason-snyder']
7376 ['https://www.eventbrite.com/e/hold-america-together-national-gathering-tickets-135916460793?utm-medium=discovery&utm-campaign=social&utm-content=attendeeshare&aff=escb&utm-source=cp&utm-term=listing']
7377 ['https://link.med