In [1]:
%load_ext autoreload
%autoreload 2

import sys; sys.path.insert(0, '..')

from datetime import datetime
import json
import os

from tqdm.auto import tqdm

from paradeller.helper import DATE_FMT, load_archive
from paradeller.dataprep import load_and_prep
from paradeller.analysis import consolidate_stanzas
from paradeller.postprocess import stanza_sorter_maker, print_stanzas, print_poems

In [2]:
def get_tweet(i):
    """Return the first record in the global ``data`` whose "id" equals ``i``.

    Prints an error message and returns None when no record matches.
    """
    for tweet in data:
        if tweet["id"] == i:
            return tweet

    print("Error: No tweet with that ID")
    return None

def showlen(data):
    """Print the length of ``data`` with thousands separators (e.g. ``270,885``)."""
    print(f"{len(data):,}")

In [3]:
# Load the prepared dataset from the cached pickle.
# NOTE(review): pickle files can execute code when loaded — only use a cache you generated yourself.
data, duplicates, adj_list_words, adj_list_ids = load_and_prep(use_pickle=True)

# TO REFRESH (presumably rebuilds from the raw data and rewrites the pickle — confirm in paradeller.dataprep):
# data, duplicates, adj_list_words, adj_list_ids = load_and_prep(use_pickle=False, update_pickle=True)


Loading processed data from pickle...
--------------------------------------------------
DONE

data            type: <class 'list'>	len: 270,885
duplicates      type: <class 'dict'>	len: 5,365
adj_list_words  type: <class 'dict'>	len: 22,505
adj_list_ids    type: <class 'dict'>	len: 270,885


### Load Results

In [4]:
def _date_from_filename(filename):
    """Parse the timestamp embedded in a results file name.

    The text between the first underscore and the next dot is parsed with
    DATE_FMT (the file name printed by this notebook looks like
    ``results_2019-07-11-1953.json``).

    Raises:
        IndexError: if the name contains no underscore.
        ValueError: if the extracted text does not match DATE_FMT.
    """
    dt_string = filename.split('_')[1].split('.')[0]
    return datetime.strptime(dt_string, DATE_FMT)


def get_most_recent_result(found_folder='../data/found'):
    """Return the path of the most recently dated results file.

    Args:
        found_folder: Directory to search. Defaults to the notebook's
            results folder.

    Returns:
        ``found_folder`` joined with the name of the file whose embedded
        timestamp is the latest.

    Raises:
        FileNotFoundError: if the folder contains no file with a parsable
            timestamp (including when the folder is empty).
    """
    dated = []
    for name in os.listdir(found_folder):
        try:
            dated.append((_date_from_filename(name), name))
        except (IndexError, ValueError):
            # Stray entries (.gitkeep, .DS_Store, notes, ...) have no
            # parsable timestamp; ignore them instead of crashing.
            continue

    if not dated:
        raise FileNotFoundError(
            f"No dated result files found in {found_folder!r}"
        )

    # Tuples compare by timestamp first, so max() picks the newest file.
    _, newest = max(dated)
    return os.path.join(found_folder, newest)

In [5]:
# choose the most recently dated results file and show which one was picked
found_file = get_most_recent_result()
print(found_file, end="\n\n")

# open file — read explicitly as UTF-8: the tweets contain emoji, so the
# platform's default encoding must not be relied on
with open(found_file, encoding="utf-8") as f:
    res = json.load(f)

# print the number of entries under each top-level key
for k, v in res.items():
    print(f"{k:12}:", len(v))

../data/found/results_2019-07-11-1953.json

meta        : 5
stanzas     : 12
poems       : 4
duplicates  : 24


### View Stanzas

In [6]:
# Build the sort-key function from the id adjacency list, then order the
# stanzas from highest key to lowest (reverse=True).
# NOTE(review): the saved output of this cell is "KeyError: 16" — presumably a
# stanza references an id missing from adj_list_ids; confirm and re-run before
# relying on the cells below.
stanza_sorter = stanza_sorter_maker(adj_list_ids)
sorted_stanzas = sorted(res['stanzas'], key=stanza_sorter, reverse=True)

KeyError: 16

In [7]:
# low: the list is sorted in descending order, so the last stanza has the smallest sort key
print("low :", stanza_sorter(sorted_stanzas[-1]))

# high: the first stanza has the largest sort key
print("high:", stanza_sorter(sorted_stanzas[0]))

low : 46
high: 86


In [8]:
# Show stanzas in sorted order; n=4 presumably caps how many are printed — confirm in paradeller.postprocess
print_stanzas(sorted_stanzas, data, n=4)

~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@yannuuuh             I am so -- 
@yannuuuh             I am so -- 
@identitytheftt       I think I do 
@identitytheftt       I think I do 
@tsubasa_007          i think i am 
@igotpjy              so do i 
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@jnlsbb               I miss you. 
@jnlsbb               I miss you. 
@ninteythrees         i wish i had Friends ): 
@ninteythrees         i wish i had Friends ): 
@_ImCertified         I swear I miss you 💞 
@vgarcia1997          I swear I wish I had friends 😩😭😪 
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@brianallendg         I feel so.... :/ 
@brianallendg         I feel so.... :/ 
@048MISSPAM           I feel you. 
@048MISSPAM           I feel you. 
@ferd_cc              I do . you 
@igotpjy              so do i 
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@excerptsofelly       i love you 
@excerptsofelly       i love you 
@bbbeckss11           I hate this 
@bbbeck

### View Complete Poems

In [10]:
# Print the poems stored under the 'poems' key of the loaded results file
print_poems(res['poems'], data)

### Combinations Math

In [2]:
from math import factorial as fact

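Number of combinations (ways to choose $r$ items out of $n$ when order does not matter):

- $n$ = number of items to choose from
- $r$ = number of items chosen

$\binom{n}{r} = \frac{n!}{r!(n-r)!}$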

In [8]:
def n_choose_r(n, r):
    """Return the number of ways to choose ``r`` items out of ``n``.

    Uses the multiplicative formula, so only ``r`` small multiplications are
    needed instead of building the (enormous) factorials of ``n`` and
    ``n - r``. The result is exact integer arithmetic.

    Args:
        n: Number of items to choose from (non-negative int).
        r: Number of items chosen (non-negative int).

    Returns:
        The binomial coefficient as an int; 0 when ``r > n``.

    Raises:
        ValueError: if ``n`` or ``r`` is negative.
    """
    if n < 0 or r < 0:
        raise ValueError("n and r must be non-negative")

    total = 1
    for k in range(1, r + 1):
        # After this step total == C(n - r + k, k), so the floor division
        # is always exact.
        total = total * (n - r + k) // k
    return total


n = 254_123
r = 2

num_combos = n_choose_r(n, r)

print(f"{num_combos:,}")

32,289,122,503


In [11]:
# One percent of the total number of combinations ("/" is true division, so this is a float)
per = num_combos / 100
print(f"{per:,}")

322,891,225.03
