In [1]:
from gsn import *
from conceptnet import *
from imagenet import *

In [2]:
# get gsn statistics of word counts
yearly_abs_counts, yearly_percent_counts, word2idx = get_word_counts(start_year=1800, end_year=2000, min_total_count=20000)

100%|██████████| 99/99 [22:30<00:00, 13.65s/it]


In [5]:
gsn_vocab = word2idx.keys()
word2cnp_counts, word2idx, weighted_co_occurrences = get_cnp_stats(gsn_vocab)

3408it [54:35,  1.04it/s]


In [3]:
# get mappings between ImageNet index, WordNetID (wnid) and words
image_paths, wnids = get_img_paths()
wnid2img_path_idx = get_wnid2img_path_idx(image_paths)
word2wnids = get_word2wnids(wnids)
word2img_idx = get_word2img_idx(word2wnids, wnid2img_path_idx)


100%|██████████| 21841/21841 [12:16<00:00, 29.67it/s]


In [2]:
import pickle
# get words with sufficient ontological and visual representations
min_cnp_edges = 10
min_img_num = 100

cnp_word2idx = pickle.load(open('/h/19/jadeleiyu/frame_extension/data/cnp/word2cnp_idx.p', 'rb'))
word2cnp_counts = pickle.load(open('/h/19/jadeleiyu/frame_extension/data/cnp/word2cnp_counts.p', 'rb'))
word2img_idx = pickle.load(open('/h/19/jadeleiyu/frame_extension/data/img/imagenet_word2img_idx.p','rb'))
cnp_vocab = set(cnp_word2idx.keys())
img_vocab = set(word2img_idx.keys())

cnp_img_common_vocab = set([word for word in cnp_vocab if word in img_vocab
                            and word2cnp_counts[word] >= min_cnp_edges
                            and len(word2img_idx[word]) >= min_img_num])

In [3]:
# get words with sufficient GSN occurrences

word2gsn_idx = pickle.load(open('/h/19/jadeleiyu/frame_extension/data/gsn/word2idx.p', 'rb'))
gsn_yearly_abs_counts = pickle.load(open('/h/19/jadeleiyu/frame_extension/data/gsn/yearly_abs_counts.p', 'rb'))
gsn_vocab = set(word2gsn_idx.keys())
gsn_word2total_count_by_pos = pickle.load(open('/h/19/jadeleiyu/frame_extension/data/gsn/word2total_count.p', 'rb'))


freq_threshold = 100000
candidate_support_nouns = set([word for word in cnp_img_common_vocab
                           if sum(gsn_word2total_count_by_pos[word]) >= freq_threshold
                           and word in gsn_vocab
                           ])

In [4]:
len(candidate_support_nouns)

4135

In [5]:
# choose candidate verbs with sufficient frequency in GSN
min_verb_freq = 60000
candidate_verbs = [word for word in word2gsn_idx.keys() if
                   gsn_word2total_count_by_pos[word][1] >= min_verb_freq]

In [6]:
len(candidate_verbs)

5660

In [None]:
# extract v-r-n frame usages from GSN
rel_types = {'nsubj', 'dobj', 'iobj', 'pobj'}
preps = {'in', 'by', 'to', 'with', 'on', 'from', 'for', 'at', 'as', 'like', 'of', 'into', 'about', 'under'}
frame_df = get_frame_data(candidate_support_nouns, candidate_verbs, rel_types, preps)
frame_df.to_csv('/h/19/jadeleiyu/frame_extension/data/gsn/gsn_frame_df.csv', index=False)

In [1]:
import pandas as pd
frame_df = pd.read_csv('/h/19/jadeleiyu/frame_extension/data/gsn/gsn_frame_df.csv')
# aggregate frame usages with the same (noun, verb, relation) triples
agg_func = {}
for decade in range(1800, 2010, 10):
    agg_func['count in {}s'.format(decade)] = sum
grouped_frame_df = frame_df.groupby(['verb', 'relation', 'noun']).agg(agg_func).reset_index()

In [11]:
# choose frame usages with sufficient frequencies
grouped_frame_df['total count'] = grouped_frame_df.iloc[:, 3:].sum(axis=1)

In [17]:
total_freq_theta = 100
grouped_frame_df = grouped_frame_df[grouped_frame_df['total count'] >=100]

Unnamed: 0,verb,relation,noun,count in 1800s,count in 1810s,count in 1820s,count in 1830s,count in 1840s,count in 1850s,count in 1860s,...,count in 1920s,count in 1930s,count in 1940s,count in 1950s,count in 1960s,count in 1970s,count in 1980s,count in 1990s,count in 2000s,total count
1,abandon,dobj,abode,1,3,7,8,11,26,17,...,3,3,1,13,7,11,9,6,37,239.0
4,abandon,dobj,action,0,0,6,0,1,9,15,...,25,30,26,67,73,147,135,175,229,1062.0
8,abandon,dobj,aircraft,0,0,0,0,0,0,0,...,0,3,47,48,85,56,93,80,195,607.0
17,abandon,dobj,animal,0,0,2,1,10,16,5,...,6,9,3,14,17,23,24,44,138,411.0
19,abandon,dobj,apartment,0,0,0,0,0,0,0,...,7,3,2,3,7,4,6,16,48,105.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
334,abandon,dobj,framework,0,0,0,0,0,0,0,...,2,0,4,15,39,44,79,167,171,529.0
335,abandon,dobj,friend,56,62,100,92,112,138,98,...,92,88,67,79,190,174,199,308,819,3294.0
337,abandon,dobj,fruit,0,1,2,0,1,6,7,...,12,34,24,36,80,58,56,45,102,561.0
338,abandon,dobj,furniture,0,0,0,0,0,0,0,...,9,19,8,6,6,1,11,8,14,118.0


In [19]:
for i in range(21):
    decade = 1800 + i*10
    grouped_frame_df['total count up to {}s'.format(decade)] = grouped_frame_df.iloc[:, 3:4+i].sum(axis=1)

In [22]:
grouped_frame_df.to_csv('/h/19/jadeleiyu/frame_extension/data/gsn/vr_grouped_frame_df_100.csv', index=False)

In [23]:
# aggregate frame usages by their support nouns
agg_func = {'noun': lambda x: list(x)}
for decade in range(1800, 2010, 10):
    agg_func['count in {}s'.format(decade)] = lambda x: list(x)
    agg_func['total count up to {}s'.format(decade)] = lambda x: list(x)
    
grouped_frame_df = grouped_frame_df.groupby(['verb', 'relation']).agg(agg_func).reset_index()

In [25]:
len(grouped_frame_df['verb'])

28168

In [26]:
# save the grouped data frame of frame usages
grouped_frame_df.to_csv('/h/19/jadeleiyu/frame_extension/data/gsn/vrn_grouped_frame_df.csv', index=False)

In [2]:
# get the counts for each support noun by decade in frame_df
# so that when computing historical conceptnet embeddings later, we weight each node by its frequency on every decade
# in this way we can effectively avoid the affect of OCR-like errors
# e.g. even if the word "car" is present in GSN at 1800s, its extremely low frequency will make it contribute very 
# few to the diachronic embeddings at 1800s

import pandas as pd
frame_df = pd.read_csv('/h/19/jadeleiyu/frame_extension/data/gsn/gsn_frame_df.csv')
agg_func = {}
for decade in range(1800, 2010, 10):
    agg_func['count in {}s'.format(decade)] = sum
noun_grouped_frame_df = frame_df.groupby(['noun']).agg(agg_func).reset_index()
noun_grouped_frame_df.to_csv('/h/19/jadeleiyu/frame_extension/data/gsn/noun_decade_counts_df.csv', index=False)

Unnamed: 0,noun,count in 1800s,count in 1810s,count in 1820s,count in 1830s,count in 1840s,count in 1850s,count in 1860s,count in 1870s,count in 1880s,...,count in 1910s,count in 1920s,count in 1930s,count in 1940s,count in 1950s,count in 1960s,count in 1970s,count in 1980s,count in 1990s,count in 2000s
0,abacus,1,7,14,18,43,74,78,94,119,...,220,106,102,83,252,395,313,329,449,738
1,abbess,34,47,113,200,181,209,156,175,228,...,251,376,198,102,169,209,133,140,191,1448
2,abbey,780,1182,1606,1882,2468,3248,2801,3933,5142,...,3954,3321,2539,1816,3131,4159,3736,3473,4592,16910
3,abode,592,1106,2532,4571,5523,6934,5229,5857,8517,...,5793,4284,2814,2256,3174,5273,4806,3847,4952,26054
4,aboriginal,0,0,5,26,32,36,26,22,47,...,37,41,55,40,161,174,258,201,290,517
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,anomaly,27,49,185,349,381,675,701,964,1348,...,1704,1466,1296,1754,3076,7109,9451,12493,17574,25802
96,ant,206,312,484,492,451,934,770,1403,3428,...,3288,2928,2171,2155,3012,4089,3913,4300,6516,19163
97,antecedent,184,200,441,635,710,1350,1489,2173,3044,...,2502,1865,1429,1348,2645,4707,5072,6989,9472,17104
98,antelope,20,46,72,157,255,621,349,479,808,...,693,620,437,333,541,911,856,806,1085,4176


In [4]:
# compute numpy array of decade counts for support nouns 
import pandas as pd
import numpy as np

noun_grouped_frame_df = pd.read_csv('/h/19/jadeleiyu/frame_extension/data/gsn/noun_decade_counts_df.csv')
support_nouns = list(noun_grouped_frame_df['noun'])
support_noun2idx = {support_nouns[i]:i for i in range(len(support_nouns))}

noun_decade_counts = np.zeros((21, len(support_nouns)))
for decade in range(1800, 2010, 10):
    decade_idx = int((decade - 1800) / 10)
    noun_decade_counts[decade_idx] = np.array(noun_grouped_frame_df['count in {}s'.format(decade)])

pickle.dump(noun_decade_counts.astype(int), open('/h/19/jadeleiyu/frame_extension/data/gsn/noun_decade_counts.p', 'wb'))
pickle.dump(support_noun2idx, open('/h/19/jadeleiyu/frame_extension/data/gsn/support_noun2idx.p', 'wb'))

In [1]:
# compute conceptnet historical representations for all words in cnp vocab
# later we will only make use of the embeddings of support nouns 
import pickle
from conceptnet import *

word2cnp_idx = pickle.load(open('/h/19/jadeleiyu/frame_extension/data/cnp/word2cnp_idx.p', 'rb'))
weighted_co_occurrences = pickle.load(open('/h/19/jadeleiyu/frame_extension/data/cnp/weighted_co_occurrences.p', 'rb'))
noun_decade_counts = pickle.load(open('/h/19/jadeleiyu/frame_extension/data/gsn/noun_decade_counts.p', 'rb'))
support_noun2idx = pickle.load(open('/h/19/jadeleiyu/frame_extension/data/gsn/support_noun2idx.p', 'rb'))

compute_cnp_hist_embeddings(support_noun2idx, word2cnp_idx, weighted_co_occurrences, noun_decade_counts)

 24%|██▍       | 5/21 [00:00<00:01, 10.49it/s]

converting co-occurrence matrix of decade 1850 into PPMI matrix...


  ppmi = sparse.diags(1 / word_counts).dot(counts_csr)
  ppmi = ppmi.dot(sparse.diags(1 / smooth_context_freqs))


performing SVD on PPMI matrix of decade 1850...


100%|██████████| 21/21 [00:26<00:00,  1.27s/it]


In [None]:
# compute imagenet representations for support nouns 
from imagenet import compute_visual_representations
import pickle
support_noun2idx = pickle.load(open('/h/19/jadeleiyu/frame_extension/data/gsn/support_noun2idx.p', 'rb'))
image_paths = pickle.load(open('/h/19/jadeleiyu/frame_extension/data/img/imagenet_image_paths.p', 'rb'))
word2img_idx = pickle.load(open('/h/19/jadeleiyu/frame_extension/data/img/imagenet_word2img_idx.p', 'rb'))

compute_visual_representations(support_noun2idx, image_paths, word2img_idx, img_sample_size=64)

In [43]:
import pandas as pd
grouped_frame_df = pd.read_csv('/h/19/jadeleiyu/frame_extension/data/gsn/vrn_grouped_frame_df.csv')

In [44]:
from gsn import learning_df_prep
learning_df_prep(grouped_frame_df, start_dec=1850, end_dec=2000, freq_theta=5)

  0%|          | 0/16 [00:00<?, ?it/s]
0it [00:00, ?it/s][A
191it [00:00, 1909.05it/s][A
350it [00:00, 1799.94it/s][A
542it [00:00, 1834.08it/s][A
770it [00:00, 1946.30it/s][A
989it [00:00, 1999.35it/s][A
1173it [00:00, 1946.00it/s][A
1355it [00:00, 1887.77it/s][A
1530it [00:00, 1689.93it/s][A
1775it [00:00, 1862.47it/s][A
1963it [00:01, 1464.81it/s][A
2145it [00:01, 1552.39it/s][A
2397it [00:01, 1753.82it/s][A
2634it [00:01, 1894.54it/s][A
2839it [00:01, 1734.96it/s][A
3026it [00:01, 1770.54it/s][A
3213it [00:01, 1787.98it/s][A
3399it [00:01, 1722.27it/s][A
3629it [00:01, 1860.20it/s][A
3859it [00:02, 1972.24it/s][A
4080it [00:02, 2037.32it/s][A
4289it [00:02, 1954.57it/s][A
4489it [00:02, 1881.92it/s][A
4681it [00:02, 1868.99it/s][A
4891it [00:02, 1923.69it/s][A
5086it [00:02, 1806.51it/s][A
5316it [00:02, 1929.78it/s][A
5514it [00:02, 1895.72it/s][A
5707it [00:03, 1881.34it/s][A
5915it [00:03, 1933.82it/s][A
6162it [00:03, 2067.89it/s][A
6373it [00:03,

decade 1850s has 305879 established usages for meta-learning, 71143 novel usages in next decade, 316050 novel usages in all future decades


  6%|▋         | 1/16 [00:17<04:18, 17.26s/it]
0it [00:00, ?it/s][A
167it [00:00, 1588.15it/s][A
343it [00:00, 1635.73it/s][A
481it [00:00, 1542.25it/s][A
656it [00:00, 1598.49it/s][A
842it [00:00, 1667.69it/s][A
1024it [00:00, 1671.33it/s][A
1174it [00:00, 1599.17it/s][A
1363it [00:00, 1675.54it/s][A
1524it [00:00, 1549.47it/s][A
1759it [00:01, 1724.56it/s][A
1936it [00:01, 1436.51it/s][A
2102it [00:01, 1491.22it/s][A
2362it [00:01, 1708.84it/s][A
2589it [00:01, 1842.92it/s][A
2789it [00:01, 1845.61it/s][A
2984it [00:01, 1748.91it/s][A
3168it [00:01, 1666.17it/s][A
3342it [00:01, 1637.92it/s][A
3514it [00:02, 1637.42it/s][A
3707it [00:02, 1714.84it/s][A
3883it [00:02, 1726.68it/s][A
4075it [00:02, 1779.74it/s][A
4273it [00:02, 1832.85it/s][A
4459it [00:02, 1627.17it/s][A
4628it [00:02, 1556.31it/s][A
4807it [00:02, 1615.56it/s][A
4973it [00:02, 1578.10it/s][A
5134it [00:03, 1517.37it/s][A
5331it [00:03, 1627.98it/s][A
5498it [00:03, 1610.63it/s][A
5690it

decade 1860s has 362379 established usages for meta-learning, 54547 novel usages in next decade, 276146 novel usages in all future decades


 12%|█▎        | 2/16 [00:32<03:53, 16.71s/it]
0it [00:00, ?it/s][A
328it [00:00, 3277.99it/s][A
705it [00:00, 3410.29it/s][A
1099it [00:00, 3551.21it/s][A
1434it [00:00, 3485.92it/s][A
1821it [00:00, 3591.97it/s][A
2128it [00:00, 2688.82it/s][A
2394it [00:00, 2623.49it/s][A
2655it [00:00, 2494.04it/s][A
2905it [00:01, 2250.33it/s][A
3135it [00:01, 2123.05it/s][A
3352it [00:01, 1944.92it/s][A
3553it [00:01, 1870.88it/s][A
3759it [00:01, 1923.51it/s][A
3974it [00:01, 1983.35it/s][A
4197it [00:01, 2048.58it/s][A
4405it [00:01, 1819.74it/s][A
4594it [00:01, 1757.81it/s][A
4775it [00:02, 1741.10it/s][A
4953it [00:02, 1694.23it/s][A
5126it [00:02, 1579.03it/s][A
5321it [00:02, 1670.23it/s][A
5492it [00:02, 1676.77it/s][A
5671it [00:02, 1705.75it/s][A
5918it [00:02, 1879.04it/s][A
6135it [00:02, 1956.56it/s][A
6337it [00:02, 1924.08it/s][A
6570it [00:03, 2029.24it/s][A
6778it [00:03, 1869.20it/s][A
6971it [00:03, 1711.03it/s][A
7173it [00:03, 1792.21it/s][A
735

decade 1870s has 402993 established usages for meta-learning, 51733 novel usages in next decade, 238346 novel usages in all future decades


 19%|█▉        | 3/16 [00:49<03:38, 16.82s/it]
0it [00:00, ?it/s][A
172it [00:00, 1717.73it/s][A
319it [00:00, 1634.33it/s][A
434it [00:00, 1450.52it/s][A
585it [00:00, 1460.58it/s][A
778it [00:00, 1574.84it/s][A
989it [00:00, 1698.03it/s][A
1145it [00:00, 1617.12it/s][A
1311it [00:00, 1627.46it/s][A
1468it [00:00, 1566.40it/s][A
1662it [00:01, 1660.80it/s][A
1872it [00:01, 1609.20it/s][A
2033it [00:01, 1387.04it/s][A
2222it [00:01, 1506.32it/s][A
2479it [00:01, 1718.35it/s][A
2704it [00:01, 1847.58it/s][A
2902it [00:01, 1766.15it/s][A
3089it [00:01, 1779.79it/s][A
3275it [00:01, 1791.71it/s][A
3460it [00:02, 1611.37it/s][A
3629it [00:02, 1633.09it/s][A
3878it [00:02, 1820.69it/s][A
4093it [00:02, 1907.09it/s][A
4292it [00:02, 1725.74it/s][A
4491it [00:02, 1794.30it/s][A
4678it [00:02, 1720.03it/s][A
4856it [00:02, 1657.70it/s][A
5027it [00:03, 1496.65it/s][A
5231it [00:03, 1625.51it/s][A
5416it [00:03, 1037.87it/s][A
5566it [00:03, 1142.39it/s][A
5777it 

decade 1880s has 441941 established usages for meta-learning, 50853 novel usages in next decade, 200278 novel usages in all future decades


 25%|██▌       | 4/16 [01:09<03:32, 17.72s/it]
0it [00:00, ?it/s][A
323it [00:00, 3188.25it/s][A
632it [00:00, 3156.78it/s][A
1002it [00:00, 3301.24it/s][A
1310it [00:00, 3230.07it/s][A
1610it [00:00, 3155.38it/s][A
1884it [00:00, 3005.89it/s][A
2182it [00:00, 2996.27it/s][A
2626it [00:00, 3319.65it/s][A
2966it [00:00, 3342.64it/s][A
3296it [00:01, 3242.85it/s][A
3624it [00:01, 3252.69it/s][A
4003it [00:01, 3395.93it/s][A
4344it [00:01, 2202.25it/s][A
4621it [00:01, 2346.10it/s][A
4935it [00:01, 2536.86it/s][A
5232it [00:01, 2645.96it/s][A
5575it [00:01, 2840.31it/s][A
5969it [00:01, 3099.47it/s][A
6368it [00:02, 3321.50it/s][A
6720it [00:02, 3268.69it/s][A
7061it [00:02, 3302.85it/s][A
7478it [00:02, 3522.36it/s][A
7842it [00:02, 3328.74it/s][A
8185it [00:02, 3331.69it/s][A
8558it [00:02, 3441.48it/s][A
8908it [00:02, 3444.07it/s][A
9261it [00:02, 3468.48it/s][A
9644it [00:03, 3568.53it/s][A
10004it [00:03, 3569.12it/s][A
10363it [00:03, 3283.73it/s][A
1

decade 1890s has 481733 established usages for meta-learning, 49796 novel usages in next decade, 161543 novel usages in all future decades


 31%|███▏      | 5/16 [01:20<02:51, 15.57s/it]
0it [00:00, ?it/s][A
172it [00:00, 1715.22it/s][A
319it [00:00, 1630.72it/s][A
449it [00:00, 1513.61it/s][A
642it [00:00, 1617.22it/s][A
846it [00:00, 1715.54it/s][A
1024it [00:00, 1711.19it/s][A
1183it [00:00, 1672.06it/s][A
1340it [00:00, 1640.05it/s][A
1496it [00:00, 1454.02it/s][A
1727it [00:01, 1635.18it/s][A
1897it [00:01, 846.74it/s] [A
2028it [00:01, 945.64it/s][A
2207it [00:01, 1101.49it/s][A
2405it [00:01, 1269.33it/s][A
2637it [00:01, 1468.17it/s][A
2819it [00:01, 1468.46it/s][A
2991it [00:02, 1504.49it/s][A
3161it [00:02, 1557.92it/s][A
3335it [00:02, 1605.73it/s][A
3514it [00:02, 1634.28it/s][A
3687it [00:02, 1660.26it/s][A
3916it [00:02, 1797.74it/s][A
4116it [00:02, 1852.53it/s][A
4307it [00:02, 1657.46it/s][A
4481it [00:02, 1593.06it/s][A
4646it [00:03, 1555.39it/s][A
4835it [00:03, 1642.33it/s][A
5004it [00:03, 1507.49it/s][A
5160it [00:03, 1518.86it/s][A
5363it [00:03, 1642.66it/s][A
5541it 

decade 1900s has 521976 established usages for meta-learning, 46825 novel usages in next decade, 124271 novel usages in all future decades


 38%|███▊      | 6/16 [01:39<02:46, 16.65s/it]
0it [00:00, ?it/s][A
167it [00:00, 480.07it/s][A
323it [00:00, 604.80it/s][A
461it [00:00, 726.76it/s][A
643it [00:00, 886.49it/s][A
849it [00:00, 1069.13it/s][A
1014it [00:00, 1193.66it/s][A
1161it [00:00, 1247.17it/s][A
1315it [00:01, 1318.67it/s][A
1462it [00:01, 1317.59it/s][A
1651it [00:01, 1448.67it/s][A
1868it [00:01, 1607.18it/s][A
2043it [00:01, 1214.15it/s][A
2251it [00:01, 1383.19it/s][A
2460it [00:01, 1537.38it/s][A
2697it [00:01, 1718.26it/s][A
2892it [00:02, 1670.96it/s][A
3076it [00:02, 1693.77it/s][A
3257it [00:02, 1684.65it/s][A
3434it [00:02, 1538.43it/s][A
3625it [00:02, 1632.43it/s][A
3810it [00:02, 1692.12it/s][A
4020it [00:02, 1793.96it/s][A
4205it [00:02, 1780.34it/s][A
4387it [00:02, 1586.09it/s][A
4553it [00:03, 1596.96it/s][A
4753it [00:03, 1698.83it/s][A
4928it [00:03, 1657.26it/s][A
5098it [00:03, 1597.00it/s][A
5305it [00:03, 1714.02it/s][A
5481it [00:03, 1712.01it/s][A
5704it [00

decade 1910s has 560586 established usages for meta-learning, 34254 novel usages in next decade, 98232 novel usages in all future decades


 44%|████▍     | 7/16 [01:55<02:28, 16.46s/it]
0it [00:00, ?it/s][A
195it [00:00, 1943.45it/s][A
349it [00:00, 1795.27it/s][A
489it [00:00, 1652.53it/s][A
667it [00:00, 1688.70it/s][A
962it [00:00, 1936.33it/s][A
1138it [00:00, 1764.47it/s][A
1305it [00:00, 1705.19it/s][A
1470it [00:00, 1552.15it/s][A
1696it [00:00, 1712.16it/s][A
1872it [00:01, 1681.81it/s][A
2044it [00:01, 1430.34it/s][A
2252it [00:01, 1575.81it/s][A
2453it [00:01, 1683.81it/s][A
2667it [00:01, 1795.43it/s][A
2856it [00:01, 1750.37it/s][A
3038it [00:01, 1727.44it/s][A
3216it [00:01, 1677.59it/s][A
3388it [00:01, 1576.55it/s][A
3563it [00:02, 1623.64it/s][A
3744it [00:02, 1673.93it/s][A
3976it [00:02, 1826.10it/s][A
4209it [00:02, 1949.61it/s][A
4411it [00:02, 1709.29it/s][A
4592it [00:02, 1683.27it/s][A
4790it [00:02, 1761.70it/s][A
4973it [00:02, 1614.79it/s][A
5141it [00:03, 1560.42it/s][A
5323it [00:03, 1629.32it/s][A
5493it [00:03, 1647.21it/s][A
5665it [00:03, 1665.60it/s][A
5879it

decade 1920s has 587844 established usages for meta-learning, 23835 novel usages in next decade, 81393 novel usages in all future decades


 50%|█████     | 8/16 [02:13<02:15, 16.89s/it]
0it [00:00, ?it/s][A
216it [00:00, 2156.50it/s][A
350it [00:00, 1785.04it/s][A
492it [00:00, 1654.20it/s][A
668it [00:00, 1683.08it/s][A
876it [00:00, 1784.29it/s][A
1025it [00:00, 1653.61it/s][A
1198it [00:00, 1675.60it/s][A
1355it [00:00, 1633.81it/s][A
1510it [00:00, 1535.13it/s][A
1745it [00:01, 1712.87it/s][A
1920it [00:01, 1364.58it/s][A
2085it [00:01, 1438.47it/s][A
2291it [00:01, 1580.34it/s][A
2522it [00:01, 1745.62it/s][A
2771it [00:01, 1916.04it/s][A
2978it [00:01, 1878.49it/s][A
3177it [00:01, 1878.51it/s][A
3373it [00:01, 1846.56it/s][A
3563it [00:02, 1805.68it/s][A
3754it [00:02, 1835.30it/s][A
4007it [00:02, 1999.19it/s][A
4227it [00:02, 2051.61it/s][A
4437it [00:02, 1743.65it/s][A
4623it [00:02, 1622.20it/s][A
4795it [00:02, 1628.23it/s][A
4965it [00:02, 1564.06it/s][A
5127it [00:03, 1470.10it/s][A
5328it [00:03, 1598.50it/s][A
5495it [00:03, 1580.38it/s][A
5722it [00:03, 1735.76it/s][A
5927it

decade 1930s has 605722 established usages for meta-learning, 20747 novel usages in next decade, 66603 novel usages in all future decades


 56%|█████▋    | 9/16 [02:30<01:58, 16.98s/it]
0it [00:00, ?it/s][A
188it [00:00, 1855.03it/s][A
341it [00:00, 1741.81it/s][A
471it [00:00, 1579.82it/s][A
648it [00:00, 1624.30it/s][A
866it [00:00, 1757.90it/s][A
1024it [00:00, 1671.78it/s][A
1190it [00:00, 1667.64it/s][A
1381it [00:00, 1687.61it/s][A
1542it [00:00, 1576.55it/s][A
1733it [00:01, 1661.97it/s][A
1898it [00:01, 1387.20it/s][A
2044it [00:01, 1346.36it/s][A
2222it [00:01, 1448.51it/s][A
2449it [00:01, 1620.37it/s][A
2708it [00:01, 1823.41it/s][A
2907it [00:01, 1743.22it/s][A
3094it [00:01, 1667.73it/s][A
3289it [00:01, 1680.45it/s][A
3464it [00:02, 1647.79it/s][A
3655it [00:02, 1717.31it/s][A
3876it [00:02, 1839.27it/s][A
4066it [00:02, 1838.33it/s][A
4273it [00:02, 1897.36it/s][A
4466it [00:02, 1766.89it/s][A
4647it [00:02, 1723.61it/s][A
4840it [00:02, 1770.27it/s][A
5020it [00:02, 1525.12it/s][A
5181it [00:03, 1525.07it/s][A
5370it [00:03, 1618.42it/s][A
5538it [00:03, 1591.04it/s][A
5741it

decade 1940s has 620953 established usages for meta-learning, 21200 novel usages in next decade, 50919 novel usages in all future decades


 62%|██████▎   | 10/16 [02:50<01:48, 18.02s/it]
0it [00:00, ?it/s][A
163it [00:00, 1600.18it/s][A
315it [00:00, 1568.41it/s][A
443it [00:00, 1467.53it/s][A
645it [00:00, 1598.46it/s][A
809it [00:00, 1608.86it/s][A
1028it [00:00, 1745.49it/s][A
1190it [00:00, 1676.66it/s][A
1357it [00:00, 1672.24it/s][A
1519it [00:00, 1606.30it/s][A
1755it [00:01, 1775.18it/s][A
1936it [00:01, 1456.30it/s][A
2094it [00:01, 1458.16it/s][A
2302it [00:01, 1601.36it/s][A
2522it [00:01, 1738.71it/s][A
2728it [00:01, 1809.59it/s][A
2917it [00:01, 1713.64it/s][A
3096it [00:01, 1674.01it/s][A
3269it [00:01, 1673.89it/s][A
3440it [00:02, 1520.56it/s][A
3599it [00:02, 1540.25it/s][A
3790it [00:02, 1635.09it/s][A
3968it [00:02, 1674.73it/s][A
4210it [00:02, 1840.57it/s][A
4402it [00:02, 1645.02it/s][A
4576it [00:02, 1600.45it/s][A
4743it [00:02, 1560.07it/s][A
4909it [00:02, 1588.74it/s][A
5072it [00:03, 1494.69it/s][A
5239it [00:03, 1542.11it/s][A
5416it [00:03, 1591.68it/s][A
5578i

decade 1950s has 637495 established usages for meta-learning, 20560 novel usages in next decade, 35017 novel usages in all future decades


 69%|██████▉   | 11/16 [03:09<01:30, 18.07s/it]
0it [00:00, ?it/s][A
167it [00:00, 1599.56it/s][A
320it [00:00, 1575.27it/s][A
460it [00:00, 1506.17it/s][A
637it [00:00, 1571.63it/s][A
864it [00:00, 1730.82it/s][A
1038it [00:00, 1733.13it/s][A
1216it [00:00, 1737.83it/s][A
1380it [00:00, 1683.29it/s][A
1785it [00:00, 2040.93it/s][A
2020it [00:01, 2079.47it/s][A
2449it [00:01, 2459.05it/s][A
2843it [00:01, 2771.48it/s][A
3218it [00:01, 3006.54it/s][A
3555it [00:01, 3066.56it/s][A
3994it [00:01, 3369.35it/s][A
4358it [00:01, 3327.80it/s][A
4710it [00:01, 3302.87it/s][A
5054it [00:01, 3222.26it/s][A
5421it [00:01, 3341.43it/s][A
5821it [00:02, 3514.99it/s][A
6196it [00:02, 3581.06it/s][A
6600it [00:02, 3703.17it/s][A
6976it [00:02, 3493.56it/s][A
7436it [00:02, 3763.96it/s][A
7823it [00:02, 3559.16it/s][A
8189it [00:02, 3518.92it/s][A
8569it [00:02, 3598.63it/s][A
8952it [00:02, 3663.83it/s][A
9323it [00:03, 3674.31it/s][A
9694it [00:03, 3648.53it/s][A
10061

decade 1960s has 654444 established usages for meta-learning, 19309 novel usages in next decade, 19319 novel usages in all future decades


 75%|███████▌  | 12/16 [03:19<01:02, 15.70s/it]
0it [00:00, ?it/s][A
186it [00:00, 1858.68it/s][A
348it [00:00, 1779.37it/s][A
486it [00:00, 1635.85it/s][A
640it [00:00, 1601.46it/s][A
814it [00:00, 1640.36it/s][A
990it [00:00, 1673.77it/s][A
1137it [00:00, 1041.24it/s][A
1287it [00:00, 1145.94it/s][A
1433it [00:01, 1224.17it/s][A
1593it [00:01, 1315.62it/s][A
1841it [00:01, 1530.48it/s][A
2015it [00:01, 1206.99it/s][A
2223it [00:01, 1378.76it/s][A
2512it [00:01, 1633.86it/s][A
2731it [00:01, 1767.91it/s][A
2938it [00:01, 1762.51it/s][A
3135it [00:02, 1727.84it/s][A
3323it [00:02, 1671.73it/s][A
3501it [00:02, 1605.60it/s][A
3679it [00:02, 1654.09it/s][A
3901it [00:02, 1789.40it/s][A
4141it [00:02, 1935.85it/s][A
4344it [00:02, 1700.40it/s][A
4526it [00:02, 1717.98it/s][A
4706it [00:02, 1700.33it/s][A
4882it [00:03, 1672.45it/s][A
5054it [00:03, 1510.02it/s][A
5227it [00:03, 1569.82it/s][A
5421it [00:03, 1644.50it/s][A
5604it [00:03, 1694.79it/s][A
5820it

decade 1970s has 671592 established usages for meta-learning, 11930 novel usages in next decade, 9550 novel usages in all future decades


 81%|████████▏ | 13/16 [03:37<00:49, 16.40s/it]
0it [00:00, ?it/s][A
167it [00:00, 1661.41it/s][A
338it [00:00, 1674.00it/s][A
464it [00:00, 1522.73it/s][A
639it [00:00, 1583.52it/s][A
861it [00:00, 1730.92it/s][A
1024it [00:00, 1686.58it/s][A
1217it [00:00, 1752.49it/s][A
1388it [00:00, 1732.61it/s][A
1555it [00:00, 1638.85it/s][A
1749it [00:01, 1718.81it/s][A
1920it [00:01, 1362.35it/s][A
2102it [00:01, 1453.14it/s][A
2323it [00:01, 1618.91it/s][A
2585it [00:01, 1828.50it/s][A
2799it [00:01, 1911.96it/s][A
3004it [00:01, 1864.58it/s][A
3201it [00:01, 1787.33it/s][A
3388it [00:01, 1739.19it/s][A
3568it [00:02, 1746.35it/s][A
3805it [00:02, 1893.08it/s][A
4026it [00:02, 1971.92it/s][A
4249it [00:02, 2041.94it/s][A
4458it [00:02, 1784.13it/s][A
4646it [00:02, 1780.18it/s][A
4851it [00:02, 1824.55it/s][A
5039it [00:02, 1639.41it/s][A
5232it [00:02, 1713.54it/s][A
5416it [00:03, 1735.40it/s][A
5603it [00:03, 1771.94it/s][A
5856it [00:03, 1946.70it/s][A
6058i

decade 1980s has 682424 established usages for meta-learning, 6977 novel usages in next decade, 3671 novel usages in all future decades


 88%|████████▊ | 14/16 [03:53<00:32, 16.47s/it]
0it [00:00, ?it/s][A
286it [00:00, 2857.69it/s][A
432it [00:00, 2201.57it/s][A
612it [00:00, 2063.01it/s][A
799it [00:00, 1997.96it/s][A
986it [00:00, 1954.58it/s][A
1143it [00:00, 1704.52it/s][A
1332it [00:00, 1755.72it/s][A
1493it [00:00, 1564.67it/s][A
1719it [00:00, 1723.35it/s][A
1892it [00:01, 1406.74it/s][A
2043it [00:01, 1340.88it/s][A
2238it [00:01, 1478.39it/s][A
2430it [00:01, 1587.95it/s][A
2648it [00:01, 1725.72it/s][A
2831it [00:01, 1689.55it/s][A
3034it [00:01, 1778.29it/s][A
3218it [00:01, 1733.72it/s][A
3396it [00:02, 1608.99it/s][A
3584it [00:02, 1680.16it/s][A
3777it [00:02, 1747.84it/s][A
4000it [00:02, 1868.82it/s][A
4212it [00:02, 1930.52it/s][A
4410it [00:02, 1725.17it/s][A
4590it [00:02, 1664.13it/s][A
4762it [00:02, 1679.83it/s][A
4934it [00:02, 1587.75it/s][A
5097it [00:03, 1529.34it/s][A
5284it [00:03, 1616.25it/s][A
5450it [00:03, 1627.72it/s][A
5632it [00:03, 1680.63it/s][A
5860i

decade 1990s has 688972 established usages for meta-learning, 3600 novel usages in next decade, 500 novel usages in all future decades


 94%|█████████▍| 15/16 [04:11<00:16, 16.83s/it]
0it [00:00, ?it/s][A
176it [00:00, 1756.85it/s][A
351it [00:00, 1746.95it/s][A
522it [00:00, 1735.52it/s][A
735it [00:00, 1833.55it/s][A
933it [00:00, 1874.40it/s][A
1093it [00:00, 1738.59it/s][A
1250it [00:00, 1634.31it/s][A
1422it [00:00, 1658.64it/s][A
1612it [00:00, 1723.67it/s][A
1872it [00:01, 1784.29it/s][A
2048it [00:01, 1422.47it/s][A
2230it [00:01, 1521.89it/s][A
2465it [00:01, 1700.48it/s][A
2730it [00:01, 1892.75it/s][A
2936it [00:01, 1841.37it/s][A
3132it [00:01, 1798.27it/s][A
3321it [00:01, 1659.25it/s][A
3495it [00:02, 1663.26it/s][A
3670it [00:02, 1688.16it/s][A
3868it [00:02, 1765.53it/s][A
4111it [00:02, 1921.53it/s][A
4310it [00:02, 1720.51it/s][A
4501it [00:02, 1772.43it/s][A
4687it [00:02, 1796.61it/s][A
4888it [00:02, 1855.28it/s][A
5078it [00:02, 1675.55it/s][A
5252it [00:03, 1640.73it/s][A
5422it [00:03, 1657.26it/s][A
5631it [00:03, 1765.89it/s][A
5885it [00:03, 1939.48it/s][A
6101i

decade 2000s has 692510 established usages for meta-learning, 562 novel usages in next decade, 0 novel usages in all future decades


100%|██████████| 16/16 [04:29<00:00, 16.85s/it]


In [45]:
from tqdm import tqdm
import pandas as pd
min_estab_num = 5
min_novel_num = 1
for decade in tqdm(range(1850, 2010, 10)):
    valid_row_idx = []
    learning_df = pd.read_csv('/h/19/jadeleiyu/frame_extension/data/gsn/gsn_learning_df_{}s.csv'.format(decade))
    for index, row in learning_df.iterrows():
        established_nouns = eval(row['established nouns'])
        nn_next = eval(row['novel nouns next decade'])
        nn_future = eval(row['novel nouns all future'])
        if len(established_nouns) >= min_estab_num and len(nn_next) >= min_novel_num and len(nn_future) >= min_novel_num:
            valid_row_idx.append(index)
    learning_df = learning_df.iloc[valid_row_idx, :]
    learning_df.to_csv('/h/19/jadeleiyu/frame_extension/data/gsn/gsn_learning_df_{}s.csv'.format(decade), index=False)
        
    

100%|██████████| 16/16 [02:11<00:00,  8.24s/it]


In [3]:
# get verb2idx and rel2idx
import pandas as pd
grouped_frame_df = pd.read_csv('/h/19/jadeleiyu/frame_extension/data/gsn/grouped_frame_df.csv')

In [4]:
grouped_frame_df

Unnamed: 0,verb,relation,noun,count in 1800s,total count up to 1800s,count in 1810s,total count up to 1810s,count in 1820s,total count up to 1820s,count in 1830s,...,count in 1960s,total count up to 1960s,count in 1970s,total count up to 1970s,count in 1980s,total count up to 1980s,count in 1990s,total count up to 1990s,count in 2000s,total count up to 2000s
0,abandon,dobj,"['abode', 'action', 'aircraft', 'animal', 'apa...","[1, 0, 0, 0, 0, 0, 4, 0, 4, 2, 0, 13, 0, 0, 1,...","[1, 0, 0, 0, 0, 0, 4, 0, 4, 2, 0, 13, 0, 0, 1,...","[3, 0, 0, 0, 0, 0, 1, 1, 7, 0, 0, 16, 12, 0, 1...","[4, 0, 0, 0, 0, 0, 5, 1, 11, 2, 0, 29, 12, 0, ...","[7, 6, 0, 2, 0, 0, 7, 3, 12, 0, 1, 44, 4, 0, 0...","[11, 6, 0, 2, 0, 0, 12, 4, 23, 2, 1, 73, 16, 0...","[8, 0, 0, 1, 0, 0, 4, 12, 8, 0, 0, 35, 10, 0, ...",...,"[7, 73, 85, 17, 7, 175, 81, 54, 3, 38, 28, 53,...","[176, 376, 183, 182, 31, 330, 455, 466, 116, 1...","[11, 147, 56, 23, 4, 255, 30, 50, 10, 33, 76, ...","[187, 523, 239, 205, 35, 585, 485, 516, 126, 1...","[9, 135, 93, 24, 6, 284, 53, 59, 0, 27, 95, 36...","[196, 658, 332, 229, 41, 869, 538, 575, 126, 2...","[6, 175, 80, 44, 16, 409, 47, 84, 2, 51, 265, ...","[202, 833, 412, 273, 57, 1278, 585, 659, 128, ...","[37, 229, 195, 138, 48, 482, 109, 171, 19, 76,...","[239, 1062, 607, 411, 105, 1760, 694, 830, 147..."
1,abandon,nsubj,"['body', 'camp', 'child', 'church', 'city', 'c...","[0, 5, 3, 0, 3, 0, 0, 0, 0, 22, 0, 0, 0, 3, 2,...","[0, 5, 3, 0, 3, 0, 0, 0, 0, 22, 0, 0, 0, 3, 2,...","[4, 2, 7, 3, 1, 0, 0, 0, 1, 74, 0, 0, 2, 2, 2,...","[4, 7, 10, 3, 4, 0, 0, 0, 1, 96, 0, 0, 2, 5, 4...","[3, 2, 5, 0, 0, 0, 0, 0, 10, 65, 0, 3, 5, 11, ...","[7, 9, 15, 3, 4, 0, 0, 0, 11, 161, 0, 3, 7, 16...","[10, 2, 3, 0, 0, 3, 5, 0, 22, 81, 0, 4, 2, 0, ...",...,"[4, 16, 40, 14, 25, 5, 42, 10, 5, 67, 19, 68, ...","[179, 157, 165, 157, 102, 56, 214, 91, 215, 12...","[2, 21, 29, 9, 18, 6, 15, 4, 5, 50, 9, 70, 6, ...","[181, 178, 194, 166, 120, 62, 229, 95, 220, 12...","[1, 21, 36, 4, 15, 5, 6, 8, 3, 34, 16, 81, 13,...","[182, 199, 230, 170, 135, 67, 235, 103, 223, 1...","[11, 43, 33, 11, 23, 7, 16, 1, 0, 51, 10, 206,...","[193, 242, 263, 181, 158, 74, 251, 104, 223, 1...","[57, 60, 64, 48, 56, 29, 28, 39, 50, 205, 30, ...","[250, 302, 327, 229, 214, 103, 279, 143, 273, ..."
2,abandon,pobj_prep.as,"['baby', 'child', 'failure']","[0, 0, 0]","[0, 0, 0]","[0, 0, 0]","[0, 0, 0]","[0, 0, 0]","[0, 0, 0]","[0, 0, 1]",...,"[7, 9, 16]","[12, 14, 90]","[8, 19, 12]","[20, 33, 102]","[16, 45, 9]","[36, 78, 111]","[38, 109, 19]","[74, 187, 130]","[54, 166, 28]","[128, 353, 158]"
3,abandon,pobj_prep.at,"['birth', 'foot', 'point', 'stage']","[0, 0, 0, 0]","[0, 0, 0, 0]","[0, 0, 0, 0]","[0, 0, 0, 0]","[0, 0, 0, 0]","[0, 0, 0, 0]","[0, 0, 0, 0]",...,"[12, 30, 26, 17]","[50, 115, 129, 99]","[18, 3, 21, 19]","[68, 118, 150, 118]","[45, 5, 19, 14]","[113, 123, 169, 132]","[85, 15, 16, 16]","[198, 138, 185, 148]","[206, 22, 76, 42]","[404, 160, 261, 190]"
4,abandon,pobj_prep.by,"['authority', 'brother', 'child', 'church', 'c...","[0, 4, 2, 2, 0, 2, 0, 0, 11, 2, 3, 15, 1, 0, 1...","[0, 4, 2, 2, 0, 2, 0, 0, 11, 2, 3, 15, 1, 0, 1...","[0, 0, 1, 0, 0, 0, 1, 0, 10, 2, 2, 25, 0, 0, 1...","[0, 4, 3, 2, 0, 2, 1, 0, 21, 4, 5, 40, 1, 0, 2...","[0, 1, 8, 0, 0, 0, 0, 0, 12, 2, 11, 37, 9, 0, ...","[0, 5, 11, 2, 0, 2, 1, 0, 33, 6, 16, 77, 10, 0...","[0, 6, 13, 0, 0, 3, 2, 0, 20, 1, 2, 34, 18, 0,...",...,"[9, 2, 43, 15, 23, 5, 10, 7, 6, 11, 17, 106, 6...","[72, 65, 181, 131, 71, 150, 108, 86, 186, 70, ...","[19, 12, 50, 18, 40, 7, 16, 2, 3, 3, 10, 78, 1...","[91, 77, 231, 149, 111, 157, 124, 88, 189, 73,...","[11, 5, 46, 23, 11, 5, 18, 4, 1, 9, 6, 66, 162...","[102, 82, 277, 172, 122, 162, 142, 92, 190, 82...","[13, 10, 63, 39, 45, 6, 18, 16, 6, 14, 10, 111...","[115, 92, 340, 211, 167, 168, 160, 108, 196, 9...","[30, 36, 93, 91, 54, 29, 49, 34, 40, 25, 23, 2...","[145, 128, 433, 302, 221, 197, 209, 142, 236, ..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
28163,zone,pobj_prep.in,"['area', 'city']","[0, 0]","[0, 0]","[0, 0]","[0, 0]","[0, 0]","[0, 0]","[0, 0]",...,"[27, 49]","[53, 73]","[66, 30]","[119, 103]","[26, 16]","[145, 119]","[39, 30]","[184, 149]","[35, 26]","[219, 175]"
28164,zoom,dobj,"['lens', 'window']","[0, 0]","[0, 0]","[0, 0]","[0, 0]","[0, 0]","[0, 0]","[0, 0]",...,"[10, 0]","[11, 0]","[23, 0]","[34, 0]","[19, 12]","[53, 12]","[73, 46]","[126, 58]","[94, 107]","[220, 165]"
28165,zoom,nsubj,['camera'],[0],[0],[0],[0],[0],[0],[0],...,[0],[0],[10],[10],[28],[38],[70],[108],[137],[245]
28166,zoom,pobj_prep.into,['area'],[0],[0],[0],[0],[0],[0],[0],...,[0],[0],[0],[0],[1],[1],[16],[17],[106],[123]


In [4]:
verbs = set(grouped_frame_df['verb'])
len(verbs)

5256

In [5]:
rels = set(grouped_frame_df['relation'])
len(rels)

18

In [10]:
support_noun2idx = pickle.load(open('/h/19/jadeleiyu/frame_extension/data/gsn/support_noun2idx.p', 'rb'))
len(support_noun2idx)



4133

0

In [9]:
import pickle
verb2idx = {v:i for (i, v) in enumerate(verbs)}
pickle.dump(verb2idx, open('/h/19/jadeleiyu/frame_extension/data/gsn/verb2idx.p', 'wb'))

rel2idx = {r:i for (i, r) in enumerate(rels)}
pickle.dump(rel2idx, open('/h/19/jadeleiyu/frame_extension/data/gsn/rel2idx.p', 'wb'))


In [1]:
# get HistWords embeddings for support nouns
from histwords import get_histwords_embeddings
import pickle

noun2idx = pickle.load(open('/h/19/jadeleiyu/frame_extension/data/gsn/support_noun2idx.p', 'rb'))
get_histwords_embeddings(noun2idx)

In [1]:
from gsn import learning_df_prep
import pandas as pd
from tqdm import tqdm
list_converter = {'novel nouns next decade': eval, 'novel nouns all future': eval, 'established nouns': eval,
                      'established noun counts': eval}
grouped_frame_df = pd.read_csv('/h/19/jadeleiyu/frame_extension/data/gsn/grouped_frame_df.csv',
                              converters=list_converter)

In [2]:
learning_df_prep(grouped_frame_df)

28168it [00:08, 3334.54it/s]
28168it [00:08, 3354.83it/s]


In [5]:
from gsn import get_test_dfs
get_test_dfs()

In [4]:
import pandas as pd
pd.set_option('display.max_colwidth', -1)
pd.set_option('display.max_rows', 1000)
training_df = pd.read_csv('/h/19/jadeleiyu/frame_extension/data/gsn/training_df_{}s.csv'.format(1980))
training_df

  


Unnamed: 0,frame,frame id,established nouns,established noun counts,novel nouns,novel nouns train,novel nouns evaluation
0,accept-dobj,0,"['office', 'post', 'appointment', 'authority', 'help', 'rule', 'call', 'bill']","[40163, 22796, 22434, 15022, 13521, 12898, 12485, 11143]","['organ', 'beer', 'target', 'bolt', 'pin', 'university', 'tool', 'mug', 'market', 'pet']","['organ', 'beer', 'target', 'bolt', 'pin', 'university', 'tool', 'mug']","['market', 'pet']"
1,access-dobj,1,"['file', 'record', 'system', 'memory', 'base', 'computer', 'network', 'table']","[2821, 1627, 1294, 1138, 1047, 494, 479, 365]","['drive', 'site', 'tool', 'box', 'server', 'range', 'document', 'internet', 'web', 'folder']","['drive', 'site', 'tool', 'box', 'server', 'range', 'document', 'internet']","['web', 'folder']"
2,accommodate-dobj,2,"['person', 'people', 'student', 'family', 'child', 'patient', 'vessel', 'guest']","[12015, 11146, 7762, 5876, 5687, 4720, 3847, 3738]","['control', 'implant', 'computer', 'shock', 'hardware', 'network', 'string', 'array', 'site', 'scale']","['control', 'implant', 'computer', 'shock', 'hardware', 'network', 'string', 'array']","['site', 'scale']"
3,achieve-dobj,3,"['goal', 'objective', 'level', 'balance', 'control', 'target', 'power', 'measure']","[249388, 154740, 42036, 31803, 19959, 12730, 12388, 8808]","['binding', 'table', 'rendering', 'water', 'device', 'uniform', 'pinnacle', 'filling', 'lighting', 'print']","['binding', 'table', 'rendering', 'water', 'device', 'uniform', 'pinnacle', 'filling']","['lighting', 'print']"
4,acknowledge-dobj,4,"['authority', 'help', 'support', 'power', 'justice', 'king', 'source', 'failure']","[18282, 12336, 6585, 6324, 4062, 3259, 3191, 1565]","['threat', 'crowd', 'area', 'family', 'level', 'teacher', 'student', 'packet', 'root', 'institution']","['threat', 'crowd', 'area', 'family', 'level', 'teacher', 'student', 'packet']","['root', 'institution']"
5,acquire-dobj,5,"['habit', 'power', 'control', 'art', 'stock', 'form', 'material', 'weapon']","[34652, 33065, 13292, 9809, 5702, 5364, 4813, 4797]","['carrier', 'channel', 'sequence', 'center', 'brand', 'nutrient', 'submarine', 'player', 'pet', 'network']","['carrier', 'channel', 'sequence', 'center', 'brand', 'nutrient', 'submarine', 'player']","['pet', 'network']"
6,act-pobj_prep.as,6,"['guide', 'interpreter', 'secretary', 'mediator', 'judge', 'representative', 'barrier', 'officer']","[26995, 18779, 15909, 13508, 12324, 8790, 8610, 8097]","['chair', 'player', 'network', 'net', 'probe', 'node', 'router', 'wrapper', 'umbrella', 'portal']","['chair', 'player', 'network', 'net', 'probe', 'node', 'router', 'wrapper']","['umbrella', 'portal']"
7,act-pobj_prep.like,7,"['man', 'child', 'fool', 'people', 'woman', 'boy', 'person', 'baby']","[7092, 3113, 2359, 1543, 1466, 1253, 1143, 962]","['filter', 'star', 'officer', 'guy', 'cat', 'lawyer', 'family', 'scientist', 'bitch', 'tourist']","['filter', 'star', 'officer', 'guy', 'cat', 'lawyer', 'family', 'scientist']","['bitch', 'tourist']"
8,activate-dobj,8,"['system', 'cell', 'mechanism', 'switch', 'device', 'fiber', 'lymphocyte', 'alarm']","[5774, 2774, 2673, 707, 660, 442, 425, 421]","['link', 'setting', 'neutrophil', 'action', 'bar', 'layer', 'window', 'box', 'tool', 'menu']","['link', 'setting', 'neutrophil', 'action', 'bar', 'layer', 'window', 'box']","['tool', 'menu']"
9,add-dobj,9,"['water', 'drop', 'salt', 'sugar', 'weight', 'line', 'milk', 'material']","[52931, 47826, 15806, 11928, 10986, 9865, 7919, 6995]","['handler', 'ram', 'contact', 'folder', 'menu', 'server', 'interface', 'printer', 'port', 'slide']","['handler', 'ram', 'contact', 'folder', 'menu', 'server', 'interface', 'printer']","['port', 'slide']"


In [8]:
import pandas as pd
frame_df = pd.read_csv('/h/19/jadeleiyu/frame_extension/data/gsn/frame_eval_df_{}s.csv'.format(1960))
frame_df

Unnamed: 0.1,Unnamed: 0,frame,support nouns,support noun counts,ground truth novel nouns
0,0,accommodate-dobj,"['person', 'people', 'student', 'family', 'child', 'patient', 'pupil', 'guest', 'vessel', 'passenger', 'crowd', 'ship', 'class', 'boy', 'friend', 'body']","[9397, 6388, 4608, 3748, 3464, 3343, 3082, 2626, 2621, 2136, 2098, 1446, 1014, 972, 901, 803]","['network', 'housing', 'site', 'hardware', 'implant', 'study', 'shock', 'array', 'computer', 'storage']"
1,1,achieve-dobj,"['goal', 'objective', 'balance', 'level', 'control', 'power', 'measure', 'life', 'target', 'security', 'form', 'equilibrium', 'system', 'action', 'justice', 'place']","[60111, 44505, 9756, 8315, 5817, 5123, 4429, 3208, 2916, 2801, 2614, 1975, 1628, 1493, 1480, 1265]","['high', 'block', 'device', 'decline', 'woman', 'water', 'binding', 'interface', 'transport', 'cover']"
2,2,add-dobj,"['drop', 'water', 'salt', 'sugar', 'weight', 'milk', 'line', 'column', 'ounce', 'material', 'alcohol', 'flour', 'juice', 'egg', 'piece', 'cup']","[42262, 40401, 11000, 9131, 8488, 6061, 5701, 4517, 4466, 4185, 4185, 3701, 3684, 3607, 3527, 3445]","['user', 'menu', 'handler', 'server', 'ram', 'folder', 'header', 'printer', 'interface', 'processor']"
3,3,add-pobj_prep.to,"['beauty', 'water', 'stock', 'collection', 'comfort', 'mixture', 'power', 'library', 'store', 'weight', 'life', 'fire', 'church', 'system', 'diet', 'soil']","[10385, 9300, 8785, 8244, 7067, 6776, 6500, 5443, 5366, 4952, 4455, 3670, 3558, 3129, 3123, 3122]","['window', 'folder', 'skillet', 'aura', 'favorite', 'pane', 'tag', 'computer', 'toolbox', 'cart']"
4,4,address-dobj,"['people', 'house', 'assembly', 'envelope', 'crowd', 'man', 'court', 'person', 'king', 'chair', 'lady', 'officer', 'friend', 'woman', 'child', 'student']","[13017, 9830, 8369, 6384, 6030, 4808, 4374, 4254, 3966, 2798, 2772, 2705, 2577, 2568, 2501, 2256]","['structure', 'constraint', 'barrier', 'case', 'objective', 'failure', 'goal', 'source', 'mechanism', 'gap']"
5,5,aim-pobj_prep.at,"['power', 'head', 'heart', 'life', 'establishment', 'goal', 'target', 'control', 'objective', 'crown', 'point', 'style', 'person', 'man', 'throne', 'glory']","[2975, 2347, 2086, 1961, 1561, 1458, 1400, 1185, 770, 723, 717, 613, 594, 575, 518, 495]","['black', 'kid', 'patient', 'manager', 'male', 'crowd', 'viewer', 'employee', 'resident', 'tourist']"
6,6,allow-dobj,"['room', 'escape', 'movement', 'water', 'man', 'scope', 'person', 'formation', 'foot', 'place', 'weight', 'return', 'action', 'control', 'pound', 'power']","[6660, 5629, 5269, 4144, 3104, 2766, 2195, 2075, 2064, 2057, 1864, 1825, 1790, 1787, 1741, 1715]","['server', 'pet', 'link', 'screening', 'device', 'computer', 'file', 'developer', 'designer', 'binding']"
7,7,allow-pobj_prep.for,"['movement', 'meal', 'action', 'wear', 'rise', 'weight', 'escape', 'range', 'resistance', 'study', 'formation', 'support', 'return', 'case', 'extension', 'drop']","[1740, 823, 712, 692, 619, 586, 468, 449, 443, 418, 393, 387, 363, 356, 343, 342]","['rule', 'file', 'image', 'model', 'mechanism', 'assembly', 'screening', 'user', 'constraint', 'binding']"
8,8,analyze-dobj,"['structure', 'case', 'system', 'material', 'record', 'form', 'circuit', 'mixture', 'action', 'source', 'movement', 'mechanism', 'gas', 'subject', 'light', 'market']","[3638, 3119, 3043, 1650, 1215, 1162, 1133, 1122, 1086, 1061, 836, 787, 733, 730, 669, 552]","['packet', 'artifact', 'videotape', 'gap', 'indicator', 'array', 'threat', 'transport', 'constraint', 'linkage']"
9,9,appear-nsubj,"['man', 'sign', 'book', 'sun', 'christ', 'star', 'light', 'form', 'enemy', 'head', 'line', 'face', 'woman', 'person', 'image', 'life']","[14708, 9823, 9666, 6609, 6604, 6192, 5876, 5856, 5368, 4916, 4682, 4034, 3985, 3783, 3486, 3376]","['slide', 'button', 'marker', 'menu', 'panel', 'file', 'tab', 'display', 'arrow', 'folder']"


In [7]:
import pandas as pd
pd.set_option('display.max_colwidth', -1)
pd.set_option('display.max_rows', 1200)
noun_df = pd.read_csv('/h/19/jadeleiyu/frame_extension/data/gsn/noun_eval_df_{}s.csv'.format(1990))
noun_df

  


Unnamed: 0.1,Unnamed: 0,query noun,ground truth extended frames
0,0,adenovirus,['generate-dobj']
1,1,ant,['run-dobj']
2,2,apartment,['exit-dobj']
3,3,area,['configure-dobj']
4,4,asp,"['create-dobj', 'provide-pobj_prep.by']"
5,5,auditor,['use-nsubj']
6,6,bank,['target-dobj']
7,7,bean,"['access-dobj', 'manage-dobj']"
8,8,center,['click-dobj']
9,9,circle,['pull-dobj']


In [10]:
noun_df.sort_values(by=['query noun id']).reset_index()

Unnamed: 0.1,index,Unnamed: 0,query noun,query noun id,ground truth extended frames
0,211,211,layer,0,['derive-pobj_prep.from']
1,131,131,editor,1,['appoint-dobj']
2,395,395,theater,2,"['attend-dobj', 'go-pobj_prep.to', 'leave-dobj']"
3,65,65,car,3,"['afford-dobj', 'approach-dobj', 'bring-dobj', 'buy-dobj', 'carry-pobj_prep.to', 'come-nsubj', 'get-dobj', 'go-nsubj', 'hear-dobj', 'move-dobj', 'pass-dobj', 'put-dobj', 'put-pobj_prep.in', 'repair-dobj', 'return-pobj_prep.to', 'saw-dobj', 'stand-nsubj', 'take-pobj_prep.in', 'wash-dobj']"
4,185,185,home,4,"['bring-pobj_prep.into', 'burn-dobj', 'buy-dobj', 'come-pobj_prep.into', 'confine-pobj_prep.to', 'destroy-dobj', 'do-pobj_prep.in', 'go-pobj_prep.into', 'grow-pobj_prep.in', 'have-pobj_prep.in', 'hold-pobj_prep.in', 'meet-pobj_prep.in', 'occupy-dobj', 'place-pobj_prep.in', 'stand-nsubj']"
5,415,415,vehicle,5,['drive-dobj']
6,352,352,soil,6,['rest-pobj_prep.on']
7,64,64,candy,7,['eat-dobj']
8,410,410,truck,8,['drive-dobj']
9,212,212,leak,9,['prevent-dobj']


In [3]:
import numpy as np
from sklearn.metrics import roc_auc_score

y_true = np.array([[0, 0, 0, 1],[1,1,0,1]])
y_pred = np.array([[0.2,0.3,0.2,0.3,], [0.2,0.3,0.2,0.3]])

roc_auc_score(y_true, y_pred)

ValueError: Only one class present in y_true. ROC AUC score is not defined in that case.

In [10]:
import pickle
cnp_embeddings_0 = pickle.load(
        open('/h/19/jadeleiyu/frame_extension/data/cnp/cnp_hist_embeddings_{}'.format(1850), 'rb'))

In [43]:
from numpy import linalg as LA
sum(LA.norm(cnp_embeddings_0, axis=1) <= 1)

430

In [16]:
cnp_embeddings_1 = pickle.load(
        open('/h/19/jadeleiyu/frame_extension/data/cnp/cnp_hist_embeddings_{}'.format(2000), 'rb'))

In [49]:
sum(LA.norm(cnp_embeddings_1, axis=1) > 50)

107

In [35]:
cnp_embeddings_0

array([[ 1.53146126, -0.74703433, -0.0237342 , ..., -0.51910192,
         0.7692309 ,  0.22645915],
       [ 0.79255469,  0.07837161, -0.20471464, ..., -0.12785049,
         0.04658372, -0.46108703],
       [ 1.49134142,  0.45596649, -0.71481728, ...,  0.33234437,
         0.11171546,  0.55903612],
       ...,
       [ 0.        ,  0.        ,  0.        , ...,  0.        ,
         0.        ,  0.        ],
       [ 3.00398244,  0.38930292, -0.74868414, ..., -0.00562923,
         0.36869568, -0.31826189],
       [ 0.        ,  0.        ,  0.        , ...,  0.        ,
         0.        ,  0.        ]])

In [37]:
import numpy as np
ling_embeddings_0 = np.load('/h/19/jadeleiyu/frame_extension/data/histwords_embeddings/embeddings_{}.npy'.format(1850))

In [56]:
sum(LA.norm(ling_embeddings_0, axis=1) > 10.4)

7

4133

In [1]:
import pandas as pd
list_converter = {'novel nouns': eval, 'most similar novel nouns': eval, 'least similar novel nouns': eval,
                      'established nouns': eval, 'ground truth extended frames': eval,
                      'support nouns': eval, 'ground truth novel nouns': eval, 'support noun counts': eval,
                      'established noun counts': eval}
decade = 1860
decade_noun_test_df = pd.read_csv(
            '/h/19/jadeleiyu/frame_extension/data/gsn/noun_eval_df_{}s.csv'.format(decade),
            converters=list_converter)

In [4]:
list(decade_noun_test_df['query noun'])

['accommodation',
 'acquaintance',
 'action',
 'actor',
 'addict',
 'adult',
 'advocate',
 'aegis',
 'aggregation',
 'aide',
 'aircraft',
 'airfield',
 'airplane',
 'airport',
 'alarm',
 'alcohol',
 'alcoholic',
 'alien',
 'ambulance',
 'ammunition',
 'ancestor',
 'animal',
 'antenna',
 'apartment',
 'ape',
 'appointment',
 'arc',
 'architecture',
 'area',
 'arena',
 'arm',
 'armchair',
 'array',
 'arrow',
 'art',
 'artifact',
 'assembly',
 'assistant',
 'athletics',
 'atrium',
 'attic',
 'attorney',
 'aunt',
 'aura',
 'author',
 'authority',
 'automobile',
 'baby',
 'back',
 'background',
 'bag',
 'bait',
 'balance',
 'ball',
 'bank',
 'bar',
 'barrel',
 'barrier',
 'base',
 'baseball',
 'basement',
 'basin',
 'basket',
 'basketball',
 'bastard',
 'bath',
 'bathroom',
 'batter',
 'battery',
 'bay',
 'beach',
 'beaker',
 'beam',
 'bear',
 'bearing',
 'beat',
 'beauty',
 'beaver',
 'bed',
 'bedroom',
 'beer',
 'bell',
 'belly',
 'belt',
 'bench',
 'berlin',
 'bicycle',
 'bill',
 'bin',


In [6]:
1e6

1000000.0

In [11]:
import numpy as np
import pickle
import pandas as pd

noun2idx = pickle.load(open('/h/19/jadeleiyu/frame_extension/data/gsn/support_noun2idx.p',
                            'rb'))  # a common noun2idx lookup dict shared across all modalities
noun_decade_counts = pickle.load(open('/h/19/jadeleiyu/frame_extension/data/gsn/noun_decade_counts.p', 'rb'))
list_converter = {'novel nouns': eval, 'most similar novel nouns': eval, 'least similar novel nouns': eval,
                  'established nouns': eval, 'ground truth extended frames': eval,
                  'support nouns': eval, 'ground truth novel nouns': eval, 'support noun counts': eval,
                  'established noun counts': eval}


decade = 1900
decade_idx = int((decade - 1800) / 10)
decade_noun_test_df = pd.read_csv(
        '/h/19/jadeleiyu/frame_extension/data/gsn/noun_eval_df_{}s.csv'.format(decade),
        converters=list_converter)
noun_counts = noun_decade_counts[decade_idx]
noun2decade_count = {n: noun_counts[idx] for (n, idx) in noun2idx.items()}
prediction_dir = '/h/19/jadeleiyu/frame_extension/predictions/'


In [44]:
precisions_f_ling = pickle.load(open(prediction_dir + 'mean_precisions_exemplar_loss_ling', 'rb'))[int((decade - 1850) / 10)]
precisions_f_vis_ont = pickle.load(open(prediction_dir + 'mean_precisions_exemplar_loss_vis-ont', 'rb'))[int((decade - 1850) / 10)]
precisions_f_vis = pickle.load(open(prediction_dir + 'mean_precisions_exemplar_loss_vis', 'rb'))[int((decade - 1850) / 10)]
precisions_f_vis = pickle.load(open(prediction_dir + 'mean_precisions_exemplar_loss_vis', 'rb'))[int((decade - 1850) / 10)]


query_nouns = list(decade_noun_test_df['query noun'])
query_noun_decade_counts = [noun2decade_count[noun] for noun in query_nouns]
decade_noun_test_df['query noun acc count'] = pd.Series(query_noun_decade_counts)
decade_noun_test_df['query noun precision ling'] = pd.Series(precisions_f_ling)
decade_noun_test_df['query noun precision vis-ont'] = pd.Series(precisions_f_vis_ont)
decade_noun_test_df['query noun precision vis'] = pd.Series(precisions_f_vis)


In [51]:
count_thresholds = np.array([0, 50, 100, 500, 1000, 5000, 10000, 50000, 100000, 500000, 1000000])
decade_mean_precisions_ling = \
    decade_noun_test_df.groupby(pd.cut(decade_noun_test_df['query noun acc count'], count_thresholds)).mean()[
        'query noun precision ling']

decade_mean_precisions_vis_ont = \
    decade_noun_test_df.groupby(pd.cut(decade_noun_test_df['query noun acc count'], count_thresholds)).mean()[
        'query noun precision vis-ont']

decade_mean_precisions_vis = \
    decade_noun_test_df.groupby(pd.cut(decade_noun_test_df['query noun acc count'], count_thresholds)).mean()[
        'query noun precision vis']


print(decade_mean_precisions_ling)
print(decade_mean_precisions_vis_ont)
print(decade_mean_precisions_vis)

query noun acc count
(0, 50]              0.648817
(50, 100]            0.595228
(100, 500]           0.710807
(500, 1000]          0.749181
(1000, 5000]         0.732600
(5000, 10000]        0.729839
(10000, 50000]       0.685695
(50000, 100000]      0.632089
(100000, 500000]     0.539942
(500000, 1000000]    0.374457
Name: query noun precision ling, dtype: float64
query noun acc count
(0, 50]              0.860493
(50, 100]            0.861624
(100, 500]           0.839598
(500, 1000]          0.848368
(1000, 5000]         0.806566
(5000, 10000]        0.795481
(10000, 50000]       0.751004
(50000, 100000]      0.694224
(100000, 500000]     0.575520
(500000, 1000000]    0.388466
Name: query noun precision vis-ont, dtype: float64
query noun acc count
(0, 50]              0.764771
(50, 100]            0.767667
(100, 500]           0.731618
(500, 1000]          0.733693
(1000, 5000]         0.714846
(5000, 10000]        0.684236
(10000, 50000]       0.677304
(50000, 100000]      0.63017

In [2]:
import pandas as pd
list_converter = {'novel nouns': eval, 'most similar novel nouns': eval, 'least similar novel nouns': eval,
                  'established nouns': eval, 'ground truth extended frames': eval,
                  'support nouns': eval, 'ground truth novel nouns': eval, 'support noun counts': eval,
                  'established noun counts': eval}
eval_dfs = []
for decade in range(1850, 2000, 10):
    df = pd.read_csv(
        '/h/19/jadeleiyu/frame_extension/data/gsn/frame_eval_df_{}s.csv'.format(decade),
        converters=list_converter)
    L = len(df['frame'])
    df['decade'] = pd.Series([decade]*L)
    eval_dfs.append(df)

result_df = pd.concat(eval_dfs)

In [9]:
result_df = result_df.drop(columns=['Unnamed: 0'])
result_df

Unnamed: 0,frame,support nouns,support noun counts,ground truth novel nouns,decade
0,abandon-dobj,"[post, place, city, field, system, friend, cam...","[1457, 1449, 1194, 795, 694, 560, 533, 485, 44...","[car, equipment, goal, site, area, weapon, act...",1850
1,accompany-pobj_prep.by,"[friend, officer, wife, son, brother, person, ...","[3310, 1927, 1883, 1871, 1461, 826, 802, 713, ...","[adult, envelope, level, failure, decline, wav...",1850
2,acquaint-pobj_prep.with,"[subject, art, person, man, people, family, wo...","[2767, 1728, 1308, 1171, 1045, 1004, 1004, 810...","[personality, worker, background, tool, patien...",1850
3,act-pobj_prep.as,"[interpreter, guide, mediator, judge, secretar...","[1838, 1832, 1166, 1130, 938, 625, 585, 547, 4...","[bridge, middleman, host, filter, link, buffer...",1850
4,add-pobj_prep.to,"[beauty, stock, church, power, comfort, weight...","[2203, 2108, 1499, 1127, 1123, 807, 753, 655, ...","[diet, pan, medium, sauce, file, pot, model, t...",1850
...,...,...,...,...,...
52,update-dobj,"[file, record, table, system, model, base, dis...","[6939, 3639, 1960, 1660, 1423, 1355, 795, 751]","[control, setting, form, package, source, inte...",1990
53,use-dobj,"[system, model, material, form, power, tool, f...","[293365, 233637, 175082, 138665, 135267, 11656...","[equal, python, website, hash, ant, owl, mitt,...",1990
54,use-pobj_prep.by,"[people, author, child, person, teacher, man, ...","[37706, 25777, 17002, 14769, 13727, 13219, 130...","[filter, connection, studio, switch, tag, web,...",1990
55,want-dobj,"[child, help, man, drink, power, food, life, p...","[29723, 27833, 11995, 11432, 9053, 8496, 8425,...","[setting, rainbow, fry, clip, player, server, ...",1990


In [10]:
cols = result_df.columns.tolist()
cols

['frame',
 'support nouns',
 'support noun counts',
 'ground truth novel nouns',
 'decade']

In [12]:
cols = [cols[-1]] + cols[:-1]
cols

['decade',
 'frame',
 'support nouns',
 'support noun counts',
 'ground truth novel nouns']

In [13]:
result_df = result_df[cols]
result_df

Unnamed: 0,decade,frame,support nouns,support noun counts,ground truth novel nouns
0,1850,abandon-dobj,"[post, place, city, field, system, friend, cam...","[1457, 1449, 1194, 795, 694, 560, 533, 485, 44...","[car, equipment, goal, site, area, weapon, act..."
1,1850,accompany-pobj_prep.by,"[friend, officer, wife, son, brother, person, ...","[3310, 1927, 1883, 1871, 1461, 826, 802, 713, ...","[adult, envelope, level, failure, decline, wav..."
2,1850,acquaint-pobj_prep.with,"[subject, art, person, man, people, family, wo...","[2767, 1728, 1308, 1171, 1045, 1004, 1004, 810...","[personality, worker, background, tool, patien..."
3,1850,act-pobj_prep.as,"[interpreter, guide, mediator, judge, secretar...","[1838, 1832, 1166, 1130, 938, 625, 585, 547, 4...","[bridge, middleman, host, filter, link, buffer..."
4,1850,add-pobj_prep.to,"[beauty, stock, church, power, comfort, weight...","[2203, 2108, 1499, 1127, 1123, 807, 753, 655, ...","[diet, pan, medium, sauce, file, pot, model, t..."
...,...,...,...,...,...
52,1990,update-dobj,"[file, record, table, system, model, base, dis...","[6939, 3639, 1960, 1660, 1423, 1355, 795, 751]","[control, setting, form, package, source, inte..."
53,1990,use-dobj,"[system, model, material, form, power, tool, f...","[293365, 233637, 175082, 138665, 135267, 11656...","[equal, python, website, hash, ant, owl, mitt,..."
54,1990,use-pobj_prep.by,"[people, author, child, person, teacher, man, ...","[37706, 25777, 17002, 14769, 13727, 13219, 130...","[filter, connection, studio, switch, tag, web,..."
55,1990,want-dobj,"[child, help, man, drink, power, food, life, p...","[29723, 27833, 11995, 11432, 9053, 8496, 8425,...","[setting, rainbow, fry, clip, player, server, ..."


In [15]:
result_df.to_csv('/h/19/jadeleiyu/frame_extension/data/gsn/frame_eval_df_all.csv', index=False)