# CS 221 Project - Text Features
Extracting the pertinent text features from the dataset to be used for classifying text.

## Libraries
Using NLTK for NLP-related parts.

In [24]:
import math
import matplotlib.pyplot as plt
import nltk
from nltk.tokenize import word_tokenize
nltk.download('punkt')
import numpy as np
import pandas as pd

[nltk_data] Downloading package punkt to /Users/cpondoc/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


## Loading in Data
Using the data from `final_data.csv`, which is generated from `histogram.ipynb`.

In [25]:
# Load the dataset produced by histogram.ipynb into the working dataframe
DATA_PATH = 'data/final_data.csv'
main_df = pd.read_csv(DATA_PATH)

## Features 1 and 2: Lexical Diversity and Length
Calculating the lexical diversity of each document, which we define as $\frac{\text{number of unique words}}{\text{number of total words}}$. In addition, we record the length of each document as its total number of words (we will eventually update this to model length against a distribution).

In [26]:
# Iterate each row in the dataframe
for index, row in main_df.iterrows():
    # Calculate lexical diversity
    words = word_tokenize(main_df.at[index, 'transcript'])
    lexical_div = float((len(set(words))) / len(words))
    
    # Set appropriate columns equal
    main_df.at[index, 'length'] = len(words)
    main_df.at[index, 'diversity'] = lexical_div

## Feature 3: Frequency Distribution of Common Words
Look at the frequency of each word in each document (later, we will collate all of the texts into one document and compare against the average).

In [None]:
# Frequency distributions
freq_dists = []
key_words = []

# Setting the column for frequency distribution
for index, row in main_df.iterrows():
    # Calculate frequency distribution and set it equal
    words = word_tokenize(main_df.at[index, 'transcript'])
    freq_dist = nltk.FreqDist(words)
    freq_dists.append(freq_dist)
    keys = freq_dist.keys()
    print(keys)

dict_keys(['Good', 'morning', '.', 'How', 'are', 'you', '?', '(', 'Laughter', ')', 'It', "'s", 'been', 'great', ',', 'has', "n't", 'it', 'I', "'ve", 'blown', 'away', 'by', 'the', 'whole', 'thing', 'In', 'fact', "'m", 'leaving', 'There', 'have', 'three', 'themes', 'running', 'through', 'conference', 'which', 'relevant', 'to', 'what', 'want', 'talk', 'about', 'One', 'is', 'extraordinary', 'evidence', 'of', 'human', 'creativity', 'in', 'all', 'presentations', 'that', 'we', 'had', 'and', 'people', 'here', 'Just', 'variety', 'range', 'The', 'second', 'put', 'us', 'a', 'place', 'where', 'no', 'idea', 'going', 'happen', 'terms', 'future', 'No', 'how', 'this', 'may', 'play', 'out.I', 'an', 'interest', 'education', 'Actually', 'find', 'everybody', 'Do', 'very', 'interesting', 'If', "'re", 'at', 'dinner', 'party', 'say', 'work', '—', 'not', 'often', 'parties', 'frankly', 'asked', 'And', 'never', 'back', 'curiously', 'That', 'strange', 'me', 'But', 'if', 'somebody', 'know', 'they', '``', 'What', 

dict_keys(['I', 'ca', "n't", 'help', 'but', 'this', 'wish', ':', 'to', 'think', 'about', 'when', 'you', "'re", 'a', 'little', 'kid', ',', 'and', 'all', 'your', 'friends', 'ask', '``', 'If', 'genie', 'could', 'give', 'one', 'in', 'the', 'world', 'what', 'would', 'it', 'be', '?', "''", 'And', 'always', 'answered', 'Well', "'d", 'want', 'have', 'wisdom', 'know', 'exactly', 'for', '.', 'then', 'screwed', 'because', 'use', 'up', 'now', 'since', 'we', 'only', '—', 'unlike', 'last', 'year', 'they', 'had', 'three', 'wishes', "'m", 'not', 'going', 'that.So', 'let', "'s", 'get', 'like', 'which', 'is', 'peace', 'thinking', 'You', 'The', 'poor', 'girl', 'there', 'she', 'thinks', 'at', 'beauty', 'pageant', 'She', 'TED', 'Prize', '(', 'Laughter', ')', 'But', 'really', 'do', 'makes', 'sense', 'that', 'first', 'step', 'people', 'meet', 'each', 'other', "'ve", 'met', 'lot', 'of', 'different', 'over', 'years', 'filmed', 'some', 'them', 'from', 'dotcom', 'executive', 'New', 'York', 'who', 'wanted', 'take

dict_keys(['I', 'think', 'was', 'supposed', 'to', 'talk', 'about', 'my', 'new', 'book', ',', 'which', 'is', 'called', '``', 'Blink', "''", 'and', 'it', "'s", 'snap', 'judgments', 'first', 'impressions', '.', 'And', 'comes', 'out', 'in', 'January', 'hope', 'you', 'all', 'buy', 'triplicate', '(', 'Laughter', ')', 'But', 'thinking', 'this', 'realized', 'that', 'although', 'makes', 'me', 'happy', 'would', 'make', 'mother', 'not', 'really', 'happiness', 'So', 'decided', 'instead', 'someone', 'who', 'has', 'done', 'as', 'much', 'Americans', 'perhaps', 'anyone', 'over', 'the', 'last', '20', 'years', 'a', 'man', 'great', 'personal', 'hero', 'of', 'mine', ':', 'by', 'name', 'Howard', 'Moskowitz', 'most', 'famous', 'for', 'reinventing', 'spaghetti', 'sauce.Howard', 'high', 'he', 'round', 'his', '60s', 'big', 'huge', 'glasses', 'thinning', 'gray', 'hair', 'kind', 'wonderful', 'exuberance', 'vitality', 'parrot', 'loves', 'opera', 'aficionado', 'medieval', 'history', 'profession', 'psychophysicist'

dict_keys(['Well', ',', 'as', 'Alexander', 'Graham', 'Bell', 'famously', 'said', 'on', 'his', 'first', 'successful', 'telephone', 'call', '``', 'Hello', 'is', 'that', 'Domino', "'s", 'Pizza', '?', "''", '(', 'Laughter', ')', 'I', 'just', 'really', 'want', 'to', 'thank', 'you', 'very', 'much', '.', 'As', 'another', 'famous', 'man', 'Jerry', 'Garcia', 'What', 'a', 'strange', 'long', 'trip', 'And', 'he', 'should', 'have', 'it', 'about', 'become', 'At', 'this', 'moment', 'are', 'viewing', 'my', 'upper', 'half', 'My', 'lower', 'appearing', 'at', 'different', 'conference', 'in', 'country', 'You', 'can', 'turns', 'out', 'be', 'two', 'places', 'once', 'But', 'still', "'m", 'sorry', 'ca', "n't", 'with', 'person', "'ll", 'explain', 'time.And', 'though', 'rock', 'star', 'assure', 'none', 'of', 'wishes', 'will', 'include', 'hot', 'tub', 'what', 'me', 'technology', 'not', 'the', 'ability', 'get', 'more', 'songs', 'MP3', 'players', 'The', 'revolution', '—', 'bigger', 'than', 'hope', 'believe', 'digi

dict_keys(['What', 'I', "'m", 'going', 'to', 'do', ',', 'in', 'the', 'spirit', 'of', 'collaborative', 'creativity', 'is', 'simply', 'repeat', 'many', 'points', 'that', 'three', 'people', 'before', 'me', 'have', 'already', 'made', 'but', 'them', '—', 'this', 'called', '``', 'creative', 'collaboration', ';', "''", 'it', "'s", 'actually', 'borrowing', 'through', 'a', 'particular', 'perspective', 'and', 'ask', 'about', 'role', 'users', 'consumers', 'emerging', 'world', 'Jimmy', 'others', 'talked', 'about.Let', 'just', 'you', 'start', 'with', 'simple', 'question', ':', 'who', 'invented', 'mountain', 'bike', '?', 'Because', 'traditional', 'economic', 'theory', 'would', 'say', 'well', 'was', 'probably', 'by', 'some', 'big', 'corporation', 'had', 'R', '&', 'D', 'lab', 'where', 'they', 'were', 'thinking', 'up', 'new', 'projects', 'came', 'out', 'there', '.', 'It', 'did', "n't", 'come', 'from', 'Another', 'answer', 'might', 'be', 'sort', 'lone', 'genius', 'working', 'his', 'garage', 'away', 'on'

dict_keys(['When', 'I', "'m", 'starting', 'talks', 'like', 'this', ',', 'usually', 'do', 'a', 'whole', 'spiel', 'about', 'sustainability', 'because', 'lot', 'of', 'people', 'out', 'there', "n't", 'know', 'what', 'that', 'is', '.', 'This', 'crowd', 'does', 'it', 'so', "'ll", 'just', 'the', '60-second', 'crib-note', 'version', 'Right', '?', 'So', 'bear', 'with', 'me', 'We', 'go', 'real', 'fast', 'you', 'Fill', 'in', 'blanks', 'small', 'planet', 'Picture', 'little', 'Earth', 'circling', 'around', 'sun', 'You', 'million', 'years', 'ago', 'bunch', 'monkeys', 'fell', 'trees', 'got', 'clever', 'harnessed', 'fire', 'invented', 'printing', 'press', 'made', 'luggage', 'wheels', 'on', 'And', 'built', 'society', 'we', 'now', 'live', 'Unfortunately', 'while', 'without', 'doubt', 'most', 'prosperous', 'and', 'dynamic', 'world', 'has', 'ever', 'created', "'s", 'some', 'major', 'flaws.One', 'them', 'every', 'an', 'ecological', 'footprint', 'It', 'amount', 'impact', 'measurable', 'How', 'much', 'stuff'

dict_keys(['When', 'you', 'think', 'about', 'resilience', 'and', 'technology', 'it', "'s", 'actually', 'much', 'easier', '.', 'You', "'re", 'going', 'to', 'see', 'some', 'other', 'speakers', 'today', ',', 'I', 'already', 'know', 'who', 'are', 'talk', 'breaking-bones', 'stuff', 'of', 'course', 'with', 'never', 'is', 'So', 'very', 'easy', 'comparatively', 'speaking', 'be', 'resilient', 'that', 'if', 'we', 'look', 'at', 'what', 'happened', 'on', 'the', 'Internet', 'such', 'an', 'incredible', 'last', 'half', 'a', 'dozen', 'years', 'hard', 'even', 'get', 'right', 'analogy', 'for', 'A', 'lot', 'how', 'decide', 'supposed', 'react', 'things', 'expect', 'future', 'depends', 'bucket', 'categorize', 'them.And', 'so', 'tempting', 'boom-bust', 'just', 'went', 'through', 'gold', 'rush', 'It', 'this', 'as', 'different', 'from', 'might', 'pick', 'For', 'one', 'thing', 'both', 'were', 'real', 'In', '1849', 'in', 'Gold', 'Rush', 'they', 'took', 'over', '$', '700', 'million', 'worth', 'out', 'California'

dict_keys(['Thank', 'you', '.', 'And', 'I', 'feel', 'like', 'this', 'whole', 'evening', 'has', 'been', 'very', 'amazing', 'to', 'me', 'it', "'s", 'sort', 'of', 'the', 'Vimalakirti', 'Sutra', ',', 'an', 'ancient', 'work', 'from', 'India', 'in', 'which', 'Buddha', 'appears', 'at', 'beginning', 'and', 'a', 'bunch', 'people', 'come', 'see', 'him', 'biggest', 'city', 'area', 'Vaishali', 'they', 'bring', 'some', 'jeweled', 'parasols', 'make', 'offering', 'All', 'young', 'actually', 'The', 'old', 'fogeys', 'do', "n't", 'because', "'re", 'mad', 'when', 'he', 'came', 'their', 'accepted', '—', 'always', 'accepts', 'first', 'invitation', 'that', 'comes', 'whoever', 'is', 'local', 'geisha', 'movie-star', 'person', 'raced', 'elders', 'chariot', 'invited', 'first.So', 'was', 'hanging', 'out', 'with', 'movie', 'star', 'course', 'were', 'grumbling', ':', '``', 'He', 'supposed', 'be', 'religious', 'all', 'What', 'doing', 'over', 'there', 'Amrapali', 'house', 'his', '500', 'monks', "''", 'so', 'on', 'Th

dict_keys(['Like', 'many', 'of', 'you', 'here', ',', 'I', 'am', 'trying', 'to', 'contribute', 'towards', 'a', 'renaissance', 'in', 'Africa', '.', 'The', 'question', 'transformation', 'really', 'is', 'leadership', 'can', 'only', 'be', 'transformed', 'by', 'enlightened', 'leaders', 'And', 'it', 'my', 'contention', 'that', 'the', 'manner', 'which', 'we', 'educate', 'our', 'fundamental', 'progress', 'on', 'this', 'continent', 'want', 'tell', 'some', 'stories', 'explain', 'view', 'We', 'all', 'heard', 'about', 'importance', 'yesterday', 'An', 'American', 'friend', 'mine', 'year', 'volunteered', 'as', 'nurse', 'Ghana', 'and', 'period', 'three', 'months', 'she', 'came', 'conclusion', 'state', 'had', 'taken', 'me', 'over', 'decade', 'reach', 'Twice', 'was', 'involved', 'surgeries', 'where', 'they', 'lost', 'power', 'at', 'hospital', 'emergency', 'generators', 'did', 'not', 'start', 'There', 'flashlight', 'lantern', 'candle', '—', 'pitch', 'black', 'patient', "'s", 'cut', 'open', 'twice', 'firs

dict_keys(['Chris', 'Anderson', ':', 'Welcome', 'to', 'TED.Richard', 'Branson', 'Thank', 'you', 'very', 'much', '.', 'The', 'first', 'TED', 'has', 'been', 'great.CA', 'Have', 'met', 'anyone', 'interesting', '?', 'RB', 'Well', ',', 'the', 'nice', 'thing', 'about', 'is', 'everybody', "'s", 'I', 'was', 'glad', 'see', 'Goldie', 'Hawn', 'because', 'had', 'an', 'apology', 'make', 'her', "'d", 'dinner', 'with', 'two', 'years', 'ago', 'and', '—', 'she', 'this', 'big', 'wedding', 'ring', 'put', 'it', 'on', 'my', 'finger', 'could', "n't", 'get', 'off', 'And', 'went', 'home', 'wife', 'that', 'night', 'wanted', 'know', 'why', 'another', 'woman', 'massive', 'anyway', 'next', 'morning', 'we', 'go', 'along', 'jeweler', 'cut', 'So', '(', 'Laughter', ')', 'so', 'apologies', 'Goldie.CA', 'That', 'pretty', 'good', "'re", 'going', 'up', 'some', 'slides', 'of', 'your', 'companies', 'here', 'You', "'ve", 'started', 'one', 'or', 'in', 'time', 'Virgin', 'Atlantic', 'Records', 'guess', 'all', 'a', 'magazine', 

dict_keys(['Thank', 'you', 'for', 'putting', 'up', 'these', 'pictures', 'of', 'my', 'colleagues', 'over', 'here', '.', '(', 'Laughter', ')', 'We', "'ll", 'be', 'talking', 'about', 'them', 'Now', ',', 'I', "'m", 'going', 'try', 'an', 'experiment', 'do', "n't", 'experiments', 'normally', 'a', 'theorist', 'But', 'see', 'what', 'happens', 'if', 'press', 'this', 'button', 'Sure', 'enough', 'OK', 'used', 'to', 'work', 'in', 'field', 'elementary', 'particles', 'What', 'matter', 'chop', 'it', 'very', 'fine', '?', 'is', 'made', 'And', 'the', 'laws', 'are', 'valid', 'throughout', 'universe', 'and', 'they', "'re", 'much', 'connected', 'with', 'history', 'universe.We', 'know', 'lot', 'four', 'forces', 'There', 'must', 'more', 'but', 'those', 'at', 'small', 'distances', 'we', 'have', 'really', 'interacted', 'yet', 'The', 'main', 'thing', 'want', 'talk', ':', 'that', 'remarkable', 'experience', 'fundamental', 'physics', 'beauty', 'successful', 'criterion', 'choosing', 'right', 'theory', 'why', 'on',

dict_keys(['I', "'m", 'a', 'contemporary', 'artist', 'and', 'show', 'in', 'art', 'galleries', 'museums', '.', 'number', 'of', 'photographs', 'films', ',', 'but', 'also', 'make', 'television', 'programs', 'books', 'some', 'advertising', 'all', 'with', 'the', 'same', 'concept', 'And', 'it', "'s", 'about', 'our', 'fixation', 'celebrity', 'culture', 'importance', 'image', ':', 'is', 'born', 'photography.I', 'going', 'to', 'start', 'how', 'started', 'this', 'seven', 'years', 'ago', 'when', 'Princess', 'Diana', 'died', 'There', 'was', 'sort', 'standstill', 'Britain', 'moment', 'her', 'death', 'people', 'decided', 'mourn', 'mass', 'way', 'fascinated', 'by', 'phenomenon', 'so', 'wondered', 'could', 'one', 'erase', 'actually', 'quite', 'crudely', 'physically', '?', 'So', 'got', 'gun', 'shoot', 'at', "n't", 'from', 'my', 'memory', 'certainly', 'not', 'being', 'erased', 'public', 'psyche', 'Momentum', 'built', 'The', 'press', 'wrote', 'rather', 'felt', 'pornographic', 'ways', '—', 'like', '``', '

dict_keys(['I', "'m", 'going', 'to', 'go', 'right', 'into', 'the', 'slides', '.', 'And', 'all', 'try', 'and', 'prove', 'you', 'with', 'these', 'is', 'that', 'do', 'just', 'very', 'straight', 'stuff', 'my', 'ideas', 'are', '—', 'in', 'head', ',', 'anyway', 'they', "'re", 'logical', 'relate', 'what', "'s", 'on', 'problem', 'solving', 'for', 'clients', 'either', 'convince', 'at', 'end', 'solve', 'their', 'problems', 'or', 'really', 'because', 'usually', 'seem', 'like', 'it.Let', 'me', 'Can', 'turn', 'off', 'light', '?', 'Down', 'be', 'dark', "n't", 'want', 'see', 'doing', 'up', 'here', '(', 'Laughter', ')', 'Anyway', 'did', 'this', 'house', 'Santa', 'Monica', 'it', 'got', 'a', 'lot', 'of', 'notoriety', 'In', 'fact', 'appeared', 'porno', 'comic', 'book', 'which', 'slide', 'This', 'Venice', 'show', 'know', 'concerned', 'about', 'context', 'On', 'left-hand', 'side', 'had', 'those', 'little', 'houses', 'tried', 'build', 'building', 'fit', 'When', 'people', 'take', 'pictures', 'buildings', 'ou

dict_keys(['This', 'is', 'the', 'Large', 'Hadron', 'Collider', '.', 'It', "'s", '27', 'kilometers', 'in', 'circumference', 'biggest', 'scientific', 'experiment', 'ever', 'attempted', 'Over', '10,000', 'physicists', 'and', 'engineers', 'from', '85', 'countries', 'around', 'world', 'have', 'come', 'together', 'over', 'several', 'decades', 'to', 'build', 'this', 'machine', 'What', 'we', 'do', 'accelerate', 'protons', '—', 'so', ',', 'hydrogen', 'nuclei', '99.999999', 'percent', 'speed', 'of', 'light', 'Right', '?', 'At', 'that', 'they', 'go', '11,000', 'times', 'a', 'second', 'And', 'collide', 'them', 'with', 'another', 'beam', 'going', 'opposite', 'direction', 'We', 'inside', 'giant', 'detectors.They', "'re", 'essentially', 'digital', 'cameras', 'one', 'I', 'work', 'on', 'ATLAS', 'You', 'get', 'some', 'sense', 'size', 'you', 'can', 'just', 'see', 'these', 'EU', 'standard-size', 'people', 'underneath', '(', 'Laughter', ')', ':', '44', 'meters', 'wide', '22', 'diameter', '7,000', 'tons', '

dict_keys(['Last', 'year', ',', 'I', 'told', 'you', 'the', 'story', 'in', 'seven', 'minutes', 'of', 'Project', 'Orion', 'which', 'was', 'this', 'very', 'implausible', 'technology', 'that', 'technically', 'could', 'have', 'worked', 'but', 'it', 'had', 'one-year', 'political', 'window', 'where', 'happened', '.', 'So', 'did', "n't", 'happen', 'It', 'a', 'dream', 'not', 'This', "'m", 'going', 'to', 'tell', 'birth', 'digital', 'computing', 'perfect', 'introduction', 'And', "'s", 'work', 'and', 'machines', 'are', 'all', 'around', 'us', 'inevitable', 'If', 'people', 'about', 'if', 'they', 'done', 'somebody', 'else', 'would', 'sort', 'right', 'idea', 'at', 'time.This', 'is', 'Barricelli', 'universe', 'we', 'live', 'now', 'these', 'doing', 'things', 'including', 'changing', 'biology', 'starting', 'with', 'first', 'atomic', 'bomb', 'Trinity', 'Manhattan', 'little', 'bit', 'like', 'TED', ':', 'brought', 'whole', 'lot', 'smart', 'together', 'three', 'smartest', 'were', 'Stan', 'Ulam', 'Richard', '

dict_keys(['I', 'and', 'my', 'colleagues', 'Art', 'Aron', 'Lucy', 'Brown', 'others', ',', 'have', 'put', '37', 'people', 'who', 'are', 'madly', 'in', 'love', 'into', 'a', 'functional', 'MRI', 'brain', 'scanner', '.', '17', 'were', 'happily', '15', 'had', 'just', 'been', 'dumped', 'we', "'re", 'starting', 'our', 'third', 'experiment', ':', 'studying', 'report', 'that', 'they', 'still', 'after', '10', 'to', '25', 'years', 'of', 'marriage', 'So', 'this', 'is', 'the', 'short', 'story', 'research.In', 'jungles', 'Guatemala', 'Tikal', 'stands', 'temple', 'It', 'was', 'built', 'by', 'grandest', 'Sun', 'King', 'city-state', 'civilization', 'Americas', 'Mayas', 'His', 'name', 'Jasaw', 'Chan', "K'awiil", 'He', 'stood', 'over', 'six', 'feet', 'tall', 'lived', 'his', '80s', 'he', 'buried', 'beneath', 'monument', '720', 'AD', 'And', 'Mayan', 'inscriptions', 'proclaim', 'deeply', 'with', 'wife', 'her', 'honor', 'facing', 'every', 'spring', 'autumn', 'exactly', 'at', 'equinox', 'sun', 'rises', 'behin

dict_keys(['Let', "'s", 'just', 'start', 'by', 'looking', 'at', 'some', 'great', 'photographs', '.', 'This', 'is', 'an', 'icon', 'of', 'National', 'Geographic', ',', 'Afghan', 'refugee', 'taken', 'Steve', 'McCurry', 'But', 'the', 'Harvard', 'Lampoon', 'about', 'to', 'come', 'out', 'with', 'a', 'parody', 'and', 'I', 'shudder', 'think', 'what', 'they', "'re", 'going', 'do', 'this', 'photograph', 'Oh', 'wrath', 'Photoshop.This', 'jet', 'landing', 'San', 'Francisco', 'Bruce', 'Dale', 'He', 'mounted', 'camera', 'on', 'tail', 'A', 'poetic', 'image', 'for', 'story', 'Tolstoy', 'Sam', 'Abell', 'Pygmies', 'in', 'DRC', 'Randy', 'Olson', 'love', 'because', 'it', 'reminds', 'me', 'Degas', "'", 'bronze', 'sculptures', 'little', 'dancer', 'polar', 'bear', 'swimming', 'Arctic', 'Paul', 'Nicklen', 'Polar', 'bears', 'need', 'ice', 'be', 'able', 'move', 'back', 'forth', '—', 'not', 'very', 'good', 'swimmers', 'we', 'know', 'happening', 'These', 'are', 'camels', 'moving', 'across', 'Rift', 'Valley', 'Afr

dict_keys(['We', 'really', 'need', 'to', 'put', 'the', 'best', 'we', 'have', 'offer', 'within', 'reach', 'of', 'our', 'children', '.', 'If', 'do', "n't", 'that', ',', "'re", 'going', 'get', 'generation', 'deserve', 'They', 'learn', 'from', 'whatever', 'it', 'is', 'they', 'around', 'them.And', 'as', 'now', 'elite', 'parents', 'librarians', 'professionals', 'a', 'bunch', 'activities', 'are', 'in', 'fact', 'trying', 'those', 'us', 'or', 'broadly', 'can', 'I', "'m", 'start', 'and', 'end', 'this', 'talk', 'with', 'couple', 'things', 'carved', 'stone', 'One', 'what', "'s", 'on', 'Boston', 'Public', 'Library', 'Carved', 'above', 'their', 'door', '``', 'Free', 'All', "''", 'It', 'kind', 'an', 'inspiring', 'statement', "'ll", 'go', 'back', 'at', 'librarian', 'bring', 'all', 'works', 'knowledge', 'many', 'people', 'want', 'read', 'And', 'idea', 'using', 'technology', 'perfect', 'for', 'think', 'opportunity', 'one-up', 'Greeks', 'not', 'easy', 'But', 'industriousness', 'Egyptians', 'were', 'able'

dict_keys(['A', 'year', 'ago', ',', 'I', 'spoke', 'to', 'you', 'about', 'a', 'book', 'that', 'was', 'just', 'in', 'the', 'process', 'of', 'completing', 'has', 'come', 'out', 'interim', 'and', 'would', 'like', 'talk', 'today', 'some', 'controversies', 'inspired', '.', 'The', 'is', 'called', '``', 'Blank', 'Slate', "''", 'based', 'on', 'popular', 'idea', 'human', 'mind', 'blank', 'slate', 'all', 'its', 'structure', 'comes', 'from', 'socialization', 'culture', 'parenting', 'experience', 'an', 'influential', '20th', 'century', 'Here', 'are', 'few', 'quotes', 'indicating', ':', 'Man', 'no', 'nature', 'historian', 'Jose', 'Ortega', 'y', 'Gasset', ';', 'instincts', 'anthropologist', 'Ashley', 'Montagu', 'brain', 'capable', 'full', 'range', 'behaviors', 'predisposed', 'none', 'late', 'scientist', 'Stephen', 'Jay', 'Gould.There', 'number', 'reasons', 'doubt', 'them', 'common', 'sense', 'As', 'many', 'people', 'have', 'told', 'me', 'over', 'years', 'anyone', 'who', "'s", 'had', 'more', 'than', '

dict_keys(['I', 'think', 'all', 'of', 'us', 'have', 'been', 'interested', ',', 'at', 'one', 'time', 'or', 'another', 'in', 'the', 'romantic', 'mysteries', 'those', 'societies', 'that', 'collapsed', 'such', 'as', 'classic', 'Maya', 'Yucatan', 'Easter', 'Islanders', 'Anasazi', 'Fertile', 'Crescent', 'society', 'Angor', 'Wat', 'Great', 'Zimbabwe', 'and', 'so', 'on', '.', 'And', 'within', 'last', 'decade', 'two', 'archaeologists', 'shown', 'there', 'were', 'environmental', 'problems', 'underlying', 'many', 'these', 'past', 'collapses', 'But', 'also', 'plenty', 'places', 'world', 'where', 'developing', 'for', 'thousands', 'years', 'without', 'any', 'sign', 'a', 'major', 'collapse', 'Japan', 'Java', 'Tonga', 'Tikopea', 'So', 'evidently', 'some', 'areas', 'are', 'more', 'fragile', 'than', 'other', 'How', 'can', 'we', 'understand', 'what', 'makes', '?', 'The', 'problem', 'is', 'obviously', 'relevant', 'to', 'our', 'situation', 'today', 'because', 'well', 'already', 'Somalia', 'Rwanda', 'former

dict_keys(['I', 'thought', "'d", 'start', 'with', 'telling', 'you', 'or', 'showing', 'the', 'people', 'who', 'started', '[', 'Jet', 'Propulsion', 'Lab', ']', '.', 'When', 'they', 'were', 'a', 'bunch', 'of', 'kids', ',', 'kind', 'very', 'imaginative', 'adventurous', 'as', 'trying', 'at', 'Caltech', 'to', 'mix', 'chemicals', 'and', 'see', 'which', 'one', 'blows', 'up', 'more', 'Well', 'do', "n't", 'recommend', 'that', 'try', 'now', 'Naturally', 'blew', 'shack', 'well', 'then', 'hey', 'go', 'Arroyo', 'really', 'all', 'your', 'tests', 'in', 'there.So', "'s", 'what', 'we', 'call', 'our', 'first', 'five', 'employees', 'during', 'tea', 'break', 'know', 'here', 'As', 'said', 'matter', 'fact', 'them', 'was', 'part', 'cult', 'not', 'too', 'far', 'from', 'on', 'Orange', 'Grove', 'unfortunately', 'he', 'himself', 'because', 'kept', 'mixing', 'figure', 'out', 'ones', 'best', 'So', 'gives', 'flavor', 'have', 'there', 'We', 'avoid', 'blowing', 'ourselves', 'up.This', 'show', 'Guess', 'is', 'JPL', 'em

dict_keys(['So', 'I', 'understand', 'that', 'this', 'meeting', 'was', 'planned', ',', 'and', 'the', 'slogan', 'From', 'Was', 'to', 'Still', '.', 'And', 'am', 'illustrating', 'Which', 'of', 'course', 'not', 'agreeing', 'with', 'because', 'although', '94', 'still', 'working', 'anybody', 'who', 'asks', 'me', '``', 'Are', 'you', 'doing', 'or', '?', "''", 'do', "n't", 'answer', "'m", 'things', 'it', 'like', 'always', 'did', 'have', '—', 'use', 'word', 'mean', '(', 'Laughter', ')', 'my', 'file', 'which', 'is', 'called', 'To', 'Do', 'plans', 'clients', 'work', 'takes', 'care', 'age', 'want', 'show', 'so', 'know', 'what', 'why', 'here', 'This', 'about', '1925', 'All', 'these', 'were', 'made', 'during', 'last', '75', 'years', 'Applause', 'But', 'since', '25', 'more', 'less', 'see', 'Castleton', 'China', 'an', 'exhibition', 'at', 'Museum', 'Modern', 'Art', 'now', 'for', 'sale', 'Metropolitan', 'a', 'portrait', 'daughter', 'myself', 'These', 'just', 'some', "'ve", 'hundreds', 'them', 'call', 'mak

dict_keys(['I', 'was', 'listed', 'on', 'the', 'online', 'biography', 'that', 'said', 'a', 'design', 'missionary', '.', 'That', "'s", 'bit', 'lofty', ';', "'m", 'really', 'more', 'of', 'something', 'like', 'street', 'walker', 'spend', 'lot', 'time', 'in', 'urban', 'areas', 'looking', 'for', ',', 'and', 'studying', 'public', 'sector', 'take', 'about', '5,000', 'photographs', 'year', 'thought', 'would', 'edit', 'from', 'these', 'try', 'to', 'come', 'up', 'with', 'some', 'images', 'might', 'be', 'appropriate', 'interesting', 'you', 'And', 'used', 'three', 'criteria', ':', 'first', "'d", 'talk', 'real', 'within', 'reach', 'free', 'not', 'quite', 'as', 'we', "'re", 'fondly', 'known', 'by', 'our', 'competition', 'competitors', 'but', 'stuff', 'can', 'find', 'streets', 'available', 'all', 'people', 'probably', 'contains', 'other', 'important', 'messages.I', "'ll", 'use', 'sidewalks', 'Rio', 'an', 'example', 'A', 'very', 'common', 'done', "'50s", 'It', 'got', 'nice', 'kind', 'flowing', 'organic

dict_keys(['Right', 'when', 'I', 'was', '15', 'first', 'got', 'interested', 'in', 'solar', 'energy', '.', 'My', 'family', 'had', 'moved', 'from', 'Fort', 'Lee', ',', 'New', 'Jersey', 'to', 'California', 'the', 'snow', 'lots', 'of', 'heat', 'and', 'gas', 'lines', 'There', 'rationing', '1973', 'The', 'crisis', 'full', 'bore.I', 'started', 'reading', '``', 'Popular', 'Science', "''", 'magazine', 'really', 'excited', 'about', 'potential', 'try', 'solve', 'that', 'just', 'taken', 'trigonometry', 'high', 'school', 'learned', 'parabola', 'how', 'it', 'could', 'concentrate', 'rays', 'light', 'a', 'single', 'focus', 'That', 'me', 'very', 'And', 'felt', 'there', 'would', 'be', 'build', 'some', 'kind', 'thing', 'So', 'this', 'company', 'called', 'Solar', 'Devices', 'where', 'built', 'parabolas', 'took', 'metal', 'shop', 'remember', 'walking', 'into', 'building', 'Stirling', 'engines', 'engine', 'over', 'on', 'lathe', 'all', 'motorcycle', 'guys', 'said', 'You', "'re", 'bong', 'are', "n't", 'you', 

dict_keys(['The', 'new', 'me', 'is', 'beauty', '.', '(', 'Laughter', ')', 'Yeah', ',', 'people', 'used', 'to', 'say', '``', 'Norman', "'s", 'OK', 'but', 'if', 'you', 'followed', 'what', 'he', 'said', 'everything', 'would', 'be', 'usable', 'it', 'ugly', "''", 'Well', 'I', 'did', "n't", 'have', 'that', 'in', 'mind', 'so', '...', 'This', 'neat', 'Thank', 'for', 'setting', 'up', 'my', 'display', 'mean', 'just', 'wonderful', 'And', 'the', 'slightest', 'idea', 'of', 'does', 'or', 'good', 'want', 'life', 'My', 'trying', 'understand', 'about', 'and', 'pretty', 'emotions', 'all', 'making', 'things', 'kind', 'fun.And', 'this', 'a', 'Philippe', 'Starck', 'juicer', 'produced', 'by', 'Alessi', 'It', ';', 'fun', 'much', 'house', '—', 'entryway', 'do', 'use', 'make', 'juice', 'In', 'fact', 'bought', 'gold-plated', 'special', 'edition', 'comes', 'with', 'little', 'slip', 'paper', 'says', 'Do', 'acid', 'will', 'ruin', 'gold', 'plating', 'So', 'actually', 'took', 'carton', 'orange', 'poured', 'glass', '

dict_keys(['I', 'thought', "'d", 'begin', 'with', 'a', 'scene', 'of', 'war', '.', 'There', 'was', 'little', 'to', 'warn', 'the', 'danger', 'ahead', 'The', 'Iraqi', 'insurgent', 'had', 'placed', 'IED', ',', 'an', 'Improvised', 'Explosive', 'Device', 'along', 'side', 'road', 'great', 'care', 'By', '2006', 'there', 'were', 'more', 'than', '2,500', 'these', 'attacks', 'every', 'single', 'month', 'and', 'they', 'leading', 'cause', 'casualties', 'among', 'American', 'soldiers', 'civilians', 'team', 'that', 'hunting', 'for', 'this', 'is', 'called', 'EOD', 'team—', 'Explosives', 'Ordinance', 'Disposal—and', "'re", 'pointy', 'end', 'spear', 'in', 'effort', 'suppress', 'roadside', 'bombs', 'Each', 'goes', 'out', 'on', 'about', '600', 'bomb', 'calls', 'year', 'defusing', 'two', 'day', 'Perhaps', 'best', 'sign', 'how', 'valuable', 'are', 'insurgents', 'put', '$', '50,000', 'bounty', 'head', 'soldier.Unfortunately', 'particular', 'call', 'would', 'not', 'well', 'time', 'soldier', 'advanced', 'close

dict_keys(['This', 'machine', ',', 'which', 'we', 'all', 'have', 'residing', 'in', 'our', 'skulls', 'reminds', 'me', 'of', 'an', 'aphorism', 'a', 'comment', 'Woody', 'Allen', 'to', 'ask', 'about', 'what', 'is', 'the', 'very', 'best', 'thing', 'within', 'your', 'skull', '.', 'And', 'it', "'s", 'this', 'constructed', 'for', 'change', 'It', 'confers', 'on', 'us', 'ability', 'do', 'things', 'tomorrow', 'that', 'ca', "n't", 'today', 'could', 'yesterday', 'course', 'born', 'stupid.The', 'last', 'time', 'you', 'were', 'presence', 'baby', '—', 'happens', 'be', 'my', 'granddaughter', 'Mitra', 'Is', 'she', 'fabulous', '?', '(', 'Laughter', ')', 'But', 'nonetheless', 'when', 'popped', 'out', 'despite', 'fact', 'her', 'brain', 'had', 'actually', 'been', 'progressing', 'its', 'development', 'several', 'months', 'before', 'basis', 'experiences', 'womb', 'limited', 'abilities', 'as', 'does', 'every', 'infant', 'at', 'normal', 'natural', 'full-term', 'birth', 'If', 'assay', 'perceptual', 'they', 'woul

dict_keys(['I', "'m", 'here', 'because', 'have', 'a', 'very', 'important', 'message', ':', 'think', 'we', 'found', 'the', 'most', 'factor', 'for', 'success', '.', 'And', 'it', 'was', 'close', 'to', ',', 'Stanford', 'Psychology', 'professor', 'took', 'kids', 'that', 'were', 'four', 'years', 'old', 'and', 'put', 'them', 'in', 'room', 'all', 'by', 'themselves', 'he', 'would', 'tell', 'child', 'four-year-old', 'kid', '``', 'Johnny', 'am', 'going', 'leave', 'you', 'with', 'marshmallow', '15', 'minutes', 'If', 'after', 'come', 'back', 'this', 'is', 'will', 'get', 'another', 'one', 'So', 'two', "''", 'To', 'wait', 'something', 'they', 'like', 'equivalent', 'telling', 'us', 'We', "'ll", 'bring', 'coffee', 'hours', '(', 'Laughter', ')', 'Exact', 'equivalent.So', 'what', 'happened', 'when', 'left', '?', 'As', 'soon', 'as', 'door', 'closed', '...', 'out', 'of', 'three', 'ate', 'Five', 'seconds', '10', '40', '50', 'eight', 'Some', 'lasted', '14-and-a-half', 'Could', "n't", 'do', 'not', 'What', "'s

dict_keys(['I', "'ll", 'start', 'with', 'my', 'favorite', 'muse', ',', 'Emily', 'Dickinson', 'who', 'said', 'that', 'wonder', 'is', 'not', 'knowledge', 'neither', 'it', 'ignorance', '.', 'It', "'s", 'something', 'which', 'suspended', 'between', 'what', 'we', 'believe', 'can', 'be', 'and', 'a', 'tradition', 'may', 'have', 'forgotten', 'And', 'think', 'when', 'listen', 'to', 'these', 'incredible', 'people', 'here', "'ve", 'been', 'so', 'inspired', '—', 'many', 'ideas', 'visions', 'yet', 'look', 'at', 'the', 'environment', 'outside', 'you', 'see', 'how', 'resistant', 'architecture', 'change', 'You', 'those', 'very', 'We', 'them', 'out', 'create', 'things', 'end', 'hard', 'wall', 'applaud', 'well-mannered', 'box', 'But', 'space', 'never', 'existed', 'interests', 'me', ';', 'has', 'entered', 'except', 'in', 'our', 'minds', 'spirits', 'really', 'based', 'on.Architecture', 'on', 'concrete', 'steel', 'elements', 'of', 'soil', 'created', 'greatest', 'cities', 'spaces', 'had', 'indeed', 'story',

dict_keys(['I', "'m", 'going', 'to', 'talk', 'about', 'your', 'mindset', '.', 'Does', 'correspond', 'my', 'dataset', '?', '(', 'Laughter', ')', 'If', 'not', ',', 'one', 'or', 'the', 'other', 'needs', 'upgrading', 'is', "n't", 'it', 'When', 'students', 'global', 'issues', 'and', 'listen', 'them', 'in', 'coffee', 'break', 'they', 'always', '``', 'we', "''", 'And', 'when', 'come', 'back', 'into', 'lecture', 'room', 'ask', 'What', 'do', 'you', 'mean', 'with', 'Oh', "'s", 'very', 'easy', 'It', 'western', 'world', 'developing', 'say', 'We', 'learned', 'college', 'what', 'definition', 'then', 'The', 'Everyone', 'knows', 'say.But', 'know', 'press', 'like', 'this', 'So', 'girl', 'said', 'cleverly', 'Western', 'a', 'long', 'life', 'small', 'family', 'Developing', 'short', 'large', 'that', 'because', 'enabled', 'me', 'transfer', 'their', 'here', 'have', 'can', 'see', 'on', 'axis', 'size', 'of', 'One', 'two', 'three', 'four', 'five', 'children', 'per', 'woman', 'length', 'expectancy', '30', '40', 

dict_keys(['Clearly', ',', 'we', "'re", 'living', 'in', 'a', 'moment', 'of', 'crisis', '.', 'Arguably', 'the', 'financial', 'markets', 'have', 'failed', 'us', 'and', 'aid', 'system', 'is', 'failing', 'yet', 'I', 'stand', 'firmly', 'with', 'optimists', 'who', 'believe', 'that', 'there', 'has', 'probably', 'never', 'been', 'more', 'exciting', 'to', 'be', 'alive', 'Because', 'some', 'technologies', "'ve", 'talking', 'about', 'resources', 'skills', 'certainly', 'surge', 'talent', 'seeing', 'all', 'around', 'world', 'mindset', 'create', 'change', 'And', 'got', 'president', 'sees', 'himself', 'as', 'global', 'citizen', 'recognizes', 'no', 'longer', 'single', 'superpower', 'but', 'engage', 'different', 'way', 'world.And', 'by', 'definition', 'every', 'one', 'you', 'this', 'room', 'must', 'consider', 'yourself', 'soul', 'You', 'work', 'on', 'front', 'lines', 'seen', 'best', 'worst', 'human', 'beings', 'can', 'do', 'for', 'another', 'matter', 'what', 'country', 'live', 'or', 'also', 'extraordin

dict_keys(['One', 'of', 'the', 'biggest', 'challenges', 'in', 'computer', 'graphics', 'has', 'been', 'being', 'able', 'to', 'create', 'a', 'photo-real', ',', 'digital', 'human', 'face', '.', 'And', 'one', 'reasons', 'it', 'is', 'so', 'difficult', 'that', 'unlike', 'aliens', 'and', 'dinosaurs', 'we', 'look', 'at', 'faces', 'every', 'day', 'They', 'are', 'very', 'important', 'how', 'communicate', 'with', 'each', 'other', 'As', 'result', "'re", 'tuned', 'subtlest', 'things', 'could', 'possibly', 'be', 'wrong', 'rendering', 'order', 'believe', 'whether', 'these', 'realistic.And', 'what', 'I', "'m", 'going', 'do', 'next', 'five', 'minutes', 'take', 'you', 'through', 'process', 'where', 'tried', 'reasonably', 'photo-realistic', 'computer-generated', 'using', 'some', 'technology', "'ve", 'developed', 'also', 'collaborators', 'company', 'called', 'Image', 'Metrics', 'try', 'an', 'actress', 'named', 'Emily', "O'Brien", 'who', 'right', 'there', "'s", 'actually', 'completely', 'her', 'By', 'end',

dict_keys(['It', "'s", 'a', 'bit', 'funny', 'to', 'be', 'at', 'conference', 'dedicated', 'things', 'not', 'seen', ',', 'and', 'present', 'my', 'proposal', 'build', '6,000-kilometer-long', 'wall', 'across', 'the', 'entire', 'African', 'continent', '.', 'About', 'size', 'of', 'Great', 'Wall', 'China', 'this', 'would', 'hardly', 'an', 'invisible', 'structure', 'And', 'yet', 'it', 'made', 'from', 'parts', 'that', 'are', 'or', 'near-invisible', 'naked', 'eye', ':', 'bacteria', 'grains', 'sand.Now', 'as', 'architects', 'we', "'re", 'trained', 'solve', 'problems', 'But', 'I', 'do', "n't", 'really', 'believe', 'in', 'architectural', ';', 'only', 'opportunities', 'Which', 'is', 'why', "'ll", 'show', 'you', 'threat', 'response', 'The', 'desertification', 'My', 'sandstone', 'solidified', 'sand', 'stretching', 'desert.Now', 'magical', 'material', 'beautiful', 'contradictions', 'simple', 'complex', 'peaceful', 'violent', 'always', 'same', 'never', 'endlessly', 'fascinating', 'One', 'billion', 'come

dict_keys(['Metaphor', 'lives', 'a', 'secret', 'life', 'all', 'around', 'us', '.', 'We', 'utter', 'about', 'six', 'metaphors', 'minute', 'Metaphorical', 'thinking', 'is', 'essential', 'to', 'how', 'we', 'understand', 'ourselves', 'and', 'others', ',', 'communicate', 'learn', 'discover', 'invent', 'But', 'metaphor', 'way', 'of', 'thought', 'before', 'it', 'with', 'words.Now', 'assist', 'me', 'in', 'explaining', 'this', 'I', "'ve", 'enlisted', 'the', 'help', 'one', 'our', 'greatest', 'philosophers', 'reigning', 'king', 'metaphorians', 'man', 'whose', 'contributions', 'field', 'are', 'so', 'great', 'that', 'he', 'himself', 'has', 'become', 'am', 'course', 'referring', 'none', 'other', 'than', 'Elvis', 'Presley', '(', 'Laughter', ')', 'Now', '``', 'All', 'Shook', 'Up', "''", 'love', 'song', 'It', "'s", 'also', 'example', 'whenever', 'deal', 'anything', 'abstract', '—', 'ideas', 'emotions', 'feelings', 'concepts', 'thoughts', 'inevitably', 'resort', 'In', 'touch', 'not', 'but', 'chill', 'Li

dict_keys(['Namaste', '.', 'Good', 'morning', 'I', "'m", 'very', 'happy', 'to', 'be', 'here', 'in', 'India', 'And', "'ve", 'been', 'thinking', 'a', 'lot', 'about', 'what', 'have', 'learned', 'over', 'these', 'last', 'particularly', '11', 'years', 'with', 'V-Day', 'and', '``', 'The', 'Vagina', 'Monologues', ',', "''", 'traveling', 'the', 'world', 'essentially', 'meeting', 'women', 'girls', 'across', 'planet', 'stop', 'violence', 'against', 'women.What', 'want', 'talk', 'today', 'is', 'this', 'particular', 'cell', 'or', 'grouping', 'of', 'cells', 'that', 'each', 'every', 'one', 'us', 'call', 'it', 'girl', "'s", 'men', 'as', 'well', 'you', 'imagine', 'central', 'evolution', 'our', 'species', 'continuation', 'human', 'race.And', 'at', 'some', 'point', 'history', 'group', 'powerful', 'people', 'invested', 'owning', 'controlling', 'understood', 'suppression', 'oppression', 'reinterpretation', 'undermining', 'getting', 'believe', 'weakness', 'crushing', 'eradicating', 'destroying', 'reducing'

dict_keys(['Does', 'anybody', 'know', 'when', 'the', 'stethoscope', 'was', 'invented', '?', 'Any', 'guesses', '1816', '.', 'And', 'what', 'I', 'can', 'say', 'is', ',', 'in', '2016', 'doctors', 'are', "n't", 'going', 'to', 'be', 'walking', 'around', 'with', 'stethoscopes', 'There', "'s", 'a', 'whole', 'lot', 'better', 'technology', 'coming', 'and', 'that', 'part', 'of', 'change', 'medicine.What', 'has', 'changed', 'our', 'society', 'been', 'wireless', 'devices', 'But', 'future', 'digital', 'medical', 'OK', 'So', 'let', 'me', 'give', 'you', 'some', 'examples', 'this', 'kind', 'make', 'much', 'more', 'concrete', 'This', 'first', 'one', 'an', 'electrocardiogram', 'as', 'cardiologist', 'think', 'could', 'see', 'real', 'time', 'patient', 'individual', 'anywhere', 'world', 'on', 'your', 'smartphone', 'watching', 'rhythm', '—', 'incredible', 'it', 'us', 'today.But', 'just', 'beginning', 'You', 'check', 'email', 'while', "'re", 'sitting', 'here', 'In', 'checking', 'all', 'vital', 'signs', ':', 

## Feature 4: Identifying Key Words