## Install Requirements
!conda install -c conda-forge ptable -y

## Required Import

In [1]:
import math
import random
import numpy as np
import pandas as pd
import nltk
from nltk.util import ngrams
nltk.data.path.append('.')
nltk.download('punkt')

[nltk_data] Downloading package punkt to
[nltk_data]     /Users/aftab.alam/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

## Prepare data

In [2]:
DATA_PATH='.'
data_file = 'en_US.twitter.txt'
file = f'{DATA_PATH}/{data_file}'
with open(file, "r") as f:
    data = f.read()
print("Data type:", type(data))
print("Number of letters:", len(data))
print("First 300 letters of the data")
print("-------")
display(data[0:300])
print("-------")

print("Last 300 letters of the data")
print("-------")
display(data[-300:])
print("-------")
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

Data type: <class 'str'>
Number of letters: 3335477
First 300 letters of the data
-------


"How are you? Btw thanks for the RT. You gonna be in DC anytime soon? Love to see you. Been way, way too long.\nWhen you meet someone special... you'll know. Your heart will beat more rapidly and you'll smile for no reason.\nthey've decided its more fun if I don't.\nSo Tired D; Played Lazer Tag & Ran A "

-------
Last 300 letters of the data
-------


"ust had one a few weeks back....hopefully we will be back soon! wish you the best yo\nColombia is with an 'o'...“: We now ship to 4 countries in South America (fist pump). Please welcome Columbia to the Stunner Family”\n#GutsiestMovesYouCanMake Giving a cat a bath.\nCoffee after 5 was a TERRIBLE idea.\n"

-------


## Define tokens

In [72]:
SOS='$' # start of sentence token
EOS='√' # end of senetence token
UNK='ß' # unknown word token

In [73]:
# create vocabulary using training data.
# replace less occuring words with <unk>
class vocabulary:
    def __init__(self,tokens):
        """
        list of train tokenized sentences 
        """
        self.tokens = tokens
        self.char_counts = None
    
    def count_char(self):
        """
        counts words and create a frequency dict
        """
        counts = {}
        for sentence in self.tokens:
            for char in sentence:
                if char in counts:
                    counts[char] += 1
                else:
                    counts[char] = 1
        self.char_counts = counts
    def build_vocab(self,threshold):
        """
        creates a closed vocab with words ocurring less than threshold replaced with <unk>
        """
        closed_vocab = []
        if not self.char_counts:
            self.count_char()
        for ch , cnt in self.char_counts.items():
            if cnt >= threshold:
                closed_vocab.append(ch)
        self.vocab = closed_vocab
        self.char2int = {ch: self.vocab.index(ch) for ch in self.vocab }
        self.int2char = {self.vocab.index(ch): ch for ch in self.vocab }
        return self.vocab              

        

In [74]:
vocab = vocabulary(["adada hum relply go bach","how are you"])
vocab.build_vocab(1)
vocab.char2int


['a',
 'd',
 ' ',
 'h',
 'u',
 'm',
 'r',
 'e',
 'l',
 'p',
 'y',
 'g',
 'o',
 'b',
 'c',
 'w']

{'a': 0,
 'd': 1,
 ' ': 2,
 'h': 3,
 'u': 4,
 'm': 5,
 'r': 6,
 'e': 7,
 'l': 8,
 'p': 9,
 'y': 10,
 'g': 11,
 'o': 12,
 'b': 13,
 'c': 14,
 'w': 15}

In [141]:
## Prepare dataset

class DataSet:
    def __init__(self,data_file,):
        with open(data_file, "r") as f:
            self.data = f.read()
        self._preprocess()
    def _preprocess(self):
        self.data = '\n'.join([sentence.lower() for sentence in self.data.split('\n')])
    
    def split_to_data(self,train=.8,dev=.1,test=.1):
        """
        Split data by linebreak "\n"

        Args:
            data: str

        Returns:
            A list of sentences
        """
        sentences = nltk.tokenize.sent_tokenize(self.data)
        print(f'Total Number of sentences: {len(sentences)}')
        random.seed(87)
        random.shuffle(sentences)

        test_size = int(len(sentences) * test)
        self.test_data = sentences[0:test_size]
        train_dev_data = sentences[test_size:]
        dev_size = int(len(sentences) * dev)
        self.dev_data = train_dev_data[0:dev_size]
        self.train_data = train_dev_data[dev_size:]
    def vocab(self,threshold):
        self.closed_vocab = set(vocabulary(tokens=self.train_data).build_vocab(threshold=threshold))
    
    def tokenize_sentences(self, data, n):
        """
        Tokenize sentences into tokens (words)

        Args:
            sentences: List of strings

        Returns:
            List of lists of tokens
        """
        ngram_tokenized_sentences = []
        # Go through each sentence in train data 
        for sentence in data:
            # Convert into a list of words
            # ## add <s> and <e> tokens in data
            sentence = SOS*(n-1) + sentence
            tokenized=[]
            for char_tuples in ngrams(sentence,n):
                new_tuple=()
                for ch in char_tuples:
                    if ch != SOS and ch not in self.closed_vocab: # replace less frequent world with UNK
                        ch = UNK
                    new_tuple = new_tuple + (ch,)
                tokenized.append(new_tuple)
            # append the list of words to the list of lists
            ngram_tokenized_sentences.append(tokenized)

        return ngram_tokenized_sentences
        
    def get_tokenized_data(self,n_grams):
        if not self.train_data:
            self.split_to_data()

        self.ngram_tokenized = self.tokenize_sentences(self.train_data, n_grams)
        self.ngram_minus1_tokenized = self.tokenize_sentences(self.train_data, n_grams-1)
        #self.ngram_minus2_tokenized = self.tokenize_sentences(self.train_data, n_grams-2)
        self.test_tokenized = self.tokenize_sentences(self.test_data,n_grams)
        self.dev_tokenized = self.tokenize_sentences(self.dev_data,n_grams)
        return self.ngram_minus1_tokenized, self.ngram_tokenized, self.test_tokenized ,self.dev_tokenized
    

In [142]:
for tup in ngrams("abcders",2):
    print(tup)

('a', 'b')
('b', 'c')
('c', 'd')
('d', 'e')
('e', 'r')
('r', 's')


In [143]:
%%time
THESHOLD=2
nGram =3
dataset = DataSet(data_file=file)
dataset.split_to_data()
dataset.vocab(THESHOLD)
closed_vocab = dataset.closed_vocab
ngram_1minus_tokenized,ngram_tokenized, test_data,dev_data = dataset.get_tokenized_data(nGram)

Total Number of sentences: 55661
CPU times: user 5.28 s, sys: 51.5 ms, total: 5.34 s
Wall time: 5.34 s


In [144]:
ngram_1minus_tokenized[0:1]
dev_data[0:1]

[[('$', 'h'),
  ('h', 'a'),
  ('a', 'h'),
  ('h', 'a'),
  ('a', 'h'),
  ('h', 'a'),
  ('a', 'a'),
  ('a', '\n'),
  ('\n', 'f'),
  ('f', 'a'),
  ('a', 'b'),
  ('b', 'u'),
  ('u', 'l'),
  ('l', 'o'),
  ('o', 'u'),
  ('u', 's'),
  ('s', ' '),
  (' ', 'd'),
  ('d', 'e'),
  ('e', 's'),
  ('s', 'i'),
  ('i', 'g'),
  ('g', 'n'),
  ('n', ' '),
  (' ', 't'),
  ('t', 'i'),
  ('i', 'p'),
  ('p', ':'),
  (':', ' '),
  (' ', 'y'),
  ('y', 'o'),
  ('o', 'u'),
  ('u', 'r'),
  ('r', ' '),
  (' ', 'h'),
  ('h', 'o'),
  ('o', 'm'),
  ('m', 'e'),
  ('e', ' '),
  (' ', 'c'),
  ('c', 'a'),
  ('a', 'n'),
  ('n', ' '),
  (' ', 'h'),
  ('h', 'a'),
  ('a', 'v'),
  ('v', 'e'),
  ('e', ' '),
  (' ', 't'),
  ('t', 'h'),
  ('h', 'e'),
  ('e', ' '),
  (' ', 'e'),
  ('e', 's'),
  ('s', 's'),
  ('s', 'e'),
  ('e', 'n'),
  ('n', 'c'),
  ('c', 'e'),
  ('e', ' '),
  (' ', 'o'),
  ('o', 'f'),
  ('f', ' '),
  (' ', 'y'),
  ('y', 'o'),
  ('o', 'u'),
  ('u', 'r'),
  ('r', ' '),
  (' ', 'f'),
  ('f', 'a'),
  ('a', 'v'),
  ('

[[('$', '$', 'g'),
  ('$', 'g', 'o'),
  ('g', 'o', '\n'),
  ('o', '\n', 't'),
  ('\n', 't', 'h'),
  ('t', 'h', 'a'),
  ('h', 'a', 'n'),
  ('a', 'n', 'k'),
  ('n', 'k', ' '),
  ('k', ' ', 'y'),
  (' ', 'y', 'o'),
  ('y', 'o', 'u'),
  ('o', 'u', ' '),
  ('u', ' ', 'a'),
  (' ', 'a', 's'),
  ('a', 's', 'h'),
  ('s', 'h', 'l'),
  ('h', 'l', 'e'),
  ('l', 'e', 'y'),
  ('e', 'y', '.')]]

### split dataset in train and set

In [145]:
print("Data are split into {} ngramtrain,{} dev and {} test set".format(
    len(ngram_tokenized), len(dev_data), len(test_data)))
print(f'Length vocab including UNK, SOS, and EOS is {len(closed_vocab)}')
print("First training sample:")
print(ngram_tokenized[0])
      
print("First test sample")
print(test_data[0])

Data are split into 44529 ngramtrain,5566 dev and 5566 test set
Length vocab including UNK, SOS, and EOS is 221
First training sample:
[('$', '$', 'h'), ('$', 'h', 'a'), ('h', 'a', 'h'), ('a', 'h', 'a'), ('h', 'a', 'h'), ('a', 'h', 'a'), ('h', 'a', 'a'), ('a', 'a', '\n'), ('a', '\n', 'f'), ('\n', 'f', 'a'), ('f', 'a', 'b'), ('a', 'b', 'u'), ('b', 'u', 'l'), ('u', 'l', 'o'), ('l', 'o', 'u'), ('o', 'u', 's'), ('u', 's', ' '), ('s', ' ', 'd'), (' ', 'd', 'e'), ('d', 'e', 's'), ('e', 's', 'i'), ('s', 'i', 'g'), ('i', 'g', 'n'), ('g', 'n', ' '), ('n', ' ', 't'), (' ', 't', 'i'), ('t', 'i', 'p'), ('i', 'p', ':'), ('p', ':', ' '), (':', ' ', 'y'), (' ', 'y', 'o'), ('y', 'o', 'u'), ('o', 'u', 'r'), ('u', 'r', ' '), ('r', ' ', 'h'), (' ', 'h', 'o'), ('h', 'o', 'm'), ('o', 'm', 'e'), ('m', 'e', ' '), ('e', ' ', 'c'), (' ', 'c', 'a'), ('c', 'a', 'n'), ('a', 'n', ' '), ('n', ' ', 'h'), (' ', 'h', 'a'), ('h', 'a', 'v'), ('a', 'v', 'e'), ('v', 'e', ' '), ('e', ' ', 't'), (' ', 't', 'h'), ('t', 'h', 

In [146]:
# test your code
tokenized_sentences = ['sky is blue .',
                       'leaves are green .',
                       'roses are red .']
vocab = vocabulary(tokenized_sentences)
tmp_closed_vocab = vocab.build_vocab(threshold=2)
print(f"Closed vocabulary:")
print(tmp_closed_vocab)

Closed vocabulary:
['s', ' ', 'l', 'e', '.', 'a', 'r']


In [147]:
import os
import pickle
from datetime import datetime

class Model:
    def __init__(self,ngrams=2,n1gramsTrain=ngram_1minus_tokenized, ngramsTrain=ngram_tokenized,vocab=closed_vocab):
        self.vocab = vocab
        self.vocab_size = len(self.vocab)
        self.n_grams = ngrams
        self.train_data_ngram = ngramsTrain
        self.train_data_1ngram = n1gramsTrain

    
    def count_n_grams(self,data):
        """
        Count words after ngrams in training data set
        """
        if not data:
            data = self.train_data_ngram
        nGram_cnt ={}
        for sentence in data:
            for tuples in sentence:
                if nGram_cnt.get(tuples,0):
                    nGram_cnt[tuples] +=1
                else:
                    nGram_cnt[tuples] = 1
        return nGram_cnt

    
    def calculate_ngram_probability(self, ngram, smoothing=1):
        """
        calculate probabilities of given ngram
        ngram  = w1,w2,..wn
        n-1gram = w1,w2...wn-1
        = (count(ngram) + k)/(count(n-1gram) + k*V)
        where V is size of vocab
        """
        count_ngram  = self.nGram_cnt.get(ngram,0)
        nminus1_gram = ngram[:-1]
        count_nminus1_gram = self.n1Gram_cnt.get(nminus1_gram,0)
        probs = (count_ngram + smoothing)/(count_nminus1_gram + smoothing* self.vocab_size)
       
        return probs
    
    def train(self):
        """
        calculate ngram and nminus gram counts
        """
        self.nGram_cnt=self.count_n_grams(data=self.train_data_ngram)
        self.n1Gram_cnt=self.count_n_grams(data=self.train_data_1ngram)
    
    def save(self,path,name,checkpoint):
        model_path = f'{path}/{name}'
        if not os.path.exists(model_path):
            os.mkdir(model_path)
        count_df = {'count_ngram':self.nGram_cnt, 'count_nminus1gram':self.n1Gram_cnt}
        with open(f'{model_path}/{checkpoint}.pkl', 'wb') as fp:
            pickle.dump(count_df, fp, protocol=pickle.HIGHEST_PROTOCOL)
    
    def load(self,path,name,checkpoint):
        model_path = f'{path}/{name}'
        with open(f'{model_path}/{checkpoint}.pkl', 'rb') as fp:
            count_df = pickle.load(fp)
            self.nGram_cnt = count_df['count_ngram']
            self.n1Gram_cnt = count_df['count_nminus1gram']
    
    def predict_nextchar(self,ngram):
        """
        Given a ngram find next words and their probabilities
        """
        # n-1 history
        next_hist = ngram[1:]
        probs = {}
        # list of ngrams
        for ngram_tuple in self.nGram_cnt.keys():
            hist = ngram_tuple[:-1]
            char = ngram_tuple[-1]
            if next_hist == hist:
                prob = self.calculate_ngram_probability(ngram_tuple,1)
                probs[char] = prob
        if not probs: # return unknown word if model did not find any thing
            probs = {UNK: 1/self.vocab_size}
        
        return probs
    
        

In [150]:
%%time
# test code
from collections import Counter
model = Model(ngrams=nGram,n1gramsTrain=ngram_1minus_tokenized[0:3000],ngramsTrain=ngram_tokenized[0:3000],vocab=closed_vocab)
print(f"Ngram train set {len(model.train_data_ngram)} ")
print(f"Ngram train set 1st sentence is  {len(model.train_data_ngram[0])} ")
model.train()
model.save('.','char_model',1)


Ngram train set 3000 
Ngram train set 1st sentence is  82 
CPU times: user 83.9 ms, sys: 2.93 ms, total: 86.9 ms
Wall time: 85.5 ms


In [157]:
model = Model(ngrams=nGram,n1gramsTrain=ngram_1minus_tokenized[0:3000],ngramsTrain=ngram_tokenized[0:3000],vocab=closed_vocab)
model.load('.','char_model',1) 
ngram = ngram_tokenized[0][0]
ngram =('$','i', ' ')
model.calculate_ngram_probability(ngram,smoothing=1)
model.predict_nextchar(ngram)

0.3952451708766716

{'a': 0.05465004793863854,
 'w': 0.11313518696069032,
 'c': 0.07094918504314478,
 's': 0.0488974113135187,
 'g': 0.04410354745925216,
 'm': 0.032598274209012464,
 'l': 0.07957813998082454,
 't': 0.04314477468839885,
 'k': 0.029721955896452542,
 'h': 0.07574304889741132,
 'f': 0.02109300095877277,
 'n': 0.030680728667305847,
 '*': 0.0019175455417066154,
 'j': 0.02109300095877277,
 'd': 0.052732502396931925,
 '&': 0.0028763183125599234,
 'p': 0.012464046021093002,
 'r': 0.02205177372962608,
 'o': 0.009587727708533078,
 'q': 0.003835091083413231,
 'u': 0.006711409395973154,
 'b': 0.012464046021093002,
 'v': 0.003835091083413231,
 'e': 0.009587727708533078,
 'i': 0.003835091083413231,
 '❤': 0.0019175455417066154,
 '<': 0.0019175455417066154,
 'y': 0.0028763183125599234,
 '(': 0.0019175455417066154}

In [158]:
model.nGram_cnt.get(('$','i', ' '))

265

# dev_data

In [159]:
def perplexity1(sentence):
    """
    ngram tokenize sentence
    """
    N = len(sentence)
    #cross_entropy = − log2 p(x ̄; θ)/N
    px = 1
    if N:
        for ngram in sentence:
            p = model.calculate_ngram_probability(ngram,smoothing=1)
            px *=p
        cross_entropy = -1 * np.log2(px)/N
    return 2**cross_entropy

In [160]:
perplexity1(dev_data[1])

12.149177030600175

In [161]:
def perplexity2(sentence):
    N = len(sentence)
    #PP = p ** (1/N)
    px = 1
    if N:
        for ngram in sentence:
            p = model.calculate_ngram_probability(ngram,smoothing=1)
            px *= 1/p
        return px ** (1/N)
perplexity2(dev_data[1])

12.149177030600175

In [162]:
%%time
# train using complete training data
model = Model(ngrams=nGram,n1gramsTrain=ngram_1minus_tokenized,ngramsTrain=ngram_tokenized,vocab=closed_vocab)
print(f"Ngram train set {len(model.train_data_ngram)} ")
print(f"Ngram train set 1st sentence is  {len(model.train_data_ngram[0])} ")
model.train()
model.save('.','bigram_model',1)

Ngram train set 44529 
Ngram train set 1st sentence is  82 
CPU times: user 1.07 s, sys: 7.26 ms, total: 1.07 s
Wall time: 1.07 s


In [163]:
sent = []
sent_g = ngrams("i would like to congratulate and for finishing their undergrad classes at wfu today.",2)
for s in sent_g:
    sent = [*sent,s]
perplexity2(sent)

221.00000000000003

In [164]:
# dev data perplexity
dev_perflexity =[]
#dev_data[:1]
for sentence in dev_data:
    pp = perplexity1(sentence) 
    if not pp == float('inf'):
        sent = ''.join(wrd[-1] for wrd in sentence)
        dev_perflexity = [*dev_perflexity,[pp,sent]]
    else: print(f"perplexity of sent {sent} is {pp}")
    # dev data perplexity
test_perflexity =[]
#test_data[:1]
for sentence in test_data:
    pp = perplexity1(sentence) 
    if not pp == float('inf'):
        sent = ''.join(wrd[-1] for wrd in sentence)
        test_perflexity = [*test_perflexity,[pp,sent]]
    else: print(f"perplexity of sent {sent} is {pp}")
# dev data perplexity
train_perflexity =[]
#ngram_tokenized[:1]
for sentence in ngram_tokenized:
    pp = perplexity1(sentence) 
    if not pp == float('inf'):
        sent = ''.join(wrd[-1] for wrd in sentence)
        train_perflexity = [*train_perflexity,[pp,sent]]
    else: print(f"perplexity of sent {sent} is {pp}")

  cross_entropy = -1 * np.log2(px)/N


perplexity of sent bad asa lol im at work listen to my cali music waitin to get off hows dancing going when u go shout me out in yo videos
blue october is the most emotional band i've ever heard. is inf
perplexity of sent bring back mr easy rider lol
what could be better than prepping tax docs & viewing #epl on a saturday morning? is inf
perplexity of sent amazing! is inf
perplexity of sent hey, bri. is inf
perplexity of sent hope you can find something else fun to entertain your guests. is inf
perplexity of sent happy birthday
my doc will also get the disks. is inf
perplexity of sent together we make the best mexican ever! is inf
perplexity of sent see u wednesday! is inf
perplexity of sent looking for a new scent & like some options. is inf
perplexity of sent you're amazing. is inf
perplexity of sent whos in? is inf
perplexity of sent gonna be tweeting from the #filmset tonight. is inf
perplexity of sent but i'm at keuka college. is inf
perplexity of sent full house for cripple of in

perplexity of sent #adele's voice. is inf
perplexity of sent i'm wondering whether banning mobiles in the classroom is missing the point - use technology to support learning
long island
what about your fans from washington? is inf
perplexity of sent you are most welcome! is inf


In [65]:
print(f"mean perflexity on train data: {np.asarray(np.array(train_perflexity)[:,0],dtype='float64').mean()}")

print(f"mean perflexity on dev data: {np.asarray(np.array(dev_perflexity)[:,0],dtype='float64').mean()}")
print(f"mean perflexity on test data: {np.asarray(np.array(test_perflexity)[:,0],dtype='float64').mean()}")

mean perflexity on train data: 10.898957192550023
mean perflexity on dev data: 10.889734799608162
mean perflexity on test data: 10.912576828796414


In [111]:
import operator
test_sentence = test_data[1][3:]
print(test_sentence)
s_prob = []
index=[]
N=0
for tuples in test_sentence:
    prob  = model.predict_nextchar(tuples) # get char with all probs
    pred = max(prob.items(), key=operator.itemgetter(1))[0]
    if pred == '√':
        break
    index = index + [pred]
    s_prob += [prob]
    N +=1
print(''.join(word for word in index))


[(' ', 't'), ('t', 'w'), ('w', 'i'), ('i', 't'), ('t', 't'), ('t', 'e'), ('e', 'r'), ('r', ' '), (' ', 'f'), ('f', 'e'), ('e', 'e'), ('e', 'd'), ('d', ' '), (' ', 'i'), ('i', 's'), ('s', ' '), (' ', 'f'), ('f', 'u'), ('u', 'l'), ('l', 'l'), ('l', ' '), (' ', 'o'), ('o', 'f'), ('f', ' '), (' ', 's'), ('s', 'c'), ('c', 'a'), ('a', 'r'), ('r', 'y'), ('y', ' '), (' ', 't'), ('t', 'o'), ('o', 'r'), ('r', 'n'), ('n', 'a'), ('a', 'd'), ('d', 'o'), ('o', 'e'), ('e', 's'), ('s', '.'), ('.', '√')]
 en   eto   tn to lltuot one t ue n u  


In [186]:
import random
seed = 'i am happy'
ngram_t = ngrams(seed,3)
tokens = [t for t in ngram_t]
# Predict next 3 char and choose 1
for i in range(20):
    prob  = model.predict_nextchar(tokens[-1]) # pass last ngram
    chars = sorted(prob, key=prob.get, reverse=True)[:3]
    ch = random.choice(chars)
    if ch == UNK:
        ch = random.choice(chars)
    print(f"Predicted charaters are:{chars} and selected char is: {ch} and sentence is: {''.join(ch[0] for ch in tokens)}")
    new_gram = tokens[-1][1:]+(ch,)
    tokens = [ *tokens,new_gram]
    
    


Predicted charaters are:[' ', '.', '\n'] and selected char is: . and sentence is: i am hap
Predicted charaters are:['.', '"', 'c'] and selected char is: c and sentence is: i am happ
Predicted charaters are:['o', 'a', '.'] and selected char is: o and sentence is: i am happy
Predicted charaters are:['m', 'n', 'u'] and selected char is: n and sentence is: i am happy.
Predicted charaters are:[' ', 'e', 's'] and selected char is:   and sentence is: i am happy.c
Predicted charaters are:['t', 'a', 'i'] and selected char is: a and sentence is: i am happy.co
Predicted charaters are:[' ', 'n', 'l'] and selected char is:   and sentence is: i am happy.con
Predicted charaters are:['b', 's', 'g'] and selected char is: b and sentence is: i am happy.con 
Predicted charaters are:['e', 'u', 'a'] and selected char is: e and sentence is: i am happy.con a
Predicted charaters are:[' ', 'e', 'r'] and selected char is:   and sentence is: i am happy.con a 
Predicted charaters are:['t', 'a', 'i'] and selected c

In [178]:
('p', 'y', '\n')[1:] + ('a',)

('y', '\n', 'a')

In [129]:
from prettytable import PrettyTable
for label, data in (('Word', words),('Screen Name', screen_names),('Hashtag', hashtags)):

    pt = PrettyTable(field_names=[label, 'Count'])    
    c = Counter(data)    
    [ pt.add_row(kv) for kv in c.most_common()[:10] ]   
    pt.align[label], pt.align['Count'] = 'l', 'r'
    # Set column alignmentprint(pt)

NameError: name 'words' is not defined

Collecting package metadata (current_repodata.json): done
Solving environment: done

## Package Plan ##

  environment location: /opt/anaconda3

  added / updated specs:
    - ptable


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    ptable-0.9.2               |             py_0          22 KB  conda-forge
    ------------------------------------------------------------
                                           Total:          22 KB

The following NEW packages will be INSTALLED:

  ptable             conda-forge/noarch::ptable-0.9.2-py_0



Downloading and Extracting Packages
ptable-0.9.2         | 22 KB     | ##################################### | 100% 
Preparing transaction: done
Verifying transaction: done
Executing transaction: done
