# Visualize Divergence

This notebook is used to visualize results produced by `runDivergenceRange.py`. Graphs produced by this notebook can be used to assess the convergence of word2vec models trained on a diachronic corpus, in order to determine the number of years required for the model to be representative of the corpus. For more information, read the README.md document.

In [None]:
%pylab inline

In [None]:
import pickle as pkl
from glob import glob

In [None]:
from helpers import getYears
from divergence import computeDivergenceOverYearRange

# IMPORTANT
Set `saveDir` to the directory where you saved your divergence models. This should be the same directory that was passed as `--outDir` to the `runDivergenceRange.py` script.

In [None]:
# Directory searched for the '*.pkl' divergence result files plotted below.
# Must be set by the user before running the remaining cells.
saveDir = ''

In [None]:
def doPlots(divergence, sentenceYearCounter):
    """Plot divergence on a logarithmic x-axis and mark each year.

    Args:
        divergence: mapping whose keys are used as x values (the axis is
            labelled '# Sentences') and whose values are used as y values
            (labelled 'Divergence').
        sentenceYearCounter: mapping of year -> sentence count. A dashed
            vertical line from y=0 to y=1 is drawn at each sentence count
            and annotated with the year.
    """
    # Sort by x value so the line is drawn left to right regardless of the
    # mapping's iteration order, and hand matplotlib plain lists rather than
    # dict views. Keys are unique, so the tuple sort never compares values.
    points = sorted(divergence.items())
    xValues = [x for x, _ in points]
    yValues = [y for _, y in points]
    semilogx(xValues, yValues)
    xlabel('# Sentences')
    ylabel('Divergence')

    # items() is available on both Python 2 and 3; iteritems() is Python 2 only.
    for year, sentenceCount in sentenceYearCounter.items():
        plot([sentenceCount, sentenceCount], [0, 1], '--', color='#aaaaaa')
        text(sentenceCount, 0.5, str(year), rotation=270)


In [None]:
def plotSentences(sentenceYearCounter):
    """Plot the per-key sentence counts on a logarithmic y-axis.

    Args:
        sentenceYearCounter: mapping whose keys are plotted on the x-axis
            (labelled 'Year') and whose values are plotted on the y-axis
            (labelled '# Sentences').
    """
    # Plot in ascending key order and pass plain lists: dict iteration order
    # is not guaranteed to be chronological, and dict views are not sequences.
    years = sorted(sentenceYearCounter)
    counts = [sentenceYearCounter[year] for year in years]
    semilogy(years, counts)
    # One tick per year, labelled with the year itself, rotated to avoid overlap.
    xticks(years, years, rotation=90)
    xlabel('Year')
    ylabel('# Sentences')

In [None]:
import os

# Collect every pickle in saveDir, in name order. os.path.join keeps the
# pattern valid whether or not saveDir ends with a separator, and an empty
# saveDir means the current directory rather than the filesystem root.
files = sorted(glob(os.path.join(saveDir, '*.pkl')))

# NOTE(review): unpickling can execute arbitrary code; only point saveDir at
# files you generated yourself.
for f in files:
    # Each pickle holds a 3-tuple; vocabSize is loaded but not plotted here.
    with open(f, 'rb') as pklFile:
        divergence, sentenceYearCounter, vocabSize = pkl.load(pklFile)

    # Title is the file name without directory, 'divergenceRange_' prefix
    # and '.pkl' extension.
    plotTitle = os.path.splitext(os.path.basename(f))[0]
    if plotTitle.startswith('divergenceRange_'):
        plotTitle = plotTitle[len('divergenceRange_'):]

    # One figure per file: divergence curve on the left, sentence counts
    # on the right.
    figure(figsize=(12,4))
    subplot(1,2,1)
    doPlots(divergence, sentenceYearCounter)
    title(plotTitle)

    subplot(1,2,2)
    plotSentences(sentenceYearCounter)