**Mounting the google drive on the Colab session**

In [None]:
#Mounting gdrive to gain access to the dataset
from google.colab import drive
drive.mount("/content/gdrive")

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


**Installing pyLDAvis module for visualizing the model output and performance**

In [None]:
!pip install pyLDAvis

In [None]:
#to get rid of the warning messages in the output
import warnings
warnings.filterwarnings("ignore")

**Importing necessary libraries**

In [None]:
#Importing necessary libraries
import gensim
from gensim.utils import simple_preprocess
from gensim.parsing.preprocessing import STOPWORDS
from gensim import corpora
from gensim.models import CoherenceModel
import pyLDAvis
import pyLDAvis.gensim_models
from nltk.stem import WordNetLemmatizer, SnowballStemmer
import numpy as np
import nltk
nltk.download('wordnet')
import pandas as pd
import json
import matplotlib.pyplot as plt
%matplotlib inline 

In [None]:
pip install pandas --upgrade

**Importing data and cleaning it off the unwanted columns**

In [None]:
#importing data
data = pd.read_json('/content/gdrive/My Drive/cran_d.json')
data.drop(['author', 'bibliography','title'], axis = 1)

Unnamed: 0,id,body
0,1,experimental investigation of the aerodynamics...
1,2,simple shear flow past a flat plate in an inco...
2,3,the boundary layer in simple shear flow past a...
3,4,approximate solutions of the incompressible la...
4,5,one-dimensional transient heat conduction into...
...,...,...
1395,1396,shear buckling of clamped and simply-supported...
1396,1397,critical shear stress of an infinitely long si...
1397,1398,stability of rectangular plates under shear an...
1398,1399,buckling of transverse stiffened plates under ...


**Data Preprocessing (Stemming, Lemmatizing and Tokenizing)**

In [None]:
#Data Preprocessing
stemmer = SnowballStemmer(language='english')
def lemmatize_stemming(text):
    return stemmer.stem(WordNetLemmatizer().lemmatize(text, pos='v'))
def preprocess(text):
    result = []
    for token in gensim.utils.simple_preprocess(text):
        if token not in gensim.parsing.preprocessing.STOPWORDS and len(token) > 3:
            result.append(lemmatize_stemming(token))
    return result

In [None]:
processed_data = data['body'].map(preprocess)
processed_data

0       [experiment, investig, aerodynam, wing, slipst...
1       [simpl, shear, flow, past, flat, plate, incomp...
2       [boundari, layer, simpl, shear, flow, past, fl...
3       [approxim, solut, incompress, laminar, boundar...
4       [dimension, transient, heat, conduct, doubl, l...
                              ...                        
1395    [shear, buckl, clamp, simpli, support, infinit...
1396    [critic, shear, stress, infinit, long, simpli,...
1397    [stabil, rectangular, plat, shear, bend, forc,...
1398    [buckl, transvers, stiffen, plat, shear, paper...
1399    [buckl, shear, stress, simpli, support, infini...
Name: body, Length: 1400, dtype: object

**Creating dictionary and corpus**

In [None]:
#Creating dictionary and the word corpus
dictionary = corpora.Dictionary(processed_data)
corpus = [dictionary.doc2bow(text) for text in processed_data]

**Creating tf-idf model object on ‘bow_corpus’**

In [None]:
from gensim import corpora, models
tfidf = models.TfidfModel(corpus)
corpus_tfidf = tfidf[corpus]
from pprint import pprint
for doc in corpus_tfidf:
    pprint(doc)
    break

[(0, 0.053302445840307376),
 (1, 0.08206383857277204),
 (2, 0.048917665629352944),
 (3, 0.06499483304247398),
 (4, 0.0783847211538762),
 (5, 0.028340962543825454),
 (6, 0.04495784529265081),
 (7, 0.07385116512341265),
 (8, 0.08119993508064181),
 (9, 0.0630256806900624),
 (10, 0.509307940076426),
 (11, 0.04000670522109575),
 (12, 0.1544567313753436),
 (13, 0.03512348375093592),
 (14, 0.04937572128417836),
 (15, 0.09791849493987904),
 (16, 0.1552670353827112),
 (17, 0.09486618233401206),
 (18, 0.055570484690629675),
 (19, 0.07350588940046916),
 (20, 0.01687521477063807),
 (21, 0.048194417622460475),
 (22, 0.049287002591385444),
 (23, 0.22862026072668198),
 (24, 0.05840643038238435),
 (25, 0.12333639554404417),
 (26, 0.03512348375093592),
 (27, 0.03157347976088682),
 (28, 0.2254914465151426),
 (29, 0.050427687185562314),
 (30, 0.050819352648683),
 (31, 0.08250683576792811),
 (32, 0.038005863257038426),
 (33, 0.07482919458418888),
 (34, 0.095596222296088),
 (35, 0.040268032190421986),
 (36

**Building the model with number of topics(n) = 10**

In [None]:
lda1 = gensim.models.LdaMulticore(corpus_tfidf, num_topics=10, id2word=dictionary, passes=15, workers=4)
for idx, topic in lda1.print_topics(-1):
    print('Topic: {} Word: {}'.format(idx, topic))

Topic: 0 Word: 0.019*"buckl" + 0.019*"shell" + 0.011*"load" + 0.010*"stress" + 0.010*"cylind" + 0.008*"stiffen" + 0.008*"cylindr" + 0.008*"bend" + 0.007*"plat" + 0.007*"elast"
Topic: 1 Word: 0.004*"suction" + 0.003*"balanc" + 0.002*"coolant" + 0.002*"discret" + 0.002*"exchang" + 0.002*"hodograph" + 0.002*"ionize" + 0.002*"tollmien" + 0.002*"hoff" + 0.002*"sweat"
Topic: 2 Word: 0.013*"creep" + 0.009*"column" + 0.008*"fatigu" + 0.004*"throat" + 0.003*"life" + 0.003*"collaps" + 0.003*"grind" + 0.003*"galerkin" + 0.003*"stress" + 0.003*"sampl"
Topic: 3 Word: 0.007*"rough" + 0.004*"corridor" + 0.003*"flame" + 0.003*"molecul" + 0.003*"trip" + 0.003*"lattic" + 0.003*"hypothesi" + 0.002*"entranc" + 0.002*"modul" + 0.002*"depth"
Topic: 4 Word: 0.004*"round" + 0.003*"cruciform" + 0.003*"torsion" + 0.003*"devot" + 0.002*"shield" + 0.002*"fin" + 0.002*"freestream" + 0.002*"ablat" + 0.002*"planar" + 0.002*"reinforc"
Topic: 5 Word: 0.007*"slip" + 0.003*"oval" + 0.003*"hoop" + 0.002*"knudsen" + 0.002

**Performance Evaluation for n=10**

In [None]:
#Computing Perplexity
print('\nPerplexity: ', lda1.log_perplexity(corpus))

# Computing Coherence Score
coherence_lda1 = CoherenceModel(model=lda1, texts=processed_data, dictionary=dictionary, coherence='c_v')
coherence_lda11 = coherence_lda1.get_coherence()
print('\nCoherence Score: ', coherence_lda11)


Perplexity:  -7.03095840369207

Coherence Score:  0.6559759112426058


**Visualizing the n=10 model**

In [None]:
pyLDAvis.enable_notebook()
vis = pyLDAvis.gensim_models.prepare(lda1, corpus, dictionary)
vis

**Building model with n=12**

In [None]:
lda2 = gensim.models.LdaMulticore(corpus_tfidf, num_topics=12, id2word=dictionary, passes=15, workers=4)
for idx, topic in lda2.print_topics(-1):
    print('Topic: {} Word: {}'.format(idx, topic))

Topic: 0 Word: 0.004*"pohlhausen" + 0.004*"fluctuat" + 0.004*"freon" + 0.004*"nois" + 0.004*"airfoil" + 0.003*"major" + 0.003*"contour" + 0.003*"volum" + 0.002*"separ" + 0.002*"wrinkl"
Topic: 1 Word: 0.022*"buckl" + 0.018*"shell" + 0.011*"cylind" + 0.011*"panel" + 0.011*"load" + 0.010*"stress" + 0.009*"creep" + 0.008*"bend" + 0.008*"cylindr" + 0.008*"stiffen"
Topic: 2 Word: 0.003*"eddi" + 0.003*"orbit" + 0.002*"sodium" + 0.002*"sectori" + 0.002*"lee" + 0.002*"structur" + 0.002*"solar" + 0.002*"degreek" + 0.002*"galcit" + 0.002*"thicken"
Topic: 3 Word: 0.007*"layer" + 0.007*"boundari" + 0.007*"heat" + 0.006*"flow" + 0.006*"shock" + 0.005*"solut" + 0.005*"temperatur" + 0.005*"transfer" + 0.005*"equat" + 0.004*"pressur"
Topic: 4 Word: 0.004*"charg" + 0.004*"particl" + 0.003*"disk" + 0.002*"absolut" + 0.002*"advers" + 0.002*"retard" + 0.002*"propel" + 0.002*"vtol" + 0.002*"transport" + 0.002*"pound"
Topic: 5 Word: 0.004*"entropi" + 0.004*"creep" + 0.003*"column" + 0.003*"capac" + 0.003*"mo

**Performance Evaluation on n=12**

In [None]:
#Computing Perplexity
print('\nPerplexity: ', lda2.log_perplexity(corpus))

# Computing Coherence Score
coherence_lda2 = CoherenceModel(model=lda2, texts=processed_data, dictionary=dictionary, coherence='c_v')
coherence_lda21 = coherence_lda2.get_coherence()
print('\nCoherence Score: ', coherence_lda21)


Perplexity:  -7.069868914753971

Coherence Score:  0.5650497922089681


**Visualization of model with n=12**

In [None]:
pyLDAvis.enable_notebook()
vis = pyLDAvis.gensim_models.prepare(lda2, corpus, dictionary)
vis

**Building model with n = 14, computing the metrics and visualizing the performance**

In [None]:
lda3 = gensim.models.LdaMulticore(corpus_tfidf, num_topics=14, id2word=dictionary, passes=15, workers=4)
for idx, topic in lda3.print_topics(-1):
    print('Topic: {} Word: {}'.format(idx, topic))

Topic: 0 Word: 0.008*"sandwich" + 0.005*"orthotrop" + 0.004*"deton" + 0.004*"isotrop" + 0.003*"unpressur" + 0.002*"schlicht" + 0.002*"diatom" + 0.002*"individu" + 0.002*"honeycomb" + 0.002*"statist"
Topic: 1 Word: 0.010*"propel" + 0.010*"nois" + 0.009*"fatigu" + 0.008*"aircraft" + 0.007*"structur" + 0.005*"tilt" + 0.005*"slipstream" + 0.005*"grind" + 0.005*"acoust" + 0.005*"shallow"
Topic: 2 Word: 0.004*"fin" + 0.003*"lattic" + 0.003*"oge" + 0.003*"fold" + 0.003*"preston" + 0.002*"dodecagon" + 0.002*"sextic" + 0.002*"glide" + 0.002*"million" + 0.002*"skip"
Topic: 3 Word: 0.006*"layer" + 0.006*"wing" + 0.006*"flow" + 0.006*"boundari" + 0.005*"bodi" + 0.005*"heat" + 0.005*"shock" + 0.005*"pressur" + 0.005*"number" + 0.005*"solut"
Topic: 4 Word: 0.017*"creep" + 0.010*"column" + 0.009*"detach" + 0.008*"rough" + 0.006*"plastic" + 0.006*"membran" + 0.005*"collaps" + 0.004*"element" + 0.004*"strain" + 0.003*"mechan"
Topic: 5 Word: 0.015*"inject" + 0.013*"nozzl" + 0.007*"blade" + 0.007*"ablat"

In [None]:
#Computing Perplexity
print('\nPerplexity: ', lda3.log_perplexity(corpus))

# Computing Coherence Score
coherence_lda3 = CoherenceModel(model=lda3, texts=processed_data, dictionary=dictionary, coherence='c_v')
coherence_lda31 = coherence_lda3.get_coherence()
print('\nCoherence Score: ', coherence_lda31)


Perplexity:  -7.09254873557678

Coherence Score:  0.5743057349193464


In [None]:
pyLDAvis.enable_notebook()
vis = pyLDAvis.gensim_models.prepare(lda3, corpus, dictionary)
vis

**Building model with n = 16, computing the metrics and visualizing the performance**

In [None]:
lda4 = gensim.models.LdaMulticore(corpus_tfidf, num_topics=16, id2word=dictionary, passes=15, workers=4)
for idx, topic in lda4.print_topics(-1):
    print('Topic: {} Word: {}'.format(idx, topic))

Topic: 0 Word: 0.003*"latitud" + 0.003*"releas" + 0.003*"blow" + 0.003*"night" + 0.003*"suction" + 0.003*"grid" + 0.003*"overexpand" + 0.002*"confin" + 0.002*"claus" + 0.002*"globul"
Topic: 1 Word: 0.011*"layer" + 0.010*"boundari" + 0.009*"shock" + 0.007*"flow" + 0.007*"number" + 0.007*"turbul" + 0.007*"separ" + 0.006*"pressur" + 0.006*"nozzl" + 0.006*"transit"
Topic: 2 Word: 0.014*"buckl" + 0.014*"shell" + 0.009*"cylind" + 0.009*"load" + 0.008*"stress" + 0.007*"creep" + 0.007*"plate" + 0.006*"solut" + 0.006*"cylindr" + 0.006*"bend"
Topic: 3 Word: 0.003*"probe" + 0.003*"deby" + 0.003*"molecul" + 0.003*"wrinkl" + 0.003*"gust" + 0.003*"modulus" + 0.002*"discharg" + 0.002*"airforc" + 0.002*"membran" + 0.002*"colloc"
Topic: 4 Word: 0.004*"restraint" + 0.003*"deton" + 0.003*"maccol" + 0.003*"heavi" + 0.002*"lattic" + 0.002*"nod" + 0.002*"isobar" + 0.002*"melt" + 0.002*"accumul" + 0.002*"hoff"
Topic: 5 Word: 0.004*"space" + 0.004*"simpli" + 0.003*"stiffen" + 0.003*"oge" + 0.003*"support" + 0

In [None]:
#Computing Perplexity
print('\nPerplexity: ', lda4.log_perplexity(corpus))

# Computing Coherence Score
coherence_lda4 = CoherenceModel(model=lda4, texts=processed_data, dictionary=dictionary, coherence='c_v')
coherence_lda41 = coherence_lda4.get_coherence()
print('\nCoherence Score: ', coherence_lda41)


Perplexity:  -7.13904302282289

Coherence Score:  0.599452827833622


In [None]:
pyLDAvis.enable_notebook()
vis = pyLDAvis.gensim_models.prepare(lda4, corpus, dictionary)
vis

**Building model with n = 13, computing the metrics and visualizing the performance**

In [None]:
lda5 = gensim.models.LdaMulticore(corpus_tfidf, num_topics=13, id2word=dictionary, passes=15, workers=4)
for idx, topic in lda5.print_topics(-1):
    print('Topic: {} Word: {}'.format(idx, topic))

Topic: 0 Word: 0.008*"suction" + 0.006*"navier" + 0.004*"slipstream" + 0.004*"perpendicular" + 0.004*"flexur" + 0.003*"generalis" + 0.003*"fourth" + 0.003*"couett" + 0.003*"discharg" + 0.003*"apex"
Topic: 1 Word: 0.004*"aeroelast" + 0.003*"crocco" + 0.003*"wrinkl" + 0.003*"falkner" + 0.003*"skan" + 0.003*"kernel" + 0.003*"tailplan" + 0.003*"reservoir" + 0.003*"panel" + 0.003*"essenti"
Topic: 2 Word: 0.005*"sting" + 0.003*"propag" + 0.003*"lattic" + 0.003*"flap" + 0.003*"million" + 0.003*"boom" + 0.003*"hing" + 0.003*"hyperbol" + 0.003*"oge" + 0.002*"split"
Topic: 3 Word: 0.009*"fatigu" + 0.005*"turbin" + 0.004*"strain" + 0.004*"right" + 0.003*"cycl" + 0.003*"damag" + 0.003*"cumul" + 0.002*"southwel" + 0.002*"stress" + 0.002*"product"
Topic: 4 Word: 0.008*"stiffen" + 0.006*"simpli" + 0.006*"clamp" + 0.005*"oseen" + 0.004*"support" + 0.004*"plat" + 0.003*"charg" + 0.003*"matrix" + 0.003*"panel" + 0.002*"mise"
Topic: 5 Word: 0.022*"panel" + 0.004*"flutter" + 0.004*"freon" + 0.004*"buckl" 

In [None]:
pyLDAvis.enable_notebook()
vis = pyLDAvis.gensim_models.prepare(lda5, corpus, dictionary)
vis

**Building model with n = 15, computing the metrics and visualizing the performance**

In [None]:
lda6 = gensim.models.LdaMulticore(corpus_tfidf, num_topics=15, id2word=dictionary, passes=15, workers=4)
for idx, topic in lda6.print_topics(-1):
    print('Topic: {} Word: {}'.format(idx, topic))

Topic: 0 Word: 0.011*"inject" + 0.011*"transfer" + 0.009*"heat" + 0.008*"layer" + 0.008*"transit" + 0.007*"shock" + 0.007*"number" + 0.007*"turbul" + 0.007*"boundari" + 0.007*"tube"
Topic: 1 Word: 0.003*"heater" + 0.003*"liapunov" + 0.002*"claus" + 0.002*"elaps" + 0.002*"knudsen" + 0.002*"inconsist" + 0.002*"invert" + 0.002*"torqu" + 0.002*"extension" + 0.002*"curtain"
Topic: 2 Word: 0.005*"freon" + 0.003*"comment" + 0.003*"lubric" + 0.002*"plume" + 0.002*"accommod" + 0.002*"nacell" + 0.002*"nod" + 0.002*"sublim" + 0.002*"wrinkl" + 0.002*"gorcum"
Topic: 3 Word: 0.006*"prescrib" + 0.004*"vehicl" + 0.003*"radiat" + 0.003*"mainstream" + 0.002*"matric" + 0.002*"emiss" + 0.002*"particl" + 0.002*"nonlift" + 0.002*"buri" + 0.002*"earth"
Topic: 4 Word: 0.003*"multi" + 0.003*"theodorsen" + 0.002*"vertex" + 0.002*"grid" + 0.002*"status" + 0.002*"magnus" + 0.002*"isobar" + 0.002*"code" + 0.002*"sweptback" + 0.002*"convert"
Topic: 5 Word: 0.005*"exit" + 0.005*"converg" + 0.005*"rocket" + 0.004*"ex

In [None]:
pyLDAvis.enable_notebook()
vis = pyLDAvis.gensim_models.prepare(lda6, corpus, dictionary)
vis

**Performance Check of the model with n=12 and n=15 on the unseen data**

n=12

In [None]:
unseen_data1 = "Aerodynamic interaction between propellers of a distributed-propulsion system in forward flight"
corpus1 = dictionary.doc2bow(preprocess(unseen_data1))
for index, score in sorted(lda2[corpus1], key=lambda tup: -1*tup[1]):
    print("Score: {}\t Topic: {}".format(score, lda2.print_topic(index, 2)))

Score: 0.7750461101531982	 Topic: 0.007*"layer" + 0.007*"boundari"
Score: 0.12078623473644257	 Topic: 0.009*"flap" + 0.008*"tail"
Score: 0.010417289100587368	 Topic: 0.018*"wing" + 0.009*"flutter"
Score: 0.010416824370622635	 Topic: 0.004*"charg" + 0.004*"particl"
Score: 0.010416782461106777	 Topic: 0.004*"pohlhausen" + 0.004*"fluctuat"
Score: 0.010416774079203606	 Topic: 0.022*"buckl" + 0.018*"shell"
Score: 0.010416670702397823	 Topic: 0.011*"fatigu" + 0.006*"nois"
Score: 0.0104166679084301	 Topic: 0.003*"eddi" + 0.003*"orbit"
Score: 0.0104166679084301	 Topic: 0.004*"entropi" + 0.004*"creep"
Score: 0.0104166679084301	 Topic: 0.003*"water" + 0.003*"dash"
Score: 0.0104166679084301	 Topic: 0.012*"compressor" + 0.008*"orbit"
Score: 0.0104166679084301	 Topic: 0.003*"frustum" + 0.002*"sound"


n=15

In [None]:
unseen_data2 = "Aerodynamic interaction between propellers of a distributed-propulsion system in forward flight"
corpus2 = dictionary.doc2bow(preprocess(unseen_data2))
for index, score in sorted(lda6[corpus2], key=lambda tup: -1*tup[1]):
    print("Score: {}\t Topic: {}".format(score, lda6.print_topic(index, 2)))

Score: 0.6169230341911316	 Topic: 0.010*"buckl" + 0.009*"shell"
Score: 0.2747434973716736	 Topic: 0.008*"flow" + 0.008*"wing"


n=12

In [None]:
unseen_data3 = "Turbulence Model Could Help Design Aircraft Capable of Handling Extreme Scenarios"
corpus3 = dictionary.doc2bow(preprocess(unseen_data3))
for index, score in sorted(lda2[corpus3], key=lambda tup: -1*tup[1]):
    print("Score: {}\t Topic: {}".format(score, lda2.print_topic(index, 2)))

Score: 0.7126365303993225	 Topic: 0.007*"layer" + 0.007*"boundari"
Score: 0.19476987421512604	 Topic: 0.018*"wing" + 0.009*"flutter"


n=15

In [None]:
unseen_data4 = "Turbulence Model Could Help Design Aircraft Capable of Handling Extreme Scenarios"
corpus4 = dictionary.doc2bow(preprocess(unseen_data4))
for index, score in sorted(lda6[corpus4], key=lambda tup: -1*tup[1]):
    print("Score: {}\t Topic: {}".format(score, lda6.print_topic(index, 2)))

Score: 0.6537162065505981	 Topic: 0.008*"flow" + 0.008*"wing"
Score: 0.24998739361763	 Topic: 0.010*"buckl" + 0.009*"shell"


n=12

In [None]:
unseen_data5 = "Zero emissions hydrogen plane test was part powered by fossil fuels"
corpus5 = dictionary.doc2bow(preprocess(unseen_data5))
for index, score in sorted(lda2[corpus5], key=lambda tup: -1*tup[1]):
    print("Score: {}\t Topic: {}".format(score, lda2.print_topic(index, 2)))

Score: 0.7604056000709534	 Topic: 0.007*"layer" + 0.007*"boundari"
Score: 0.13542647659778595	 Topic: 0.012*"compressor" + 0.008*"orbit"
Score: 0.010417105630040169	 Topic: 0.018*"wing" + 0.009*"flutter"
Score: 0.010416975244879723	 Topic: 0.022*"buckl" + 0.018*"shell"
Score: 0.010416939854621887	 Topic: 0.004*"charg" + 0.004*"particl"
Score: 0.010416872799396515	 Topic: 0.003*"water" + 0.003*"dash"
Score: 0.010416730307042599	 Topic: 0.003*"frustum" + 0.002*"sound"
Score: 0.010416669771075249	 Topic: 0.004*"pohlhausen" + 0.004*"fluctuat"
Score: 0.0104166679084301	 Topic: 0.011*"fatigu" + 0.006*"nois"
Score: 0.0104166679084301	 Topic: 0.009*"flap" + 0.008*"tail"
Score: 0.010416666977107525	 Topic: 0.003*"eddi" + 0.003*"orbit"
Score: 0.010416666977107525	 Topic: 0.004*"entropi" + 0.004*"creep"


n=16

In [None]:
unseen_data6 = "Zero emissions hydrogen plane test was part powered by fossil fuels"
corpus6 = dictionary.doc2bow(preprocess(unseen_data6))
for index, score in sorted(lda6[corpus6], key=lambda tup: -1*tup[1]):
    print("Score: {}\t Topic: {}".format(score, lda6.print_topic(index, 2)))

Score: 0.41834452748298645	 Topic: 0.008*"flow" + 0.008*"wing"
Score: 0.34831538796424866	 Topic: 0.011*"compressor" + 0.007*"orbit"
Score: 0.13333997130393982	 Topic: 0.006*"prescrib" + 0.004*"vehicl"


n=12

In [None]:
unseen_data7 = "how can one detect transition phenomena in boundary layers"
corpus7 = dictionary.doc2bow(preprocess(unseen_data7))
for index, score in sorted(lda2[corpus7], key=lambda tup: -1*tup[1]):
    print("Score: {}\t Topic: {}".format(score, lda2.print_topic(index, 2)))

Score: 0.6726142764091492	 Topic: 0.007*"layer" + 0.007*"boundari"
Score: 0.1884964555501938	 Topic: 0.004*"pohlhausen" + 0.004*"fluctuat"
Score: 0.013889055699110031	 Topic: 0.018*"wing" + 0.009*"flutter"
Score: 0.013889019377529621	 Topic: 0.003*"water" + 0.003*"dash"
Score: 0.013888935558497906	 Topic: 0.022*"buckl" + 0.018*"shell"
Score: 0.013888920657336712	 Topic: 0.004*"charg" + 0.004*"particl"
Score: 0.013888892717659473	 Topic: 0.011*"fatigu" + 0.006*"nois"
Score: 0.013888888992369175	 Topic: 0.003*"eddi" + 0.003*"orbit"
Score: 0.013888888992369175	 Topic: 0.004*"entropi" + 0.004*"creep"
Score: 0.013888888992369175	 Topic: 0.012*"compressor" + 0.008*"orbit"
Score: 0.013888888992369175	 Topic: 0.003*"frustum" + 0.002*"sound"
Score: 0.013888888992369175	 Topic: 0.009*"flap" + 0.008*"tail"


In [None]:
unseen_data8 = "No One Can Explain Why Planes Stay in the Air"
corpus8 = dictionary.doc2bow(preprocess(unseen_data8))
for index, score in sorted(lda2[corpus8], key=lambda tup: -1*tup[1]):
    print("Score: {}\t Topic: {}".format(score, lda2.print_topic(index, 2)))

Score: 0.6944437623023987	 Topic: 0.018*"wing" + 0.009*"flutter"
Score: 0.02777812071144581	 Topic: 0.003*"eddi" + 0.003*"orbit"
Score: 0.027777912095189095	 Topic: 0.004*"pohlhausen" + 0.004*"fluctuat"
Score: 0.027777863666415215	 Topic: 0.022*"buckl" + 0.018*"shell"
Score: 0.027777837589383125	 Topic: 0.007*"layer" + 0.007*"boundari"
Score: 0.027777792885899544	 Topic: 0.009*"flap" + 0.008*"tail"
Score: 0.02777778171002865	 Topic: 0.012*"compressor" + 0.008*"orbit"
Score: 0.02777778171002865	 Topic: 0.011*"fatigu" + 0.006*"nois"
Score: 0.02777777798473835	 Topic: 0.004*"charg" + 0.004*"particl"
Score: 0.02777777798473835	 Topic: 0.004*"entropi" + 0.004*"creep"
Score: 0.02777777798473835	 Topic: 0.003*"water" + 0.003*"dash"
Score: 0.02777777798473835	 Topic: 0.003*"frustum" + 0.002*"sound"


n=15

In [None]:
unseen_data9 = "No One Can Explain Why Planes Stay in the Air"
corpus9 = dictionary.doc2bow(preprocess(unseen_data9))
for index, score in sorted(lda6[corpus9], key=lambda tup: -1*tup[1]):
    print("Score: {}\t Topic: {}".format(score, lda6.print_topic(index, 2)))

Score: 0.35555583238601685	 Topic: 0.011*"inject" + 0.011*"transfer"
Score: 0.35555514693260193	 Topic: 0.008*"flow" + 0.008*"wing"
Score: 0.022222332656383514	 Topic: 0.010*"buckl" + 0.009*"shell"
Score: 0.02222222462296486	 Topic: 0.003*"corridor" + 0.002*"raleigh"
Score: 0.02222222089767456	 Topic: 0.003*"heater" + 0.003*"liapunov"
Score: 0.02222222089767456	 Topic: 0.005*"freon" + 0.003*"comment"
Score: 0.02222222089767456	 Topic: 0.006*"prescrib" + 0.004*"vehicl"
Score: 0.02222222089767456	 Topic: 0.003*"multi" + 0.003*"theodorsen"
Score: 0.02222222089767456	 Topic: 0.005*"exit" + 0.005*"converg"
Score: 0.02222222089767456	 Topic: 0.005*"blast" + 0.005*"hyperson"
Score: 0.02222222089767456	 Topic: 0.005*"freestream" + 0.003*"con"
Score: 0.02222222089767456	 Topic: 0.003*"hodograph" + 0.003*"vane"
Score: 0.02222222089767456	 Topic: 0.007*"blade" + 0.006*"impel"
Score: 0.02222222089767456	 Topic: 0.009*"slipstream" + 0.008*"tilt"
Score: 0.02222222089767456	 Topic: 0.011*"compressor"