# RocketML text topic modeling using SVD
## Import Libraries

In [None]:
from rocketml.io import DocumentSet
from rocketml.decomposition import TruncatedSVD
from rocketml import Pipeline
import sys
%matplotlib inline

## Create Document Set and Fit SVD

In [None]:
# Input file handed to DocumentSet (presumably a list of blog documents,
# judging by the file name -- confirm against the data set).
filename = "/home/ubuntu/examples/345_blogs_list.txt"
docset = DocumentSet(filename=filename)

# Fit a TruncatedSVD with its default settings on the document set.
svd = TruncatedSVD()
svd.fit(docset)
# Last expression of the cell: the notebook displays the size of
# docset.documents.
len(docset.documents)

## Plot Singular Values

In [None]:
import seaborn as sns

# Plot the singular values from the fitted SVD against their index.
# ``sns.tsplot`` was deprecated in seaborn 0.9 and removed in 0.10, so prefer
# ``sns.lineplot`` when it exists and fall back to ``tsplot`` only on older
# seaborn installs.
# NOTE(review): assumes svd.singular_values_ is a flat 1-D sequence -- confirm.
if hasattr(sns, "lineplot"):
    sns.lineplot(data=svd.singular_values_)
else:
    sns.tsplot(svd.singular_values_)

## Check Term Frequencies

In [None]:
import pandas as pd

In [None]:
# Tabulate every (term, value) pair of docset.terms_document_frequencies as a
# two-column DataFrame.  ``items()`` replaces the Python-2-only ``iteritems()``
# so the cell runs unchanged on both Python 2 and Python 3.
df = pd.DataFrame(
    list(docset.terms_document_frequencies.items()),
    columns=["Term", "Frequency"],
)

In [None]:
# Keep only the rows whose "Frequency" is above 120, ordered from the largest
# frequency to the smallest.
df_high = (
    df.loc[df["Frequency"] > 120]
    .sort_values(by=["Frequency"], ascending=False)
)

In [None]:
from bokeh.plotting import figure,show
from bokeh.io import output_notebook
from bokeh.models import ColumnDataSource, HoverTool
from bokeh.models.ranges import Range1d

In [None]:
# Configure Bokeh so that subsequent show() calls render inside the notebook.
output_notebook()

In [None]:
# Wrap the filtered frame in a ColumnDataSource so glyphs and hover tooltips
# can refer to its columns by name.
source = ColumnDataSource(df_high)

In [None]:
# Bar chart of the high-frequency terms: one bar per term on a categorical
# x axis, with the y axis running from 0 up to the largest frequency.
term_labels = df_high["Term"].values
top_frequency = df_high["Frequency"].max()

p = figure(
    plot_width=1000,
    plot_height=500,
    title="Term Frequencies",
    x_range=term_labels,
    y_range=Range1d(0, top_frequency),
)
p.vbar(x="Term", top="Frequency", width=0.2, source=source)

# Show the term and its frequency when hovering over a bar.
hover = HoverTool(tooltips=[("Term", "@Term"), ("Frequency", "@Frequency")])
p.add_tools(hover)

show(p)