# Computational Language Technologies & Crealogix Language Model Fundamentals

The aim of this script is to summarize and create a prove of concept regarding the Crealogix's language modeling.

# Preparations

## Downloads & install packages and dependencies

In [1]:
def install_packages_dependencies():
    !pip install eng-to-ipa # get phonatics package
    !pip install nltk # Alternative to Spacy package
    !pip install -U spacy # Spacy 
    !python -m spacy download en_core_web_sm # Download the small version of the English language model
    !python -m spacy download en_core_web_lg # Downloads the large version of the English language model
    !python -m spacy download en_core_web_trf # Downloads the English transformers pipeline (Roberta-based)
    !pip install wikipedia-api # Wikipedia

#install_packages_dependencies()

## Imports

In [2]:
# Built-in's
import os, sys, re, time, random, datetime, json, codecs, hashlib, logging, base64, csv, io

# Data manipulation imports
import pandas as pd
import numpy as np

# Language imports
import spacy as sp
import eng_to_ipa as ipa
import nltk

# Scraping imports
from bs4 import BeautifulSoup
import selenium
import webbrowser


## Definitions

In [3]:
# Get tag, text, id and class of html soup element
def get_values(soup, tag_name=''):
    if tag_name and soup:
        for tag in soup.find_all(re.compile(f"^{tag_name}")):
            result = (tag.name, tag.text, tag.get('id'), tag.get('class'))
            return result
    else:
        print('No tag or soup name provided')

# Get tag of html soup element
def get_tags(soup):
    if soup:
        return [tag.name for tag in soup.find_all()]
    else:
        print('No soup provided')

# Find all siblings - for h2 and p pairs e.g.
def find_siblings_before(soup, tag_name='', attrs={}):
    if soup and tag_name:
        if attrs:
            if soup.find(tag_name, attrs=attrs):
                return soup.find(tag_name, attrs=attrs).find_previous_siblings()
        else:
            if soup.find(tag_name):
                return soup.find(tag_name).find_previous_siblings()
            else:
                print(
                    f'No next siblings of <{tag_name}> tag found, please try another tag')
    else:
        print('No soup or tag name provided')

# Find all next siblings - for h2 and p pairs e.g.
def find_siblings_after(soup, tag_name='', attrs={}):
    if soup and tag_name:
        if attrs:
            if soup.find(tag_name, attrs=attrs):
                return soup.find(tag_name, attrs=attrs).find_next_siblings()
        else:
            if soup.find(tag_name):
                return soup.find(tag_name).find_next_siblings()
            else:
                print(
                    f'No next siblings of <{tag_name}> tag found, please try another tag')
    else:
        print('No soup or tag name provided')

# Find all parent - for h2 and p pairs e.g.
def find_parents(soup, tag_name='', attrs={}):
    if soup and tag_name:
        if attrs:
            if soup.find(tag_name, attrs=attrs):
                return soup.find(tag_name, attrs=attrs).find_parents()
        else:
            if soup.find(tag_name):
                return soup.find(tag_name).find_parents()
            else:
                print(
                    f'No parents of <{tag_name}> tag found, please try another tag')
    else:
        print('No soup or tag name provided')

# Find parent - for h2 and p pairs e.g.
def find_parent(soup, tag_name='', attrs={}):
    if soup and tag_name:
        if attrs:
            if soup.find(tag_name, attrs=attrs):
                return soup.find(tag_name, attrs=attrs).find_parent()
        else:
            if soup.find(tag_name):
                return soup.find(tag_name).find_parent()
            else:
                print(
                    f'No parent of <{tag_name}> tag found, please try another tag')
    else:
        print('No soup or tag name provided')


# Data Exploration

## Data Collection

A separate script have been created in order to scrape the data from Investopedia. 
The top-level URL for this scraping work was: <a href="https://www.investopedia.com/financial-term-dictionary-4769738">Investopedia Dictionary</a>.  

First the pickle files have to be loaded and concatenated into one single dataframe. 

In [4]:
alphabet = ['num','a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']
# if file is not found false is returned
data = ['data']
df = pd.DataFrame()

for letter in alphabet:
    file = f'./data/{letter.upper()}_investopedia_glossary_Pickle'
    if os.path.isfile(file):
        df_ = pd.read_pickle(file)
        df_ = df_.assign(letter = letter.upper())
        df = pd.concat([df,df_])
        # print(f'{letter.upper()}_investopedia_glossary_Pickle')
        # print(f"The dataframe's size is:\t", df_.shape[0])
        # print('Values scraped empty: \t\t',df_[df_.explanation == ''].explanation.count())
        # print('Values of except block:\t\t',df_[df_.explanation == 'No explanation found'].explanation.count())
        # print('-'*50,'\n')

df = df[['letter','title','explanation','href']]
df.head()

Unnamed: 0,letter,title,explanation,href
0,NUM,0x Protocol,"<div id=""mntl-sc-page_1-0"" class=""comp article...",https://www.investopedia.com/terms/1/0x-protoc...
1,NUM,1%/10 Net 30,"<div id=""mntl-sc-page_1-0"" class=""comp article...",https://www.investopedia.com/terms/1/1-10net30...
2,NUM,10-K,"<div id=""mntl-sc-page_1-0"" class=""comp article...",https://www.investopedia.com/terms/1/10-k.asp
3,NUM,10-K Wrap,"<div id=""mntl-sc-page_1-0"" class=""comp article...",https://www.investopedia.com/terms/1/10k-wrap.asp
4,NUM,10-Q SEC Form,"<div id=""mntl-sc-page_1-0"" class=""comp article...",https://www.investopedia.com/terms/1/10q.asp


The dataframe contains four different columns:  
- letter: The letter describes the location of the expresion in the dictionary.  
- title: The title describes the expresion.
- explanation: The explanation describes the expresion.  
- href: The href is the link to the expresion's source on investopedia. 

For pratical reasons, the explanation was scraped and saved as HTML code in order to be parsed. For pratical reasons. 

## Dataframe description
The data contains 6320 entries describing terms and definitions according to investopedia. 

In [5]:
for letter_ in alphabet:
    print(f'Letter {letter_.upper()}:\thas {df["letter"][df["letter"] == letter_.upper()].count()} entries.')

Letter NUM:	has 59 entries.
Letter A:	has 300 entries.
Letter B:	has 299 entries.
Letter C:	has 300 entries.
Letter D:	has 300 entries.
Letter E:	has 299 entries.
Letter F:	has 299 entries.
Letter G:	has 298 entries.
Letter H:	has 298 entries.
Letter I:	has 300 entries.
Letter J:	has 110 entries.
Letter K:	has 81 entries.
Letter L:	has 300 entries.
Letter M:	has 300 entries.
Letter N:	has 300 entries.
Letter O:	has 298 entries.
Letter P:	has 300 entries.
Letter Q:	has 106 entries.
Letter R:	has 298 entries.
Letter S:	has 298 entries.
Letter T:	has 300 entries.
Letter U:	has 285 entries.
Letter V:	has 196 entries.
Letter W:	has 281 entries.
Letter X:	has 9 entries.
Letter Y:	has 46 entries.
Letter Z:	has 60 entries.


## Manipulations & Pre-Processing

In [6]:
# Check for missing values
missing_values = df[(df['explanation'] == '') | (df['explanation'] == 'No explanation found')].title # keep missing values for quality check for later

# Count how many missing values exists
print('The number of missing values is',missing_values.count(), 'out of', df.shape[0],'values',round(missing_values.count()/df.shape[0]*100,2), '%')

# Check for missing values indexes
print(missing_values.index)

The number of missing values is 30 out of 6320 values 0.47 %
Int64Index([ 67, 215, 155,  60,  61,  71,   0,  62, 141,  22,  93, 117, 176,
             35,  43, 287,  73, 256,  59, 296,   8,   1, 107, 124, 132, 139,
            127,   7,  43,  58],
           dtype='int64')


The number of missing values is quite small compared to the total number of entries.

In [7]:
# Drop the missing rows containing the missing values
df.drop(missing_values.index, inplace=True)

# Check the shape of the dataframe after droping the missing values
df.shape

(5672, 4)

### Transformation of explanation column  
The next print-out shows the html content for the first entry. 

In [8]:
_df_ = df.iloc[0,2]

complete_section = BeautifulSoup(_df_, 'html.parser').find_all(['p','h2'])
paragraphs = BeautifulSoup(_df_, 'html.parser').find_all(['p'])
headings = BeautifulSoup(_df_, 'html.parser').find_all(['h2',])

print(' '.join([element.get_text() for element in complete_section]))

  What Is a 10-K?   
A 10-K is a comprehensive report filed annually by a publicly-traded company about its financial performance and is required by the U.S. Securities and Exchange Commission (SEC). The report contains much more detail than a company's annual report, which is sent to its shareholders before an annual meeting to elect company directors.
 
Some of the information a company is required to document in the 10-K includes its history, organizational structure, financial statements, earnings per share, subsidiaries, executive compensation, and any other relevant data.
 
The SEC requires this report to keep investors aware of a company's financial condition and to allow them to have enough information before they buy or sell shares in the corporation, or before investing in the firm’s corporate bonds.
   Understanding 10-Ks   
Because of the depth and nature of the information they contain, 10-Ks are fairly long and tend to be complicated. But investors need to understand that

Now first, we will create additional columns for the dataframe:
- heading_counts: The number of headings in the explanation.
- paragraph_counts: The number of paragraphs in the explanation.
- explanation_headings: The headings in the explanation.
- explanation_texT: The text in the explanation.

In [9]:
df.tail()

Unnamed: 0,letter,title,explanation,href
53,Z,Zone of Possible Agreement (ZOPA),"<div id=""mntl-sc-page_1-0"" class=""comp article...",https://www.investopedia.com/terms/z/zoneofpos...
54,Z,Zone of Resistance,"<div id=""mntl-sc-page_1-0"" class=""comp article...",https://www.investopedia.com/terms/z/zone-of-r...
55,Z,Zone of Support,"<div id=""mntl-sc-page_1-0"" class=""comp article...",https://www.investopedia.com/terms/z/zone-of-s...
56,Z,Zoning,"<div id=""mntl-sc-page_1-0"" class=""comp article...",https://www.investopedia.com/terms/z/zoning.asp
57,Z,Zoning Ordinance,"<div id=""mntl-sc-page_1-0"" class=""comp article...",https://www.investopedia.com/terms/z/zoning-or...


In [10]:
# Create an empty list
section = []
headers = []

# Loop through html to get the paragraphs and headings
for n,elements in enumerate(df['explanation']):
    section_content = ' '.join([element.get_text() for element in BeautifulSoup(elements, 'html.parser').find_all(['p','h2'])])
    section_headers = [element.get_text() for element in BeautifulSoup(elements, 'html.parser').find_all(['h2'])]
    section.append(section_content)
    headers.append(section_headers)

# Add those lists to the dataframe
df['section'] = np.array(section)
df['headers'] = np.array(headers)
df.head()

<class 'list'>
<class 'list'>
<class 'list'>
<class 'list'>
<class 'list'>
<class 'list'>
<class 'list'>
<class 'list'>
<class 'list'>
<class 'list'>
<class 'list'>
<class 'list'>
<class 'list'>
<class 'list'>
<class 'list'>
<class 'list'>
<class 'list'>
<class 'list'>
<class 'list'>
<class 'list'>
<class 'list'>
<class 'list'>
<class 'list'>
<class 'list'>
<class 'list'>
<class 'list'>
<class 'list'>
<class 'list'>
<class 'list'>
<class 'list'>
<class 'list'>
<class 'list'>
<class 'list'>
<class 'list'>
<class 'list'>
<class 'list'>
<class 'list'>
<class 'list'>
<class 'list'>
<class 'list'>
<class 'list'>
<class 'list'>
<class 'list'>
<class 'list'>
<class 'list'>
<class 'list'>
<class 'list'>
<class 'list'>
<class 'list'>
<class 'list'>
<class 'list'>
<class 'list'>
<class 'list'>
<class 'list'>
<class 'list'>
<class 'list'>
<class 'list'>
<class 'list'>
<class 'list'>
<class 'list'>
<class 'list'>
<class 'list'>
<class 'list'>
<class 'list'>
<class 'list'>
<class 'list'>
<class 'li

KeyboardInterrupt: 

In [None]:
df.rename(columns={'explanation':'html_source'})

Unnamed: 0,letter,title,html_source,href,section,headers
2,NUM,10-K,"<div id=""mntl-sc-page_1-0"" class=""comp article...",https://www.investopedia.com/terms/1/10-k.asp,What Is a 10-K? \nA 10-K is a comprehensiv...,"[ What Is a 10-K? , Understanding 10-Ks ,..."
3,NUM,10-K Wrap,"<div id=""mntl-sc-page_1-0"" class=""comp article...",https://www.investopedia.com/terms/1/10k-wrap.asp,What Is a 10-K Wrap? \nA 10-K wrap is a su...,"[ What Is a 10-K Wrap? , Understanding 10-..."
4,NUM,10-Q SEC Form,"<div id=""mntl-sc-page_1-0"" class=""comp article...",https://www.investopedia.com/terms/1/10q.asp,What Is SEC Form 10-Q? \nSEC Form 10-Q is ...,"[ What Is SEC Form 10-Q? , Understanding S..."
5,NUM,10-Year Treasury Note,"<div id=""mntl-sc-page_1-0"" class=""comp article...",https://www.investopedia.com/terms/1/10-yeartr...,What Is a 10-Year Treasury Note? \nThe 10-...,"[ What Is a 10-Year Treasury Note? , Under..."
6,NUM,100% Equities Strategy,"<div id=""mntl-sc-page_1-0"" class=""comp article...",https://www.investopedia.com/terms/1/100-equit...,What Is a 100% Equities Strategy? \nA 100%...,"[ What Is a 100% Equities Strategy? , Unde..."
...,...,...,...,...,...,...
53,Z,Zone of Possible Agreement (ZOPA),"<div id=""mntl-sc-page_1-0"" class=""comp article...",https://www.investopedia.com/terms/z/zoneofpos...,What is a Zone Of Possible Agreement (ZOPA)?...,[ What is a Zone Of Possible Agreement (ZOPA)...
54,Z,Zone of Resistance,"<div id=""mntl-sc-page_1-0"" class=""comp article...",https://www.investopedia.com/terms/z/zone-of-r...,What Is the Zone of Resistance? \nThe zone...,"[ What Is the Zone of Resistance? , Breaki..."
55,Z,Zone of Support,"<div id=""mntl-sc-page_1-0"" class=""comp article...",https://www.investopedia.com/terms/z/zone-of-s...,What Is a Zone of Support? \nA zone of sup...,"[ What Is a Zone of Support? , Understandi..."
56,Z,Zoning,"<div id=""mntl-sc-page_1-0"" class=""comp article...",https://www.investopedia.com/terms/z/zoning.asp,What Is Zoning? \nZoning refers to municip...,"[ What Is Zoning? , How Zoning Works , ..."


In [None]:
path_to_file = './data/investopedia_glossary_transformed.csv'
df.to_csv(path_to_file, index=False)

So, the dataset is now ready to go. For pratical reasons, the transformed dataframe is saved as csv for later use.

## Exploration

In [None]:
df = pd.read_csv(path_to_file)
df.head()

Unnamed: 0,letter,title,explanation,href,section,headers
0,NUM,10-K,"<div id=""mntl-sc-page_1-0"" class=""comp article-body-content mntl-sc-page mntl-block"" data-sc-sti...",https://www.investopedia.com/terms/1/10-k.asp,What Is a 10-K? \nA 10-K is a comprehensive report filed annually by a publicly-traded compa...,"[' What Is a 10-K? ', ' Understanding 10-Ks ', ' Where to Find a 10-K ', ' 10-K Filing De..."
1,NUM,10-K Wrap,"<div id=""mntl-sc-page_1-0"" class=""comp article-body-content mntl-sc-page mntl-block"" data-sc-sti...",https://www.investopedia.com/terms/1/10k-wrap.asp,What Is a 10-K Wrap? \nA 10-K wrap is a summary report of a company's annual performance tha...,"[' What Is a 10-K Wrap? ', ' Understanding 10-K Wrap ', ' Elements of a 10-K Wrap ', ' Sp..."
2,NUM,10-Q SEC Form,"<div id=""mntl-sc-page_1-0"" class=""comp article-body-content mntl-sc-page mntl-block"" data-sc-sti...",https://www.investopedia.com/terms/1/10q.asp,What Is SEC Form 10-Q? \nSEC Form 10-Q is a comprehensive report of financial performance th...,"[' What Is SEC Form 10-Q? ', ' Understanding SEC Form 10-Q ', ' SEC Form 10-Q Filing Deadli..."
3,NUM,10-Year Treasury Note,"<div id=""mntl-sc-page_1-0"" class=""comp article-body-content mntl-sc-page mntl-block"" data-sc-sti...",https://www.investopedia.com/terms/1/10-yeartreasury.asp,What Is a 10-Year Treasury Note? \nThe 10-year Treasury note is a debt obligation issued by ...,"[' What Is a 10-Year Treasury Note? ', ' Understanding 10-Year Treasury Notes ', ' The Adva..."
4,NUM,100% Equities Strategy,"<div id=""mntl-sc-page_1-0"" class=""comp article-body-content mntl-sc-page mntl-block"" data-sc-sti...",https://www.investopedia.com/terms/1/100-equities-strategy.asp,What Is a 100% Equities Strategy? \nA 100% equities strategy is a strategy commonly adopted ...,"[' What Is a 100% Equities Strategy? ', ' Understanding a 100% Equities Strategy ', ' Speci..."


In [None]:
import nltk
import spacy
import pandas as pd
import numpy as np
import pandas_profiling as prof

from pathlib import Path
from textblob import TextBlob

import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
matplotlib.rcParams['figure.figsize'] = (10.0, 6.0)
import plotly.graph_objs as go

import cufflinks as cf
import plotly.graph_objs as go
import plotly.figure_factory as ff
from plotly.offline import iplot
from IPython.core.interactiveshell import InteractiveShell

from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfVectorizer

InteractiveShell.ast_node_interactivity = 'all'
cf.go_offline()
cf.set_config_file(offline=False, world_readable=True, theme='pearl')

pd.options.display.max_colwidth = 100
pd.options.display.max_columns = 30


### EDA
As it shown below, there are several columns which are not necessary for the analysis. Therefore, the following columns are to be removed:
- Explanation: The explanation is not necessary for the analysis (content is rendered in under "section" and "header")
- Href: The href is not necessary for the analysis.

In [None]:
df.head() 

Unnamed: 0,letter,title,explanation,href,section,headers
0,NUM,10-K,"<div id=""mntl-sc-page_1-0"" class=""comp article-body-content mntl-sc-page mntl-block"" data-sc-sti...",https://www.investopedia.com/terms/1/10-k.asp,What Is a 10-K? \nA 10-K is a comprehensive report filed annually by a publicly-traded compa...,"[' What Is a 10-K? ', ' Understanding 10-Ks ', ' Where to Find a 10-K ', ' 10-K Filing De..."
1,NUM,10-K Wrap,"<div id=""mntl-sc-page_1-0"" class=""comp article-body-content mntl-sc-page mntl-block"" data-sc-sti...",https://www.investopedia.com/terms/1/10k-wrap.asp,What Is a 10-K Wrap? \nA 10-K wrap is a summary report of a company's annual performance tha...,"[' What Is a 10-K Wrap? ', ' Understanding 10-K Wrap ', ' Elements of a 10-K Wrap ', ' Sp..."
2,NUM,10-Q SEC Form,"<div id=""mntl-sc-page_1-0"" class=""comp article-body-content mntl-sc-page mntl-block"" data-sc-sti...",https://www.investopedia.com/terms/1/10q.asp,What Is SEC Form 10-Q? \nSEC Form 10-Q is a comprehensive report of financial performance th...,"[' What Is SEC Form 10-Q? ', ' Understanding SEC Form 10-Q ', ' SEC Form 10-Q Filing Deadli..."
3,NUM,10-Year Treasury Note,"<div id=""mntl-sc-page_1-0"" class=""comp article-body-content mntl-sc-page mntl-block"" data-sc-sti...",https://www.investopedia.com/terms/1/10-yeartreasury.asp,What Is a 10-Year Treasury Note? \nThe 10-year Treasury note is a debt obligation issued by ...,"[' What Is a 10-Year Treasury Note? ', ' Understanding 10-Year Treasury Notes ', ' The Adva..."
4,NUM,100% Equities Strategy,"<div id=""mntl-sc-page_1-0"" class=""comp article-body-content mntl-sc-page mntl-block"" data-sc-sti...",https://www.investopedia.com/terms/1/100-equities-strategy.asp,What Is a 100% Equities Strategy? \nA 100% equities strategy is a strategy commonly adopted ...,"[' What Is a 100% Equities Strategy? ', ' Understanding a 100% Equities Strategy ', ' Speci..."


In [None]:
df = df.drop(columns=['explanation','href'])
df.head()

Unnamed: 0,letter,title,section,headers
0,NUM,10-K,What Is a 10-K? \nA 10-K is a comprehensive report filed annually by a publicly-traded compa...,"[' What Is a 10-K? ', ' Understanding 10-Ks ', ' Where to Find a 10-K ', ' 10-K Filing De..."
1,NUM,10-K Wrap,What Is a 10-K Wrap? \nA 10-K wrap is a summary report of a company's annual performance tha...,"[' What Is a 10-K Wrap? ', ' Understanding 10-K Wrap ', ' Elements of a 10-K Wrap ', ' Sp..."
2,NUM,10-Q SEC Form,What Is SEC Form 10-Q? \nSEC Form 10-Q is a comprehensive report of financial performance th...,"[' What Is SEC Form 10-Q? ', ' Understanding SEC Form 10-Q ', ' SEC Form 10-Q Filing Deadli..."
3,NUM,10-Year Treasury Note,What Is a 10-Year Treasury Note? \nThe 10-year Treasury note is a debt obligation issued by ...,"[' What Is a 10-Year Treasury Note? ', ' Understanding 10-Year Treasury Notes ', ' The Adva..."
4,NUM,100% Equities Strategy,What Is a 100% Equities Strategy? \nA 100% equities strategy is a strategy commonly adopted ...,"[' What Is a 100% Equities Strategy? ', ' Understanding a 100% Equities Strategy ', ' Speci..."


Furthermore, the length of the section text is needs to be created. 
Additionaly, a further column is created showing the TTR (Type-Token Ratio) of the explanation.

In [None]:
# We will add some basic text features to the data

# Add 'Review lenght'
df['num_headers'] = df['headers'].apply(len) #.astype(list).apply(len)
df.headers.iloc[0].replace


"['  What Is a 10-K?  ', '  Understanding 10-Ks  ', '  Where to Find a 10-K  ', '  10-K Filing Deadlines  ', '  Forms 10-Q and 8-K  ']"

In [None]:
# Add 'Review lenght'
df['section_length'] = df['section'].astype(str).apply(len)
# Add simple token count
df['num_tokens'] = df['section'].apply(lambda x: len(str(x).split()))
# Add type 
df['num_types'] = df['section'].apply(lambda x: len(set(str(x).split())))
# Add TTR
df['TTR'] = df['num_types'] / df['num_tokens']
# Add polarity
df['polarity'] = df['section'].map(lambda text: TextBlob(str(text)).sentiment.polarity)

In [None]:
df.head()

Unnamed: 0,letter,title,section,headers,num_headers,section_length,num_tokens,num_types,TTR,polarity
0,NUM,10-K,What Is a 10-K? \nA 10-K is a comprehensive report filed annually by a publicly-traded compa...,"[' What Is a 10-K? ', ' Understanding 10-Ks ', ' Where to Find a 10-K ', ' 10-K Filing De...",133,3759,600,289,0.481667,0.123719
1,NUM,10-K Wrap,What Is a 10-K Wrap? \nA 10-K wrap is a summary report of a company's annual performance tha...,"[' What Is a 10-K Wrap? ', ' Understanding 10-K Wrap ', ' Elements of a 10-K Wrap ', ' Sp...",120,2403,380,194,0.510526,0.050418
2,NUM,10-Q SEC Form,What Is SEC Form 10-Q? \nSEC Form 10-Q is a comprehensive report of financial performance th...,"[' What Is SEC Form 10-Q? ', ' Understanding SEC Form 10-Q ', ' SEC Form 10-Q Filing Deadli...",279,8236,1332,533,0.40015,0.027911
3,NUM,10-Year Treasury Note,What Is a 10-Year Treasury Note? \nThe 10-year Treasury note is a debt obligation issued by ...,"[' What Is a 10-Year Treasury Note? ', ' Understanding 10-Year Treasury Notes ', ' The Adva...",137,3657,580,295,0.508621,0.044611
4,NUM,100% Equities Strategy,What Is a 100% Equities Strategy? \nA 100% equities strategy is a strategy commonly adopted ...,"[' What Is a 100% Equities Strategy? ', ' Understanding a 100% Equities Strategy ', ' Speci...",153,4110,609,329,0.54023,0.118738


In [None]:
df.tail()