# MVP Cognitive Search Application: Summary 

In [27]:
text = """
09_080661e-00 June 2016 
Page 2 of 3
Golpanol@ ALS

Chemical nature
Sodium allyl sulfonate

PRD-No.
30042690
BASF's commercial product numbers

Structural formula
H,C=CH-CH,-$0,Na

Molecular formula
H5O,NaS

Molar mass (DIN 32625)
144 g/mol

Appearance
Golpanol9 ALS is a clear, colorless or yellowish liquid

Shelf life
Golpanol9 ALS has a shelf life of 2 years in sealed containers.

Properties
Some physical properties are listed in the table below. These are typical values
only and not all of them are monitored on a regular basis. They are correct at the
time of publication and do not necessarily form part of the product specification.
A detailed product specification is available on request or via BASF's WorldAccount:
https:/lworldaccount basf com registered access).

Golpanol9 ALS
Unit
Value

Physical form
liquid

Concentration
24.5- 25.5
(BASF method, bromide bromate titration)

Sulphite content
<0.05
(BASF method, iodometric)

APHA color
20
(DIN EN1557)

Density
glcm3
1.19-1.23
(DIN/ 51757, A$TMD 1298, 23 9C)

pH value
10.5-11.5
(|$0 976, 23 9C)

Solubility
Golpanol9 ALS is miscible with water in all proportions.
"""

In [28]:
import spacy
from spacy.lang.en.stop_words import STOP_WORDS
from string import punctuation

In [29]:
stopwords = list(STOP_WORDS)

In [30]:
nlp = spacy.load('en_core_web_sm')

In [31]:
doc = nlp(text)

In [32]:
tokens = [token.text for token in doc]
print(tokens)

['\n', '09_080661e-00', 'June', '2016', '\n', 'Page', '2', 'of', '3', '\n', 'Golpanol@', 'ALS', '\n\n', 'Chemical', 'nature', '\n', 'Sodium', 'allyl', 'sulfonate', '\n\n', 'PRD', '-', 'No', '.', '\n', '30042690', '\n', 'BASF', "'s", 'commercial', 'product', 'numbers', '\n\n', 'Structural', 'formula', '\n', 'H', ',', 'C', '=', 'CH', '-', 'CH,-$0,Na', '\n\n', 'Molecular', 'formula', '\n', 'H5O', ',', 'NaS', '\n\n', 'Molar', 'mass', '(', 'DIN', '32625', ')', '\n', '144', 'g', '/', 'mol', '\n\n', 'Appearance', '\n', 'Golpanol9', 'ALS', 'is', 'a', 'clear', ',', 'colorless', 'or', 'yellowish', 'liquid', '\n\n', 'Shelf', 'life', '\n', 'Golpanol9', 'ALS', 'has', 'a', 'shelf', 'life', 'of', '2', 'years', 'in', 'sealed', 'containers', '.', '\n\n', 'Properties', '\n', 'Some', 'physical', 'properties', 'are', 'listed', 'in', 'the', 'table', 'below', '.', 'These', 'are', 'typical', 'values', '\n', 'only', 'and', 'not', 'all', 'of', 'them', 'are', 'monitored', 'on', 'a', 'regular', 'basis', '.', 'Th

In [33]:
punctuation = punctuation + '\n'
punctuation

'!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~\n'

In [34]:
word_frequencies = {}
for word in doc:
    if word.text.lower() not in stopwords:
        if word.text.lower() not in punctuation:
            if word.text not in word_frequencies.keys():
                word_frequencies[word.text] = 1
            else:
                word_frequencies[word.text] += 1
                
print(word_frequencies)

{'09_080661e-00': 1, 'June': 1, '2016': 1, 'Page': 1, '2': 2, '3': 1, 'Golpanol@': 1, 'ALS': 5, '\n\n': 16, 'Chemical': 1, 'nature': 1, 'Sodium': 1, 'allyl': 1, 'sulfonate': 1, 'PRD': 1, '30042690': 1, 'BASF': 4, 'commercial': 1, 'product': 3, 'numbers': 1, 'Structural': 1, 'formula': 2, 'H': 1, 'C': 1, 'CH': 1, 'CH,-$0,Na': 1, 'Molecular': 1, 'H5O': 1, 'NaS': 1, 'Molar': 1, 'mass': 1, 'DIN': 2, '32625': 1, '144': 1, 'g': 1, 'mol': 1, 'Appearance': 1, 'Golpanol9': 4, 'clear': 1, 'colorless': 1, 'yellowish': 1, 'liquid': 2, 'Shelf': 1, 'life': 2, 'shelf': 1, 'years': 1, 'sealed': 1, 'containers': 1, 'Properties': 1, 'physical': 1, 'properties': 1, 'listed': 1, 'table': 1, 'typical': 1, 'values': 1, 'monitored': 1, 'regular': 1, 'basis': 1, 'correct': 1, 'time': 1, 'publication': 1, 'necessarily': 1, 'form': 2, 'specification': 2, 'detailed': 1, 'available': 1, 'request': 1, 'WorldAccount': 1, 'https:/lworldaccount': 1, 'basf': 1, 'com': 1, 'registered': 1, 'access': 1, 'Unit': 1, 'Value

In [35]:
max_frequency = max(word_frequencies.values())
max_frequency

16

In [36]:
for word in word_frequencies.keys():
    word_frequencies[word] = word_frequencies[word]/max_frequency

print(word_frequencies)

{'09_080661e-00': 0.0625, 'June': 0.0625, '2016': 0.0625, 'Page': 0.0625, '2': 0.125, '3': 0.0625, 'Golpanol@': 0.0625, 'ALS': 0.3125, '\n\n': 1.0, 'Chemical': 0.0625, 'nature': 0.0625, 'Sodium': 0.0625, 'allyl': 0.0625, 'sulfonate': 0.0625, 'PRD': 0.0625, '30042690': 0.0625, 'BASF': 0.25, 'commercial': 0.0625, 'product': 0.1875, 'numbers': 0.0625, 'Structural': 0.0625, 'formula': 0.125, 'H': 0.0625, 'C': 0.0625, 'CH': 0.0625, 'CH,-$0,Na': 0.0625, 'Molecular': 0.0625, 'H5O': 0.0625, 'NaS': 0.0625, 'Molar': 0.0625, 'mass': 0.0625, 'DIN': 0.125, '32625': 0.0625, '144': 0.0625, 'g': 0.0625, 'mol': 0.0625, 'Appearance': 0.0625, 'Golpanol9': 0.25, 'clear': 0.0625, 'colorless': 0.0625, 'yellowish': 0.0625, 'liquid': 0.125, 'Shelf': 0.0625, 'life': 0.125, 'shelf': 0.0625, 'years': 0.0625, 'sealed': 0.0625, 'containers': 0.0625, 'Properties': 0.0625, 'physical': 0.0625, 'properties': 0.0625, 'listed': 0.0625, 'table': 0.0625, 'typical': 0.0625, 'values': 0.0625, 'monitored': 0.0625, 'regular':

In [37]:
sentence_tokens = [sent for sent in doc.sents]
print(sentence_tokens)

[
09_080661e-00 June 2016 
, Page 2 of 3
Golpanol@ ALS

Chemical nature
, Sodium allyl sulfonate

PRD-, No.
, 30042690
, BASF's commercial product numbers

Structural formula
, H,C=CH-CH,-$0,Na

Molecular formula
, H5O,NaS

Molar mass (DIN 32625)
, 144 g/mol

Appearance
Golpanol9 ALS is a clear, colorless or yellowish liquid

Shelf life
, Golpanol9 ALS has a shelf life of 2 years in sealed containers., 

, Properties
, Some physical properties are listed in the table below., These are typical values
only and not all of them are monitored on a regular basis., They are correct at the
time of publication and do not necessarily form part of the product specification., 
, A detailed product specification is available on request or via BASF's WorldAccount:
https:/lworldaccount basf com registered access)., 

Golpanol9, ALS
, Unit
Value

Physical form
liquid

Concentration
24.5- 25.5
, (BASF method, bromide bromate titration)

Sulphite content
<0.05
, (BASF method, iodometric)

APHA color
20


In [38]:
sentence_scores = {}
for sent in sentence_tokens:
    for word in sent:
        if word.text.lower() in word_frequencies.keys():
            if sent not in sentence_scores.keys():
                sentence_scores[sent] = word_frequencies[word.text.lower()]
            else:
                sentence_scores[sent] += word_frequencies[word.text.lower()]
                
sentence_scores

{
 09_080661e-00 June 2016 : 0.125,
 Page 2 of 3
 Golpanol@ ALS
 
 Chemical nature: 1.25,
 Sodium allyl sulfonate
 
 PRD-: 1.125,
 30042690: 0.0625,
 BASF's commercial product numbers
 
 Structural formula: 1.5,
 H,C=CH-CH,-$0,Na
 
 Molecular formula: 1.125,
 H5O,NaS
 
 Molar mass (DIN 32625): 1.125,
 144 g/mol
 
 Appearance
 Golpanol9 ALS is a clear, colorless or yellowish liquid
 
 Shelf life: 2.6875,
 Golpanol9 ALS has a shelf life of 2 years in sealed containers.: 0.5,
 
 : 1.0,
 Properties: 0.0625,
 Some physical properties are listed in the table below.: 0.25,
 These are typical values
 only and not all of them are monitored on a regular basis.: 0.3125,
 They are correct at the
 time of publication and do not necessarily form part of the product specification.: 0.6875,
 A detailed product specification is available on request or via BASF's WorldAccount:
 https:/lworldaccount basf com registered access).: 0.875,
 
 
 Golpanol9: 1.0,
 Unit
 Value
 
 Physical form
 liquid
 
 Concent

In [39]:
from heapq import nlargest

In [40]:
select_length = int(len(sentence_tokens)*0.3)
select_length

8

In [41]:
summary = nlargest(select_length, sentence_scores, key = sentence_scores.get)
summary

[144 g/mol
 
 Appearance
 Golpanol9 ALS is a clear, colorless or yellowish liquid
 
 Shelf life,
 Unit
 Value
 
 Physical form
 liquid
 
 Concentration
 24.5- 25.5,
 BASF's commercial product numbers
 
 Structural formula,
 (BASF method, bromide bromate titration)
 
 Sulphite content
 <0.05,
 
 
 Density
 glcm3
 1.19-1.23
 (DIN/ 51757, A$TMD 1298, 23 9C),
 
 
 pH value
 10.5-11.5
 (|$0 976, 23 9C),
 (BASF method, iodometric)
 
 APHA color
 20
 (DIN EN1557),
 Page 2 of 3
 Golpanol@ ALS
 
 Chemical nature]

In [42]:
final_summary = [word.text for word in summary]
summary = ' '.join(final_summary)

In [43]:
print(text)


09_080661e-00 June 2016 
Page 2 of 3
Golpanol@ ALS

Chemical nature
Sodium allyl sulfonate

PRD-No.
30042690
BASF's commercial product numbers

Structural formula
H,C=CH-CH,-$0,Na

Molecular formula
H5O,NaS

Molar mass (DIN 32625)
144 g/mol

Appearance
Golpanol9 ALS is a clear, colorless or yellowish liquid

Shelf life
Golpanol9 ALS has a shelf life of 2 years in sealed containers.

Properties
Some physical properties are listed in the table below. These are typical values
only and not all of them are monitored on a regular basis. They are correct at the
time of publication and do not necessarily form part of the product specification.
A detailed product specification is available on request or via BASF's WorldAccount:
https:/lworldaccount basf com registered access).

Golpanol9 ALS
Unit
Value

Physical form
liquid

Concentration
24.5- 25.5
(BASF method, bromide bromate titration)

Sulphite content
<0.05
(BASF method, iodometric)

APHA color
20
(DIN EN1557)

Density
glcm3
1.19-1.23
(D

In [44]:
print(summary)

144 g/mol

Appearance
Golpanol9 ALS is a clear, colorless or yellowish liquid

Shelf life
 Unit
Value

Physical form
liquid

Concentration
24.5- 25.5
 BASF's commercial product numbers

Structural formula
 (BASF method, bromide bromate titration)

Sulphite content
<0.05
 

Density
glcm3
1.19-1.23
(DIN/ 51757, A$TMD 1298, 23 9C) 

pH value
10.5-11.5
(|$0 976, 23 9C) (BASF method, iodometric)

APHA color
20
(DIN EN1557) Page 2 of 3
Golpanol@ ALS

Chemical nature

