In [1]:
#Natural Language - Old AI

In [2]:
import io
import requests
import pandas as pd
from simplenlg.framework import *
from simplenlg.lexicon import *
from simplenlg.realiser.english import *
from simplenlg.phrasespec import *
from simplenlg.features import *

In [3]:
url = 'https://raw.githubusercontent.com/dadejecova/Natural-Language-Python/main/dataset/financials.csv'
response = requests.get(url)

In [4]:
if response.status_code == 200:
  #we read this with pnds
  stocks_df = pd.read_csv(io.StringIO(response.text))
  print(stocks_df.sample(10))
else:
  print("Failed to fetch Datset")

    Symbol                       Name                  Sector   Price  \
17     ALK       Alaska Air Group Inc             Industrials   64.04   
90     KMX                 Carmax Inc  Consumer Discretionary   64.34   
295    MMC           Marsh & McLennan              Financials   79.31   
373    PFG  Principal Financial Group              Financials   60.38   
203    BEN         Franklin Resources              Financials   38.00   
386    RRC      Range Resources Corp.                  Energy   12.82   
347    OMC              Omnicom Group  Consumer Discretionary   75.91   
393     RF    Regions Financial Corp.              Financials   17.90   
161    EMN           Eastman Chemical               Materials   93.57   
218    GWW       Grainger (W.W.) Inc.             Industrials  258.60   

     Price/Earnings  Dividend Yield  Earnings/Share  52 Week Low  \
17             9.66        1.992838            8.28      101.430   
90            19.44        0.000000            3.97       77

In [5]:
stocks_df.describe()

Unnamed: 0,Price,Price/Earnings,Dividend Yield,Earnings/Share,52 Week Low,52 Week High,Market Cap,EBITDA,Price/Sales,Price/Book
count,505.0,503.0,505.0,505.0,505.0,505.0,505.0,505.0,505.0,497.0
mean,103.830634,24.80839,1.895953,3.753743,122.623832,83.536616,49239440000.0,3590328000.0,3.941705,14.453179
std,134.427636,41.241081,1.537214,5.689036,155.36214,105.725473,90050170000.0,6840544000.0,3.46011,89.660508
min,2.82,-251.53,0.0,-28.01,6.59,2.8,2626102000.0,-5067000000.0,0.153186,0.51
25%,46.25,15.35,0.794834,1.49,56.25,38.43,12732070000.0,773932000.0,1.62949,2.02
50%,73.92,19.45,1.769255,2.89,86.68,62.85,21400950000.0,1614399000.0,2.89644,3.4
75%,116.54,25.75,2.781114,5.14,140.13,96.66,45119680000.0,3692749000.0,4.703842,6.11
max,1806.06,520.15,12.661196,44.09,2067.99,1589.0,809508000000.0,79386000000.0,20.094294,1403.38


In [6]:
lexicon = Lexicon.getDefaultLexicon()

realiser = Realiser(lexicon)

nlgFactory = NLGFactory(lexicon)

In [7]:
def create_descriptions(row):

    subject = nlgFactory.createNounPhrase(
        "The company " + 
        str(row['Name'])
    )

    verb1 = nlgFactory.createVerbPhrase(
        "is part of the " + 
        str(row['Sector']) + 
        " Sector"
    )

    object1 = nlgFactory.createNounPhrase(
        " and is currently trading at $" + 
        str(row['Price']) + " per share."
    )

    clause1 = nlgFactory.createClause(subject, verb1, object1)

    verb2 = nlgFactory.createVerbPhrase(
        "Boasts"
    )

    object2 = nlgFactory.createNounPhrase(
        "a market capitalization of " + 
        str(round(row['Market Cap']/1e+9, 2)) + 
        " billion dollars."
    )

    clause2 = nlgFactory.createClause(
        "the company ", verb2, object2
    )

    object3 = nlgFactory.createNounPhrase(
        " The annual dividend yield is " + 
        str(round(row['Dividend Yield'], 2))
    )

    clause3 = nlgFactory.createClause(
        object3
    )

    verb4 = nlgFactory.createVerbPhrase(
        "have"
    )

    object4 = nlgFactory.createNounPhrase(
        "a strong financial position with an EBITDA of " +
        str(round(row['EBITDA']/1e+9, 2)) + 
        " billion dollars."
    )

    clause4 = nlgFactory.createClause(
        "It", verb4, object4
    )

    if row['EBITDA'] < 0:
        # This is to negate the clause
        # If the EBITDA is negative, we want to say that the company does not have a strong financial position
        clause4.setFeature(Feature.NEGATED, True)


    s1 = nlgFactory.createSentence(clause1)
    s2 = nlgFactory.createSentence(clause2)
    s3 = nlgFactory.createSentence(clause3)
    s4 = nlgFactory.createSentence(clause4)

    paragraph = nlgFactory.createParagraph([
        s1, s2, s3, s4
    ])

    output = realiser.realise(paragraph).getRealisation()

    return output.strip()


In [8]:
stocks_df['text'] = stocks_df.apply(lambda r: create_descriptions(r), axis=1)

stocks_df[['Name', 'Sector', 'Price', 'Dividend Yield', 'Market Cap', 'text']].sample(10)

Unnamed: 0,Name,Sector,Price,Dividend Yield,Market Cap,text
371,Praxair Inc.,Materials,144.07,2.199853,42948660000.0,The company Praxair Inc. is part of the Materi...
454,Tyson Foods,Consumer Staples,73.92,1.635323,26957530000.0,The company Tyson Foods is part of the Consume...
101,CF Industries Holdings Inc,Materials,37.46,3.039514,9209107000.0,The company CF Industries Holdings Inc is part...
2,Abbott Laboratories,Health Care,56.27,1.908982,102121000000.0,The company Abbott Laboratories is part of the...
115,Citizens Financial Group,Financials,42.19,1.962533,22008050000.0,The company Citizens Financial Group is part o...
404,SBA Communications,Real Estate,159.85,0.0,19572030000.0,The company SBA Communications is part of the ...
153,Dover Corp.,Industrials,96.2,1.881505,15566650000.0,The company Dover Corp. is part of the Industr...
315,Mondelez International,Consumer Staples,42.68,1.99773,65827820000.0,The company Mondelez International is part of ...
477,Visa Inc.,Information Technology,113.86,0.702048,270038700000.0,The company Visa Inc. is part of the Informati...
252,Intl Flavors & Fragrances,Materials,138.0,1.934128,11270040000.0,The company Intl Flavors & Fragrances is part ...


In [9]:
stocks_df.iloc[2,:] # Example of how to access the text for a specific row

Symbol                                                          ABT
Name                                            Abbott Laboratories
Sector                                                  Health Care
Price                                                         56.27
Price/Earnings                                                22.51
Dividend Yield                                             1.908982
Earnings/Share                                                 0.26
52 Week Low                                                    64.6
52 Week High                                                  42.28
Market Cap                                           102121042306.0
EBITDA                                                 5744000000.0
Price/Sales                                                 3.74048
Price/Book                                                     3.19
SEC Filings       http://www.sec.gov/cgi-bin/browse-edgar?action...
text              The company Abbott Laboratorie

In [10]:
stocks_df['text'][2]

'The company Abbott Laboratories is part of the Health Care Sector and is currently trading at $56.27 per share. The company Boastses a market capitalization of 102.12 billion dollars. The annual dividend yield is 1.91. It has a strong financial position with an EBITDA of 5.74 billion dollars.'

In [11]:
stocks_df['text'][10]

'The company AES Corp is part of the Utilities Sector and is currently trading at $10.06 per share. The company Boastses a market capitalization of 6.92 billion dollars. The annual dividend yield is 4.96. It has a strong financial position with an EBITDA of 3.0 billion dollars.'

In [12]:
stocks_df['text'][23]

'The company Allergan, Plc is part of the Health Care Sector and is currently trading at $164.2 per share. The company Boastses a market capitalization of 56.67 billion dollars. The annual dividend yield is 1.64. It does not have a strong financial position with an EBITDA of -2.89 billion dollars.'

In [13]:
stocks_df[stocks_df['EBITDA'] < 0]

Unnamed: 0,Symbol,Name,Sector,Price,Price/Earnings,Dividend Yield,Earnings/Share,52 Week Low,52 Week High,Market Cap,EBITDA,Price/Sales,Price/Book,SEC Filings,text
23,AGN,"Allergan, Plc",Health Care,164.2,10.65,1.643289,38.35,256.8,160.07,56668830000.0,-2888100000.0,4.820115,0.83,http://www.sec.gov/cgi-bin/browse-edgar?action...,"The company Allergan, Plc is part of the Healt..."
59,ADSK,Autodesk Inc,Information Technology,104.81,-77.07,0.0,-2.61,131.1,81.75,24348290000.0,-378100000.0,16.50682,224.13,http://www.sec.gov/cgi-bin/browse-edgar?action...,The company Autodesk Inc is part of the Inform...
143,XRAY,Dentsply Sirona,Health Care,56.85,22.65,0.600343,1.99,68.98,52.535,13390510000.0,-411100000.0,4.626262,1.8,http://www.sec.gov/cgi-bin/browse-edgar?action...,The company Dentsply Sirona is part of the Hea...
193,FE,FirstEnergy Corp,Utilities,30.64,11.18,4.673807,-14.49,35.22,27.93,13706080000.0,-5067000000.0,1.299448,2.19,http://www.sec.gov/cgi-bin/browse-edgar?action...,The company FirstEnergy Corp is part of the Ut...
209,GE,General Electric,Industrials,14.45,13.76,3.147541,-0.72,30.59,14.71,132249300000.0,-206000000.0,1.088761,1.7,http://www.sec.gov/cgi-bin/browse-edgar?action...,The company General Electric is part of the In...
229,HES,Hess Corporation,Energy,43.0,-9.33,2.26706,-19.94,55.48,37.25,14016130000.0,-819000000.0,3.780475,1.08,http://www.sec.gov/cgi-bin/browse-edgar?action...,The company Hess Corporation is part of the En...
245,INCY,Incyte,Health Care,83.92,-119.89,0.0,0.54,153.15,84.21,18220960000.0,-81686000.0,17.02699,10.25,http://www.sec.gov/cgi-bin/browse-edgar?action...,The company Incyte is part of the Health Care ...
299,MAT,Mattel Inc.,Consumer Discretionary,16.0,-14.68,0.0,-3.06,26.3,12.71,5843402000.0,-203599000.0,1.186372,3.87,http://www.sec.gov/cgi-bin/browse-edgar?action...,The company Mattel Inc. is part of the Consume...
336,NBL,Noble Energy Inc,Energy,25.43,105.96,1.477105,-2.32,39.6,22.985,13177330000.0,-518000000.0,4.697645,1.44,http://www.sec.gov/cgi-bin/browse-edgar?action...,The company Noble Energy Inc is part of the En...
