In [1]:
import sqlite3
from simplenlg import *

In [2]:
lexicon = Lexicon.getDefaultLexicon()
nlgFactory = NLGFactory(lexicon)
realiser = Realiser(lexicon)

In [3]:
database = 'transaction_data.sqlite'
try:
    connq = sqlite3.connect(database)
except Error as e:
    print(e)
cur = connq.cursor()

In [4]:
#no of rows in database.
cur.execute("select Count(*) from data" )
rows = cur.fetchall()
obj = nlgFactory.createNounPhrase(str(rows[0][0])+" transactions")
p=nlgFactory.createClause("the data","consist of",obj)
sentence1 = nlgFactory.createSentence()
sentence1.addComponent(p)
print(realiser.realise(sentence1))

The data consists of 50 transactions.


In [5]:
#type of transactions with count
cur.execute("select transaction_type, count(*) from data group by transaction_type" )
rows = cur.fetchall()
for row in rows:
    print(row)
n1 = nlgFactory.createNounPhrase(str(rows[0][0]) + " transactions")
n2 = nlgFactory.createNounPhrase(str(rows[1][0]) + " transactions")
n1.setPlural(True)
n2.setPlural(True)
q = nlgFactory.createClause()
diff = 0
if rows[0][1]>rows[1][1]:
    q.setSubject(n1)
    q.setObject(n2)
    diff = rows[0][1] - rows[1][1]
else:
    q.setSubject(n2)
    q.setObject(n1)
    diff = rows[1][1] - rows[0][1]
q.setVerb("exceed")
q.addComplement("by " + str(diff))
sentence2 = nlgFactory.createSentence()
sentence2.addComponent(q)
print(realiser.realise(sentence2))


('Credit', 19)
('Debit', 31)
Debit transactions exceed Credit transactions by 12.


In [6]:
#further details
cur.execute("select is_suspicious,transaction_type,count(*),avg(Occurance_probability) from data group by is_suspicious,transaction_type" )
rows = cur.fetchall()
sub = [None]*4
i = 0
for row in rows:
    print(row)
    if row is 0:
        sub[i]= nlgFactory.createNounPhrase("suspicious " + row[1].lower() + " transactions")
    else:
        sub[i] =  nlgFactory.createNounPhrase("non-suspicious " + row[1].lower() + " transactions")
        sub[i].setPlural(True)
        i = i+1
sub[0].addPreModifier("Total")
t = nlgFactory.createClause(sub[0],"is",str(rows[0][2]))
u = nlgFactory.createClause(sub[1],"is",str(rows[1][2]))
v = nlgFactory.createClause(sub[2],"is",str(rows[2][2]))
w = nlgFactory.createClause(sub[3],"is",str(rows[3][2]))
c = nlgFactory.createCoordinatedPhrase()
c.addCoordinate(t)
c.addCoordinate(u)
c.addCoordinate(v)
c.addCoordinate(w)
sentence4 = nlgFactory.createSentence()
sentence4.addComponent(c)
print(realiser.realise(sentence4))


(0, 'Credit', 16, 0.6055328582069228)
(0, 'Debit', 28, 0.6404107306994379)
(1, 'Credit', 3, 0.006173236695082996)
(1, 'Debit', 3, 0.01575442290295273)
Total non-suspicious credit transactions are 16, non-suspicious debit transactions are 28, non-suspicious credit transactions are 3 and non-suspicious debit transactions are 3.


In [7]:
#no of suspicious crimes with probability
cur.execute("select is_suspicious,count(*),avg(Occurance_probability) from data group by is_suspicious" )
rows = cur.fetchall()
for row in rows:
    print(row)

r = nlgFactory.createClause()
sub = nlgFactory.createNounPhrase("the probability")
pp = nlgFactory.createPrepositionPhrase();
pp.addComplement("a suspicious transaction");
pp.setPreposition("of");
sub.addPostModifier(pp);
r.setSubject(sub)
r.setVerb("be")
r.addComplement(str(round(rows[0][2],3)))
s = nlgFactory.createClause("non-suspicious transaction","is",str(round(rows[1][2],3)))
c = nlgFactory.createCoordinatedPhrase()
c.addCoordinate(r)
c.addCoordinate(s)
sentence3 = nlgFactory.createSentence()
sentence3.addComponent(c)
print(realiser.realise(sentence3))

(0, 44, 0.6277278679748869)
(1, 6, 0.010963829799017862)
The probability of a suspicious transaction is 0.628 and non-suspicious transaction is 0.011.


In [8]:
#Writing sentences for 5 rows.(rowise)
cur.execute("select * from data limit 5 offset 14")
rows = cur.fetchall()
for row in rows:
    print (row)
    
    r = nlgFactory.createClause()
    sub = nlgFactory.createNounPhrase("The Given account ",str(row[1]))
    r.setSubject(sub)
    r.setVerb("be")
    if str(row[2])=='Credit':
        r.addComplement("a credit account")
    else :
        r.addComplement("a debit account")
    if row[4]==0:
        s = nlgFactory.createClause("it is a non-suspicious transaction")
        c = nlgFactory.createCoordinatedPhrase(r,s)
        #c.addCoordinate(r)
        #c.addCoordinate(s)
        c.setFeature(Feature.CONJUNCTION, "and");
        sentence = nlgFactory.createSentence()
        sentence.addComponent(c)
        print(realiser.realise(sentence))
        print('\n')
    else:
        s = nlgFactory.createClause("it is a suspicious transaction")
        c = nlgFactory.createCoordinatedPhrase(r,s)
        c.setFeature(Feature.CONJUNCTION, "and");
        sentence = nlgFactory.createSentence()
        sentence.addComponent(c)
        print(realiser.realise(sentence))
        print('\n')
    

(15, 5014, 'Credit', 1971, 0, 0.509495204396564)
The Given account 5014 is a credit account and it is a non-suspicious transaction.


(16, 5008, 'Credit', 8849, 1, 0.000619510561900127)
The Given account 5008 is a credit account and it is a suspicious transaction.


(17, 5012, 'Debit', 7973, 1, 0.00243611794334719)
The Given account 5012 is a debit account and it is a suspicious transaction.


(18, 5005, 'Debit', 1482, 0, 0.600684031145574)
The Given account 5005 is a debit account and it is a non-suspicious transaction.


(19, 5015, 'Debit', 521, 0, 0.761056790634202)
The Given account 5015 is a debit account and it is a non-suspicious transaction.




In [9]:
#printing the paragraph
print(realiser.realise(sentence1),realiser.realise(sentence2),realiser.realise(sentence4),realiser.realise(sentence3))

The data consists of 50 transactions. Debit transactions exceed Credit transactions by 12. Total, non-suspicious credit transactions, are, 16, non-suspicious debit transactions are 28, non-suspicious credit transactions are 3, and non-suspicious debit transactions are 3. The probability of a suspicious transaction is 0.628, and non-suspicious transaction is 0.011.
