In [44]:
import numpy as np
import os
import pandas as pd
import sys

In [45]:
train = pd.read_csv("Feature-OpinionExtraction.csv", delimiter='\t')
sample_output = pd.read_csv("Feature-OpinionExtraction_outputSample.csv", delimiter='\t')

In [46]:
train.head()

Unnamed: 0,Text
0,"After checking the result of my xray, told me ..."
1,after confirming with the nurse what pharmacy ...
2,After discussing the issues with the nurse (ma...
3,"After follow up with primary pedi, primary ped..."
4,"After getting all my information, the doctor w..."


In [47]:
sample_output.tail()

Unnamed: 0,Text,topics/keywords,review/opinion/feedback
3,"Staff was awesome, Doctor incredible.",Staff,awesome
4,"Staff was awesome, Doctor incredible.",Doctor,incredible
5,very clear instructions,instructions,very clear
6,Beautiful office surroundings & wonderful rece...,office surroundings,Beautiful
7,Beautiful office surroundings & wonderful rece...,receptionists,wonderful


In [48]:
sample_output['Text'][0]

'The office is very clean and customer service was great and speedy'

In [61]:
import spacy 
nlp = spacy.load('en_core_web_sm')

In [62]:
document = nlp(sample_output['Text'][0])

In [63]:
# create dependency tree
for word in document:
    print (word, ': ', str(list(word.children)))

The :  []
office :  [The]
is :  [office, clean, and, was]
very :  []
clean :  [very]
and :  []
customer :  []
service :  [customer]
was :  [service, great]
great :  [and, speedy]
and :  []
speedy :  []


In [91]:
from nltk import Tree
def to_nltk_tree(node):
    if node.n_lefts + node.n_rights > 0:
        return Tree(node.orth_, [to_nltk_tree(child) for child in node.children])
    else:
        return node.orth_


[to_nltk_tree(sent.root).pretty_print() for sent in document.sents]

             is                           
  ___________|_____________                
 |    |      |            was             
 |    |      |       ______|____           
 |  office clean service      great       
 |    |      |      |       ____|_____     
and  The    very customer and       speedy



[None]

In [123]:
tok = nlp(sample_output['Text'][0])
svos = findSVOs(tok)

for chunk in tok.noun_chunks:
    print(chunk.text, chunk.root.text, chunk.root.dep_,
          chunk.root.head.text)
    
for token in tok:
    print(token.text, token.dep_, token.head.text, token.head.pos_,
          [child for child in token.children])

The office office nsubj is
customer service service nsubj was
The det office NOUN []
office nsubj is VERB [The]
is ROOT is VERB [office, clean, and, was]
very advmod clean ADJ []
clean acomp is VERB [very]
and cc is VERB []
customer compound service NOUN []
service nsubj was VERB [customer]
was conj is VERB [service, great]
great acomp was VERB [and, speedy]
and cc great ADJ []
speedy conj great ADJ []


In [189]:
from nltk.corpus import stopwords
from string import punctuation
from spacy.symbols import nsubj, VERB
stop_words = set(stopwords.words('english'))

data_text = []
data_topics = []
data_opinions = []

for doc in train['Text']:
    tok = nlp(doc)
    # Finding subjects
    subjs = []
    opinions = []
    valid = ['NOUN','ADJ','PROP','NUM','PROPN']
    sub_op_dict = {}
    for possible_subject in tok:
        if possible_subject.dep == nsubj and possible_subject.head.pos == VERB:
            child_list = [child for child in possible_subject.head.children]
            subjs.append(possible_subject)

            for i in child_list:
                if (i not in subjs) and (i.pos_ in valid) and (i not in stop_words):
                    opinions.append(i)

            if possible_subject not in sub_op_dict:
                sub_op_dict[possible_subject] = opinions   
            else:
                sub_op_dict[possible_subject] = (opinions)

    for k,v in sub_op_dict.items():
        data_text.append(doc)
        data_topics.append(k)
        data_opinions.append(opinions)
        

In [196]:
###submission

result = pd.DataFrame()
result['Text'] = data_text
result['topics/keywords'] = data_topics
result['review/opinion/feedback'] = data_opinions

result.to_csv("nn_result.csv", index=False)

In [197]:
result.head(20)

Unnamed: 0,Text,topics/keywords,review/opinion/feedback
0,"After checking the result of my xray, told me ...",I,"[ok, medicines, papers, what]"
1,"After checking the result of my xray, told me ...",doctor,"[ok, medicines, papers, what]"
2,"After checking the result of my xray, told me ...",I,"[ok, medicines, papers, what]"
3,"After checking the result of my xray, told me ...",doctor,"[ok, medicines, papers, what]"
4,"After checking the result of my xray, told me ...",I,"[ok, medicines, papers, what]"
5,after confirming with the nurse what pharmacy ...,pharmacy,"[what, meds]"
6,After discussing the issues with the nurse (ma...,nurse,[report]
7,After discussing the issues with the nurse (ma...,nurse,[report]
8,"After follow up with primary pedi, primary ped...",pedi,[]
9,"After getting all my information, the doctor w...",doctor,[something]
