# Structured data from Forums

We are looking to investigate if Forum conversations can be used to extract structured data for XA

In [340]:
import pandas as pd
import numpy as np
import spacy
import en_core_web_sm
import textacy
import nltk

from spacy.lang.en.stop_words import STOP_WORDS
from nltk.stem.porter import *

nlp = en_core_web_sm.load()

In [341]:
rows = pd.read_csv("data/quora.csv", usecols=[0])

In [342]:
rows.head(4)

Unnamed: 0,Link
0,"After high winds, the wire from the street pol..."
1,Error code when trying to update equipment
2,More speed
3,i need faster internet but cant get it.


In [343]:
stemmer = PorterStemmer()

def recognizePOS(parsed, POS) :
    words = []
    for token in parsed :
        if token.pos_ == POS :
            word = token.orth_
            words.append(word)
    return words


def extractEntities(text):   
    text = text.lower()
    
    doc = nlp(text)
    
    token_list = []
    for token in doc:
        token_list.append(token.lemma_)
        
    filtered_sentence =[] 

    for word in token_list:
        lexeme = nlp.vocab[word]
        if lexeme.is_stop == False:
            filtered_sentence.append(word) 

    
    svos = textacy.extract.subject_verb_object_triples(doc)
    doc = nlp(" ".join(filtered_sentence))
    return pd.Series(data=[text, [chunk.text for chunk in doc.noun_chunks], 
                           recognizePOS(doc, "NOUN"),
                           recognizePOS(doc, "VERB"),
                           recognizePOS(doc, "ADJ"),
                           [svo for svo in svos]
                           ]
                    )




In [344]:
nRows = rows.apply(lambda row : extractEntities(row["Link"]),axis=1)

In [345]:
nRows = nRows.rename(columns={0: "text", 1: "noun chunks", 2: "nouns", 3: "verbs", 4: "adjs", 5: "Sub/Verb/Obj"})
nRows.head(60)

Unnamed: 0,text,noun chunks,nouns,verbs,adjs,Sub/Verb/Obj
0,"after high winds, the wire from the street pol...","[high wind, wire street pole]","[wind, wire, street, pole]",[],"[high, low]",[]
1,error code when trying to update equipment,"[error code, update equipment]","[error, code, update, equipment]",[try],[],[]
2,more speed,[speed],[speed],[],[],[]
3,i need faster internet but cant get it.,[fast internet],[internet],[need],[fast],"[((i), (need), (internet))]"
4,payment,[payment],[payment],[],[],[]
5,recently updated plan turned into nightmare,"[recently update plan, nightmare]","[update, plan, nightmare]",[turn],[],[]
6,xb7,[xb7],[],[],[],[]
7,property damage,[property damage],"[property, damage]",[],[],[]
8,incompetent customer service,[incompetent customer service],"[customer, service]",[],[incompetent],[]
9,manage my plan link has been broke for several...,[break],"[-PRON-, plan, break, year]","[manage, link]",[],[]


In [346]:
term = "account"

fRows = nRows.copy()
fRows["found"] = nRows["nouns"].apply(lambda l: l.count(term))

indexNames = fRows[ fRows['found'] == 0 ].index
fRows.drop(indexNames , inplace=True)
fRows


Unnamed: 0,text,noun chunks,nouns,verbs,adjs,Sub/Verb/Obj,found
47,suddenly inactive account,[suddenly inactive account],[account],[],[inactive],[],1
49,deprovisioning all devices under my account,[deprovision device -PRON- account],"[deprovision, device, account]",[],[],[],1
54,old xfinity account preventing use of apps at ...,"[old xfinity account prevent, app new address]","[xfinity, account, prevent, use, app, address]",[],"[old, new]",[],1
56,how to delete old account?,[delete old account],[account],[],"[delete, old]",[],1
125,need bills of my old account ending with 9719,[bill -PRON- old account],"[bill, account, end]",[need],[old],[],1
156,remove old address from account,[old address account],"[address, account]",[remove],[old],[],1
184,no order information or account set up,[order information account],"[order, information, account, set]",[],[],[],1
193,unlinking accounts,"[unlink, account]","[unlink, account]",[],[],[],1
212,"my account suddenly became ""inactive""....",[ -PRON- account],[account],[reconnect],[inactive],[],1
232,xfinity keeps sending account-related messages...,"[xfinity, account - relate message restrict us...","[xfinity, account, relate, message, user, -PRO...","[send, restrict]",[],"[((xfinity), (keeps), (sending))]",2
