# Structured data from Forums

We want to investigate whether forum conversations can be used to extract structured data for XA.

In [327]:
import pandas as pd
import numpy as np
import spacy
import en_core_web_sm
import textacy
import nltk

from spacy.lang.en.stop_words import STOP_WORDS
from nltk.stem.porter import *

# Load the en_core_web_sm spaCy model once; extractEntities below reuses this
# shared `nlp` object for every title it parses.
nlp = en_core_web_sm.load()

In [317]:
# Read only the first column of the forum export (usecols=[0]); later cells
# access it as rows["Link"].
rows = pd.read_csv("data/forum_data.csv", usecols=[0])

In [318]:
# Preview the first four rows of the loaded data.
rows.head(4)

Unnamed: 0,Link
0,"After high winds, the wire from the street pol..."
1,Error code when trying to update equipment
2,More speed
3,i need faster internet but cant get it.


In [328]:
# PorterStemmer is pulled in by the wildcard import from nltk.stem.porter.
# NOTE(review): none of the visible cells reference `stemmer`; the stem-like
# forms in the recorded output ("updat", "equip") suggest an earlier revision
# used it -- confirm whether it is still needed.
stemmer = PorterStemmer()

def recognizePOS(parsed, POS):
    """Return the verbatim text of each token whose ``pos_`` equals ``POS``.

    Parameters
    ----------
    parsed : iterable of token-like objects exposing ``pos_`` and ``orth_``
        (here, a parsed spaCy document).
    POS : str
        Part-of-speech label to match, e.g. "NOUN", "VERB" or "ADJ".

    Returns
    -------
    list of str
        The ``orth_`` of every matching token, in document order.
    """
    return [token.orth_ for token in parsed if token.pos_ == POS]


def extractEntities(text):
    """Extract noun chunks, POS word lists and SVO triples from one forum title.

    The title is lower-cased and parsed with the shared ``nlp`` pipeline.
    Stop words are removed and the remaining tokens are lemmatised; that
    reduced text is parsed a second time to obtain noun chunks and the
    noun / verb / adjective lists. Subject-verb-object triples are taken
    from the first (unfiltered) parse.

    Parameters
    ----------
    text : str
        Raw forum title.

    Returns
    -------
    pandas.Series
        Seven positional entries:
        0 - the lower-cased text,
        1 - noun-chunk strings from the filtered text,
        2 - nouns from the filtered text,
        3 - verbs from the filtered text,
        4 - adjectives from the filtered text,
        5 - subject-verb-object triples from the unfiltered parse,
        6 - the lemmas of the non-stop-word tokens.
    """
    text = text.lower()
    doc = nlp(text)

    # Read the lemma and the stop-word flag from the spaCy Token itself.
    # The previous code called `.lemma_` on the token's plain string, which
    # raises AttributeError because `str` has no such attribute.
    filtered_sentence = [token.lemma_ for token in doc if not token.is_stop]

    # Materialise the triples now, while `doc` is still the full parse.
    svos = list(textacy.extract.subject_verb_object_triples(doc))

    # Second parse: only the lemmatised, stop-word-free tokens.
    filtered_doc = nlp(" ".join(filtered_sentence))

    return pd.Series(
        data=[
            text,
            [chunk.text for chunk in filtered_doc.noun_chunks],
            recognizePOS(filtered_doc, "NOUN"),
            recognizePOS(filtered_doc, "VERB"),
            recognizePOS(filtered_doc, "ADJ"),
            svos,
            filtered_sentence,
        ]
    )




In [329]:
# Run the extractor on every forum title; each returned Series becomes one
# row of nRows, with integer column labels.
nRows = rows.apply(lambda post: extractEntities(post["Link"]), axis=1)

In [330]:
# Give readable labels to the first five positional columns.
# NOTE(review): positions 5 and 6 are not in this mapping, so they keep their
# integer labels (see the table header in the recorded output).
nRows = nRows.rename(
    columns=dict(enumerate(["text", "noun chunks", "nouns", "verbs", "adjs"]))
)
nRows.head(60)

Unnamed: 0,text,noun chunks,nouns,verbs,adjs,5,6
0,"after high winds, the wire from the street pol...","[high wind, wire street pole]","[wind, wire, street, pole]",[],"[high, low]",[],"[high, wind, ,, wire, street, pole, low]"
1,error code when trying to update equipment,"[error, tri updat equip]","[error, code, tri, updat, equip]",[],[],[],"[error, code, tri, updat, equip]"
2,more speed,[speed],[speed],[],[],[],[speed]
3,i need faster internet but cant get it.,[faster internet],[internet],[need],[faster],"[((i), (need), (internet))]","[need, faster, internet, nt, .]"
4,payment,[payment],[payment],[],[],[],[payment]
5,recently updated plan turned into nightmare,[recent updat plan],"[updat, plan, nightmar]",[turn],[recent],[],"[recent, updat, plan, turn, nightmar]"
6,xb7,[xb7],[],[],[],[],[xb7]
7,property damage,[properti damag],"[properti, damag]",[],[],[],"[properti, damag]"
8,incompetent customer service,[incompet custom],"[custom, servic]",[],[incompet],[],"[incompet, custom, servic]"
9,manage my plan link has been broke for several...,[manag plan link],"[manag, plan, link, year]",[broke],[],[],"[manag, plan, link, broke, year]"


In [322]:
# Keep only the posts whose extracted noun list contains `term`.
term = "account"

# `found` holds how many times `term` occurs in each row's noun list;
# assign() works on a copy, so nRows itself is left untouched.
fRows = nRows.assign(found=nRows["nouns"].apply(lambda nouns: nouns.count(term)))

# Drop every row where the term never occurs, then display what remains.
indexNames = fRows.index[fRows["found"] == 0]
fRows = fRows.drop(indexNames)
fRows


Unnamed: 0,text,noun chunks,nouns,verbs,adjs,5,6,found
47,suddenly inactive account,[suddenly inactive account],[account],[],[inactive],[],"[suddenly, inactive, account]",1
49,deprovisioning all devices under my account,[devices],"[devices, account]",[deprovisioning],[],[],"[deprovisioning, devices, account]",1
54,old xfinity account preventing use of apps at ...,"[old xfinity account, use, new address]","[xfinity, account, use, apps, address]",[preventing],"[old, new]",[],"[old, xfinity, account, preventing, use, apps,...",1
56,how to delete old account?,[delete old account],[account],[],"[delete, old]",[],"[delete, old, account, ?]",1
125,need bills of my old account ending with 9719,"[need, bills old account]","[need, bills, account, ending]",[],[old],[],"[need, bills, old, account, ending, 9719]",1
156,remove old address from account,[old address account],"[address, account]",[remove],[old],[],"[remove, old, address, account]",1
184,no order information or account set up,[order information account],"[order, information, account, set]",[],[],[],"[order, information, account, set]",1
232,xfinity keeps sending account-related messages...,"[xfinity, account - related messages, users]","[xfinity, account, messages, users]","[keeps, sending, related, restricted, account]",[],"[((xfinity), (keeps), (sending))]","[xfinity, keeps, sending, account, -, related,...",1
249,cancel service due to death of account holder,[service death account holder],"[service, death, account, holder]",[cancel],[],[],"[cancel, service, death, account, holder]",1
