In [1]:
# importing libraries

import pandas as pd
from sklearn.externals import joblib
from sklearn.feature_extraction import DictVectorizer
from sklearn.linear_model import SGDClassifier
from sklearn.multiclass import OneVsRestClassifier
from sklearn.naive_bayes import MultinomialNB
from sklearn import svm
from scipy.sparse import hstack
import warnings

from preprocessing.xml_2_dataframe import Xml2DataFrame
from preprocessing.pos_tagger import POSTagger

In [2]:
# path of training dataset

path_train = r'Laptops_Train_v2.xml'
path_test = r'C:Laptops_Test_Gold.xml'
new_test_path = r'test.xml'


In [3]:
# xml parser
def get_xml_data(path):
    xml2df = Xml2DataFrame()
    xml_dataframe = xml2df.process_data(path)
    return xml_dataframe


In [4]:
# Making list to train
train_dataframe = get_xml_data(path_train)
# print(train_dataframe.head())
train_text_list = train_dataframe['text']
train_aspects_list = list(train_dataframe['aspect_info'])
print(train_text_list.head())

0    i charge it at night and skip taking the cord with me because of the good battery life.                                                                                            
1    i bought a hp pavilion dv4-1222nr laptop and have had so many problems with the computer.                                                                                          
2    the tech guy then said the service center does not do 1-to-1 exchange and i have to direct my concern to the "sales" team, which is the retail shop which i bought my netbook from.
3    i investigated netbooks and saw the toshiba nb305-n410bl.                                                                                                                          
4    the other day i had a presentation to do for a seminar at a large conference in town- lots of people, little time to prep and have to set up a computer to a projector, etc.       
Name: text, dtype: object


In [5]:
# getting pos_tag for each review

def pos_tag(review):
    tagged_text_list = []
    pos_tagger = POSTagger()
    for text in review:
        tagged_text_list.append(pos_tagger.pos_tagger(text=text))
    return tagged_text_list


# pos_tag for  filtering of noun,adjective,verb,adverb

def filter_tag(tagged_reviews):
    filtered_list = []
    pos_tagger = POSTagger()
    for tagged_review in tagged_reviews:
        filtered_list.append(pos_tagger.filter_pos_tag(tagged_review))
    return filtered_list

In [6]:
# pos_tagging

tagged_text_list_train = pos_tag(train_text_list)


In [7]:
# getting the final train list after filtering

final_train_text_list = filter_tag(tagged_text_list_train)
print(final_train_text_list[:5])

['charge night skip taking cord good battery life', 'bought hp pavilion dv4-1222nr laptop many problems computer', 'tech guy said service center 1-to-1 exchange direct concern sales team retail shop bought netbook', 'investigated netbooks saw toshiba nb305-n410bl', 'day presentation seminar large conference town- lots people little time prep set computer projector etc']


In [8]:
#Selecting only 20 most common aspect.

def get_most_common_aspect(aspect_list):
    import nltk
    aspect_terms = []
    aspect_list = list(aspect_list.aspect_info)
    for inner_list in aspect_list:
        if inner_list is not None:
            for _dict in inner_list:
                # for key in _dict:
                aspect_terms.append(_dict.get('term'))

    most_common_aspect = [k for k, v in nltk.FreqDist(aspect_terms).most_common(1000)]
    return most_common_aspect

In [9]:
# generating the data frame

def get_data_frame(text_list, train_aspects_list, most_common_aspect):
    data = {'Text': text_list}
    df = pd.DataFrame(data)
    for inner_list in train_aspects_list:
        if inner_list is not None:
            for _dict in inner_list:
                
                if _dict.get('term') in most_common_aspect:
                    df.loc[train_aspects_list.index(inner_list), _dict.get('term')] = _dict.get('polarity')
    return df

In [10]:
# generate data frame for aspect extraction
def get_aspect_data_frame(df, most_common_aspect):
    for common_aspect in most_common_aspect:
        df[common_aspect] = df[common_aspect].replace(['positive','negative','neutral','conflict'],[1,1,1,1])
    df = df.fillna(0)
    return df

In [11]:
# getting and printing the most common aspect.

most_common_aspect = get_most_common_aspect(train_dataframe)
print(most_common_aspect)

['screen', 'price', 'use', 'battery life', 'battery', 'keyboard', 'programs', 'software', 'features', 'warranty', 'hard drive', 'quality', 'size', 'performance', 'speed', 'Windows', 'memory', 'graphics', 'applications', 'motherboard', 'runs', 'Vista', 'gaming', 'system', 'program', 'works', 'charge', 'design', 'display', 'power supply', 'Windows 7', 'service', 'operating system', 'windows', 'warrenty', 'value', 'keys', 'mouse', 'OS', 'speakers', 'extended warranty', 'shipping', 'games', 'cost', 'work', 'processor', 'look', 'hardware', 'power', 'weight', 'carry', 'fan', 'Keyboard', 'touchpad', 'trackpad', 'feature', 'boot up', 'webcam', 'internet', 'sound', 'touch pad', 'iWork', 'screen size', 'Windows 7 Starter', 'tech support', 'Screen', 'drivers', 'navigate', 'hinge', 'mousepad', 'iTunes', 'cd drive', 'space', 'iPhoto', 'RAM', 'edges', 'buttons', 'portability', 'service center', 'mother board', 'web browsing', 'iMovie', 'monitor', 'iLife', 'set up', 'HD', 'DVD burner', 'USB ports', '

In [12]:
#get data frame
df_train = get_data_frame(final_train_text_list,train_aspects_list, most_common_aspect)
df_train.head()

Unnamed: 0,Text,cord,battery life,service center,"""sales"" team",tech guy,quality,GUI,applications,use,...,Price,Value,WiFi,update programs,MS applications,Internet tabs,noises,bottom of the computer,repair technician,repair
0,charge night skip taking cord good battery life,neutral,positive,,,,,,,,...,,,,,,,,,,
1,bought hp pavilion dv4-1222nr laptop many problems computer,,,,,,,,,,...,,,,,,,,,,
2,tech guy said service center 1-to-1 exchange direct concern sales team retail shop bought netbook,,,negative,negative,neutral,,,,,...,,,,,,,,,,
3,investigated netbooks saw toshiba nb305-n410bl,,,,,,,,,,...,,,,,,,,,,
4,day presentation seminar large conference town- lots people little time prep set computer projector etc,,,,,,,,,,...,,,,,,,,,,


In [13]:
# get aspect term dataframe   with most common aspect.
df_train_aspect = get_aspect_data_frame(df_train, most_common_aspect)
df_train_aspect.head()

Unnamed: 0,Text,cord,battery life,service center,"""sales"" team",tech guy,quality,GUI,applications,use,...,Price,Value,WiFi,update programs,MS applications,Internet tabs,noises,bottom of the computer,repair technician,repair
0,charge night skip taking cord good battery life,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,bought hp pavilion dv4-1222nr laptop many problems computer,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,tech guy said service center 1-to-1 exchange direct concern sales team retail shop bought netbook,0.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,investigated netbooks saw toshiba nb305-n410bl,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,day presentation seminar large conference town- lots people little time prep set computer projector etc,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [14]:
df_train_aspect = df_train_aspect.reindex(sorted(df_train_aspect.columns), axis=1)
df_train_aspect.head()

Unnamed: 0,"""sales"" team",1 GB ram,1-year-warranty,10-key,12 cell battery,13 inch,15 inch,"15""",16GB RAM support,17 ince screen,...,word,word editing,word processer,word processing,word processing program,word processor,work,working,works,wt
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [15]:
# Similar for test list
test_dataframe = get_xml_data(path_test)     # getting the test data from test.xml file
test_text_list = train_dataframe['text']     # putting test data in data frame.
test_aspects_list = list(train_dataframe['aspect_info'])      # putting it into a list
tagged_text_list_test = pos_tag(test_text_list)           # pos_tagging of the test dataset 
final_test_text_list = filter_tag(tagged_text_list_test)    # final test dataset 
df_test = get_data_frame(final_test_text_list,test_aspects_list, most_common_aspect)   # get aspect term dataframe for test data
df_test_aspect = get_aspect_data_frame(df_test, most_common_aspect)
df_test_aspect = df_test_aspect.reindex(sorted(df_test_aspect.columns), axis=1)

In [16]:
# Sort the data frame according to aspect's name and separate data(X) and target(y)

# for training dataset
X_train= df_train_aspect.Text
y_train = df_train_aspect.drop('Text', 1)
print(y_train[:5])


# for testing dataset.
X_test = df_test_aspect.Text
y_test = df_test_aspect.drop('Text', 1)
final_most_common_aspect = list(y_train)
#list(y_train)

   "sales" team  1 GB ram  1-year-warranty  10-key  12 cell battery  13 inch  \
0  0.0           0.0       0.0              0.0     0.0              0.0       
1  0.0           0.0       0.0              0.0     0.0              0.0       
2  1.0           0.0       0.0              0.0     0.0              0.0       
3  0.0           0.0       0.0              0.0     0.0              0.0       
4  0.0           0.0       0.0              0.0     0.0              0.0       

   15 inch  15"  16GB RAM support  17 ince screen ...   word  word editing  \
0  0.0      0.0  0.0               0.0            ...   0.0   0.0            
1  0.0      0.0  0.0               0.0            ...   0.0   0.0            
2  0.0      0.0  0.0               0.0            ...   0.0   0.0            
3  0.0      0.0  0.0               0.0            ...   0.0   0.0            
4  0.0      0.0  0.0               0.0            ...   0.0   0.0            

   word processer  word processing  word processin

In [17]:
# Change y_train to numpy array
import numpy as np
y_train = np.asarray(y_train, dtype=np.int64)
y_test = np.asarray(y_test, dtype=np.int64)
print(y_train[:5])

[[0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [1 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]]


In [18]:
# Generate word vecotors using CountVectorizer
from sklearn.feature_extraction.text import CountVectorizer
vect = CountVectorizer(max_df=1.0, stop_words='english')  
X_train_dtm = vect.fit_transform(X_train)
X_test_dtm = vect.transform(X_test)

In [19]:
# defining the training model for aspcet term extraction
# multinomial naive bayes class classifier.
# SVM
# SGD classifier
nb_classif = OneVsRestClassifier(MultinomialNB()).fit(X_train_dtm, y_train)
C = 1.0 
# SVregularization parameter
svc = OneVsRestClassifier(svm.SVC(kernel='linear', C=C)).fit(X_train_dtm, y_train)    # fiting the training data into the model 
lin_svc = OneVsRestClassifier(svm.LinearSVC(C=C)).fit(X_train_dtm, y_train)
sgd = OneVsRestClassifier(SGDClassifier(max_iter=1000)).fit(X_train_dtm,y_train)

In [20]:
# Predict the test data using classifiers
y_pred_class = nb_classif.predict(X_test_dtm)
y_pred_class_svc = svc.predict(X_test_dtm)
y_pred_class_lin_svc = lin_svc.predict(X_test_dtm)
y_pred_class_sgd = sgd.predict(X_test_dtm)

In [21]:
# Following code to test metrics of all aspect extraction classifiers

# Results

from sklearn import metrics
# printing accuracy of the model 
print("Accuracy of getting the aspect term using different model:-  ")
print(metrics.accuracy_score(y_test,y_pred_class))
print(metrics.accuracy_score(y_test,y_pred_class_svc))
print(metrics.accuracy_score(y_test,y_pred_class_lin_svc))
print(metrics.accuracy_score(y_test,y_pred_class_sgd))


# printing the  recall of the model
print("Recall of getting the aspect term using different model :- ")
print(metrics.recall_score(y_test,y_pred_class,average='micro'))
print(metrics.recall_score(y_test,y_pred_class_svc,average='micro'))
print(metrics.recall_score(y_test,y_pred_class_lin_svc,average='micro'))
print(metrics.recall_score(y_test,y_pred_class_sgd,average='micro'))



# printing the f1 score of the model
print(" F1 score of getting the aspect term using different model:-  ")
print(metrics.f1_score(y_test,y_pred_class,average='micro'))
print(metrics.f1_score(y_test,y_pred_class_svc,average='micro'))
print(metrics.f1_score(y_test,y_pred_class_lin_svc,average='micro'))
print(metrics.f1_score(y_test,y_pred_class_sgd,average='micro'))

Accuracy of getting the aspect term using different model:-  
0.5822660098522168
0.9894909688013136
0.994088669950739
0.9947454844006568
Recall of getting the aspect term using different model :- 
0.17890941865705273
0.9869310500225327
0.9932401982875169
0.9945921586300135
 F1 score of getting the aspect term using different model:-  
0.29973574933937336
0.9918478260869565
0.9950338600451467
0.9957139634558989


In [22]:
# printing the classification  report  of different model

with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    print(metrics.classification_report(y_test, y_pred_class))
    print(metrics.classification_report(y_test, y_pred_class_svc))
    print(metrics.classification_report(y_test, y_pred_class_lin_svc))
    print(metrics.classification_report(y_test, y_pred_class_sgd))

             precision    recall  f1-score   support

          0       0.00      0.00      0.00         1
          1       1.00      1.00      1.00         1
          2       0.00      0.00      0.00         1
          3       0.00      0.00      0.00         1
          4       0.00      0.00      0.00         1
          5       0.00      0.00      0.00         1
          6       0.00      0.00      0.00         1
          7       0.00      0.00      0.00         1
          8       0.00      0.00      0.00         1
          9       0.00      0.00      0.00         1
         10       0.00      0.00      0.00         1
         11       0.00      0.00      0.00         1
         12       0.00      0.00      0.00         1
         13       0.00      0.00      0.00         1
         14       0.00      0.00      0.00         1
         15       0.00      0.00      0.00         1
         16       0.00      0.00      0.00         1
         17       0.00      0.00      0.00   

             precision    recall  f1-score   support

          0       1.00      1.00      1.00         1
          1       1.00      1.00      1.00         1
          2       1.00      1.00      1.00         1
          3       1.00      1.00      1.00         1
          4       1.00      1.00      1.00         1
          5       1.00      1.00      1.00         1
          6       0.00      0.00      0.00         1
          7       1.00      1.00      1.00         1
          8       1.00      1.00      1.00         1
          9       1.00      1.00      1.00         1
         10       1.00      1.00      1.00         1
         11       1.00      1.00      1.00         1
         12       1.00      1.00      1.00         1
         13       1.00      1.00      1.00         1
         14       1.00      1.00      1.00         1
         15       1.00      1.00      1.00         1
         16       1.00      1.00      1.00         1
         17       1.00      1.00      1.00   

             precision    recall  f1-score   support

          0       1.00      1.00      1.00         1
          1       1.00      1.00      1.00         1
          2       1.00      1.00      1.00         1
          3       1.00      1.00      1.00         1
          4       1.00      1.00      1.00         1
          5       1.00      1.00      1.00         1
          6       0.00      0.00      0.00         1
          7       1.00      1.00      1.00         1
          8       1.00      1.00      1.00         1
          9       1.00      1.00      1.00         1
         10       1.00      1.00      1.00         1
         11       1.00      1.00      1.00         1
         12       1.00      1.00      1.00         1
         13       1.00      1.00      1.00         1
         14       1.00      1.00      1.00         1
         15       1.00      1.00      1.00         1
         16       1.00      1.00      1.00         1
         17       1.00      1.00      1.00   

             precision    recall  f1-score   support

          0       1.00      1.00      1.00         1
          1       1.00      1.00      1.00         1
          2       1.00      1.00      1.00         1
          3       1.00      1.00      1.00         1
          4       1.00      1.00      1.00         1
          5       1.00      1.00      1.00         1
          6       1.00      1.00      1.00         1
          7       1.00      1.00      1.00         1
          8       1.00      1.00      1.00         1
          9       1.00      1.00      1.00         1
         10       1.00      1.00      1.00         1
         11       1.00      1.00      1.00         1
         12       1.00      1.00      1.00         1
         13       1.00      1.00      1.00         1
         14       1.00      1.00      1.00         1
         15       1.00      1.00      1.00         1
         16       1.00      1.00      1.00         1
         17       1.00      1.00      1.00   

In [23]:
def get_dict_aspect(y,most_common_aspect):
    position=[]
    for innerlist in y:
        position.append([i for i, j in enumerate(innerlist) if j == 1])
    sorted_common=sorted(most_common_aspect)
    dict_aspect=[]
    for innerlist in position:
        inner_dict={}
        for word in sorted_common:
            if sorted_common.index(word) in innerlist:
                inner_dict[word]= 5
            else:
                inner_dict[word]=0
        dict_aspect.append(inner_dict)
    return dict_aspect

In [24]:
# Generating extra feature that indicates which aspect category is present in the review
train_dict_aspect=get_dict_aspect(y_train, most_common_aspect)
d_train=DictVectorizer() 
X_train_aspect_dtm = d_train.fit_transform(train_dict_aspect)

# y_test is used to generated extra feature in order to test the performance of 2nd classifer.
#Use y_pred_class_svc(Highest performer for aspect classification) as input for extra feature to test the overall performace.
test_dict_aspect=get_dict_aspect(y_test, most_common_aspect)
d_test=DictVectorizer() 

In [25]:
from BIO_format import BIO

def BIO_format(text, predicted_output, common_words):
    bio_obj = BIO()
    df = bio_obj.convert_into_bio(text, predicted_output, common_words_list=common_words)
    return df

In [30]:
# Aspect term extractor of user's input.

#user_input = "it is of high quality, has a killer GUI, is extremely stable, is highly expandable, is bundled with lots of very good applications, is easy to use, and is absolutely gorgeous."

user_input=input("Enter the comment:- ")
# Preprocessing and vectorizing
tagged_user_input = pos_tag([user_input])
print(tagged_user_input)
filter_tagged_user_input = filter_tag(tagged_user_input)
print(filter_tagged_user_input)

user_input_series = pd.Series(filter_tagged_user_input)
print(user_input_series)
user_input_series_dtm = vect.transform(user_input_series)
print(user_input_series_dtm)
# print(user_input_series[:5])

predict_aspect= sgd.predict(user_input_series_dtm)
print(predict_aspect)
# predict_aspect_data = predict_aspect[0]
extra_feature=get_dict_aspect(predict_aspect, most_common_aspect)
extra_feature_dtm=DictVectorizer().fit_transform(extra_feature)
predict_aspect

Enter the comment:- The battery life is good
[[('The', 'DT'), ('battery', 'NN'), ('life', 'NN'), ('good', 'JJ')]]
['battery life good']
0    battery life good
dtype: object
  (0, 259)	1
  (0, 1339)	1
  (0, 1810)	1
[[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
  0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
  0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
  0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
  0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
  0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
  0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
  0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
  0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0
  0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
  0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 

array([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 

In [31]:
df = BIO_format(user_input, predict_aspect[0], final_most_common_aspect)
df

Unnamed: 0,text,BIO
0,The,O
1,battery,B
2,life,I
3,is,O
4,good,O
