In [1]:
import nltk
import string
import spacy
import pandas as pd
from collections import Counter
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from spacy.lang.en.stop_words import STOP_WORDS
from sklearn.metrics import accuracy_score
from spacy.lang.en import English
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split 

In [2]:
def clean_text_and_get_tokens( text ):
    tokens = word_tokenize(text)
    # convert to lower case
    tokens = [w.lower() for w in tokens]
    # remove punctuation from each word

    table = str.maketrans('', '', string.punctuation)
    stripped = [w.translate(table) for w in tokens]
    # remove remaining tokens that are not alphabetic
    words = [word for word in stripped if word.isalpha()]
    # filter out stop words

    stop_words = set(stopwords.words('english'))
    words = [w for w in words if not w in stop_words]
    
    return words

In [3]:


# Create our list of punctuation marks
punctuations = string.punctuation

# Create our list of stopwords
nlp = spacy.load('en')
stop_words = spacy.lang.en.stop_words.STOP_WORDS

# Load English tokenizer, tagger, parser, NER and word vectors
parser = English()

# Creating our tokenizer function
def spacy_tokenizer(sentence):
    sentence = sentence.lower()
    # Creating our token object, which is used to create documents with linguistic annotations.
    mytokens = parser(sentence)

    # Lemmatizing each token and converting each token into lowercase
    mytokens = [ word.lemma_.lower().strip() if word.lemma_ != "-PRON-" else word.lower_ for word in mytokens ]

    # Removing stop words
    mytokens = [ word for word in mytokens if word not in stop_words and word not in punctuations ]

    # return preprocessed list of tokens
    return mytokens

In [4]:
train_df = pd.read_csv( 'train.csv')

In [5]:
train_df.head()

Unnamed: 0,id,keyword,location,text,target
0,1,,,Our Deeds are the Reason of this #earthquake M...,1
1,4,,,Forest fire near La Ronge Sask. Canada,1
2,5,,,All residents asked to 'shelter in place' are ...,1
3,6,,,"13,000 people receive #wildfires evacuation or...",1
4,7,,,Just got sent this photo from Ruby #Alaska as ...,1


In [6]:
train_df[ 'word_tokens' ] = train_df[ 'text' ].apply(clean_text_and_get_tokens).apply(lambda x : spacy_tokenizer( ' '.join( x ) ) )

In [7]:
pd.set_option('display.max_columns', 500)

In [8]:
train_df.head()

Unnamed: 0,id,keyword,location,text,target,word_tokens
0,1,,,Our Deeds are the Reason of this #earthquake M...,1,"[deeds, reason, earthquake, allah, forgive]"
1,4,,,Forest fire near La Ronge Sask. Canada,1,"[forest, fire, near, la, ronge, sask, canada]"
2,5,,,All residents asked to 'shelter in place' are ...,1,"[residents, asked, shelter, place, notified, o..."
3,6,,,"13,000 people receive #wildfires evacuation or...",1,"[people, receive, wildfires, evacuation, order..."
4,7,,,Just got sent this photo from Ruby #Alaska as ...,1,"[got, sent, photo, ruby, alaska, smoke, wildfi..."


In [9]:
def update_word_count( x ):
    
    x['word_counts' ]['id' ] = x['id' ]
    x['word_counts' ]['target' ] = x['target']
    return x['word_counts'] 

In [10]:
train_df[ 'word_counts' ] = train_df[ 'word_tokens' ].apply( lambda x : Counter( x ) )

In [11]:
train_df[ 'word_counts' ] = train_df.apply( update_word_count , axis = 1 )

In [12]:
new_train_df = pd.DataFrame( data = train_df[ 'word_counts' ].tolist() )

In [13]:
new_train_df.columns

Index(['aa', 'aaaa', 'aaaaaaallll', 'aaaaaand', 'aaarrrgghhh', 'aaceorg',
       'aan', 'aannnnd', 'aar', 'aaronthefm',
       ...
       'zxathetis', 'zzzz', 'å', 'åç', 'åè', 'åê', 'åêfedex', 'åêi', 'ìñ',
       'ìü'],
      dtype='object', length=16475)

In [14]:
input_cols = new_train_df.columns.difference( [ 'id' , 'target' ])

In [15]:
input_cols

Index(['aa', 'aaaa', 'aaaaaaallll', 'aaaaaand', 'aaarrrgghhh', 'aaceorg',
       'aan', 'aannnnd', 'aar', 'aaronthefm',
       ...
       'zxathetis', 'zzzz', 'å', 'åç', 'åè', 'åê', 'åêfedex', 'åêi', 'ìñ',
       'ìü'],
      dtype='object', length=16473)

In [16]:
op_cols = ['target' ]
input_df = new_train_df[ input_cols ] 
op_df    = new_train_df[ op_cols ]

In [17]:

trainX , testX , trainY , testY = train_test_split( input_df , op_df )

In [18]:
lr = LogisticRegression()

In [19]:
trainX.fillna(0 , inplace = True )

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  downcast=downcast, **kwargs)


In [20]:
def update_word_count_new( x ):
    
    x['word_counts' ]['id' ] = x['id' ]
#     x['word_counts' ]['target' ] = x['target']
    return x['word_counts'] 
def preprocess_df( df ):
    df[ 'word_tokens' ] = df[ 'text' ].apply(clean_text_and_get_tokens).apply(lambda x : spacy_tokenizer( ' '.join( x ) ) )
    df[ 'word_counts' ] = df[ 'word_tokens' ].apply( lambda x : Counter( x ) )
    df[ 'word_counts' ] = df.apply( update_word_count_new , axis = 1 )
    new_df = pd.DataFrame( data = df[ 'word_counts' ].tolist() )
    
    return new_df
    

In [21]:
from sklearn.model_selection import GridSearchCV

In [22]:
finder = GridSearchCV( lr , { 'C' : [ 0.2 , 0.4 , 0.6 , 0.8 ,1.0 , 1.5 , 2.0 , 2.5 , 3.0 ] } )

In [23]:
finder.fit(trainX , trainY)

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


GridSearchCV(cv=None, error_score='raise',
       estimator=LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
          penalty='l2', random_state=None, solver='liblinear', tol=0.0001,
          verbose=0, warm_start=False),
       fit_params=None, iid=True, n_jobs=1,
       param_grid={'C': [0.2, 0.4, 0.6, 0.8, 1.0, 1.5, 2.0, 2.5, 3.0]},
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring=None, verbose=0)

In [24]:
best_clf = _

In [25]:
best_clf

GridSearchCV(cv=None, error_score='raise',
       estimator=LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
          penalty='l2', random_state=None, solver='liblinear', tol=0.0001,
          verbose=0, warm_start=False),
       fit_params=None, iid=True, n_jobs=1,
       param_grid={'C': [0.2, 0.4, 0.6, 0.8, 1.0, 1.5, 2.0, 2.5, 3.0]},
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring=None, verbose=0)

In [26]:
testX.fillna( 0 , inplace = True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  downcast=downcast, **kwargs)


In [27]:
best_clf.predict

<function sklearn.model_selection._search.BaseSearchCV.predict(self, X)>

In [28]:
predicted_vals = best_clf.predict( testX )

In [29]:
accuracy = accuracy_score( testY , predicted_vals )

In [30]:
accuracy

0.8103991596638656

In [31]:
imp_metrics = [ i for i in zip( input_cols , best_clf.best_estimator_.coef_[0] ) ]

In [32]:
imp_metrics.sort( key = lambda x : x[ 1 ] )

In [33]:
non_disaster_tweets_words = imp_metrics[:20]

In [34]:
non_disaster_tweets_words

[('nowplaying', -0.9999311555270974),
 ('ebay', -0.9868548143051163),
 ('traumatised', -0.882243217134713),
 ('blew', -0.8808766781398087),
 ('new', -0.8589577442836113),
 ('song', -0.8271510144083069),
 ('ruin', -0.8265158720677837),
 ('love', -0.806423989848722),
 ('bags', -0.7906701521198848),
 ('body', -0.7666938171324615),
 ('bloody', -0.7462796429875065),
 ('mayhem', -0.7326655146711534),
 ('check', -0.7245345840057005),
 ('gop', -0.7233304641319701),
 ('mode', -0.6974515279341782),
 ('ass', -0.6867261627652391),
 ('electrocute', -0.6768578565306242),
 ('inundated', -0.6697383256183752),
 ('let', -0.6450712776124109),
 ('technology', -0.6386041378897379)]

In [35]:
disaster_words = imp_metrics[-20:]

In [36]:
disaster_words

[('deaths', 1.0480765265010343),
 ('drought', 1.074279539336733),
 ('killed', 1.0755242217739238),
 ('massacre', 1.0816776144799682),
 ('derailment', 1.1219219524772845),
 ('fire', 1.1294783276745883),
 ('train', 1.1341239640336793),
 ('debris', 1.137774078947554),
 ('explosion', 1.1559748180025309),
 ('bombing', 1.1634432378341577),
 ('typhoon', 1.2221305210108453),
 ('near', 1.2432204046158613),
 ('wildfire', 1.2610738847963763),
 ('storm', 1.2678959120074116),
 ('suicide', 1.2845089278780972),
 ('california', 1.2922648892433364),
 ('fires', 1.3661775442914832),
 ('casualties', 1.3675547309872604),
 ('earthquake', 1.6029523512186594),
 ('hiroshima', 1.7863897170117582)]

In [37]:
!ls ~/Downloads/nlp-getting-started | grep test

test.csv


In [38]:
test_df = pd.read_csv( '~/Downloads/nlp-getting-started/test.csv')

In [39]:
test_df.shape

(3263, 4)

In [40]:
test_df = preprocess_df( test_df )


In [41]:
test_df.head()

Unnamed: 0,aapatwork,aaron,aarongoodwin,aaronkearneyaus,aatraffic,aba,abandoned,abandons,abba,abbog,abbott,abbswinston,abbydphillip,abc,abcfmelissajoey,abcnetwork,abcnews,abdulrasheed,abe,abella,abh,abide,ability,ablaze,ablazeco,able,abnormally,abolishing,abomb,abombs,abomination,aboooooortiooooonnnnns,aboriginal,aborted,abortions,abounds,aboveandbeyond,abrianna,abs,absol,absolute,absolutely,abstorm,abt,abuse,abused,abuseddesolate,abuses,academy,acapella,accept,acceptable,access,accident,accidentally,accionempresa,accomplishing,according,account,accounts,accuracyesports,accurate,accurately,accused,accuses,acdelco,ace,achieved,achievements,achimota,acid,acoustic,acquisition,acreativedc,acres,act,acted,acting,action,activate,activated,activates,activation,activists,activity,acts,actual,actually,adam,adamrubinespn,adamtuss,adani,adapt,adarius,add,added,addiction,adding,address,adilghumro,adjusted,adjustments,admin,administer,administration,administrator,admins,admiral,admired,admits,admitted,adolwyn,adult,adultress,adults,advance,advanced,advancedwarfare,advantage,adventure,advice,advisory,aesthetic,af,affect,affected,affecting,affection,affects,affiliate,affiliated,affluent,afford,affordable,afghan,afp,afraid,africa,african,afrojazz,afterlife,aftermath,afternoon,aftershock,againspiritual,age,agen,agencies,agency,agent,ages,aggravated,aggravating,aggressively,aging,ago,agree,agreed,agrees,agricultural,aguero,ah,ahead,ahhhh,ahrar,ai,aichi,aichmomanic,aid,aiden,aids,aim,aims,air,airbase,airborne,aircraft,airdrie,airline,airlines,airplane,airplanes,airplaneåê,airport,airportwatch,airsoft,aisle,ajenews,ajmal,ak,aka,akfnejf,akinwunmiambode,al,ala,alabama,aladdin,alan,alarm,alarming,alaska,alaskapublic,albania,albert,alberta,albertans,albertbreer,albertbrooks,albertville,album,alc,alcohol,alcoholismaddiction,aldwark,alejandro,aleppo,alert,alertness,aleshabell,alex,alexandbondarev,alexander,alexeeles,alexhern,alexthemurder,alexxpage,alfa,alfons,alfredo,algae,alibaba,alienateplays,alive,aliyeskii,aliyu,aljavieera,allah,allahgodjesusthe,allahuakbar,allegations,allegedly,allen,allenpeacock,allergan,alley,alliance,allied,allies,allow,allows,...,winnable,winnemem,winner,winningwhit,winston,winter,wintergreen,wip,wiped,wire,wired,wirelessly,wires,wisdom,wise,wish,wishes,wishing,wit,witch,wither,witnessed,witnesses,witnessing,witter,wizkhalifa,wlb,wlsdomteeths,wmata,wmur,wnba,wnukes,wo,woke,wolf,wolfgangjoc,wololo,wolvenbeauty,wolverine,wolves,woman,womanxking,womb,women,womens,womensweeklymag,wonder,wonderful,wondering,wood,woodchucks,woodelijah,wooden,woodland,woods,wook,word,wordoftheday,words,work,worked,worker,workers,working,workout,workrelated,works,worktime,world,worldnetdaily,worldnews,worldoilåê,worldrunners,worlds,worldvision,wormhole,wormwood,worried,worries,worry,worse,worsens,worst,worth,woulda,wound,wounded,wounding,wounds,wout,wow,wowo,wozni,wp,wquddin,wr,wral,wrapped,wraps,wreck,wreckage,wrecked,wreckit,wrestling,wristband,write,writer,writers,writerslife,writes,writing,written,wrld,wrong,wrongsuddenly,wrote,wroug,wrought,ws,wsaz,wsazbrittany,wsea,wsj,wtf,wth,wtm,wuckforld,wvent,wwe,wweapons,wwii,wwiiera,www,wx,wxky,wyattmccab,x,xbox,xcom,xfactor,xl,xlgamesru,xmankitty,xmen,xmissxanthippex,xmwte,xodeemorgss,xoxoxxxooo,xrwn,xshanemichaelsx,xuskak,xvii,xxxmrbootleg,y,ya,yaaasss,yahistorical,yahoo,yahoonews,yakub,yakuboobs,yal,yale,yankees,yanks,yaounde,yarbrough,yard,yay,yazidis,yc,ye,yea,yeah,year,years,yeeessss,yeetrpan,yeg,yeh,yellow,yellowstone,yelp,yemen,yemmely,yer,yes,yessss,yesterday,yezidi,yiayplan,ykjl,yknow,ynovak,yo,yoga,york,yorker,youcantsitwithus,yougov,yougslavia,youku,young,youngheroesid,youngthug,yourgirlhaileyy,yoursstand,youth,youtube,ypg,yr,yrs,yup,yyc,yychail,yycstorm,yyctraffic,yycweather,zack,zakbagans,zakiakiia,zarnikyaw,zayn,zaynmalik,zealand,zeenews,zero,zeroes,zerohedge,zion,zionist,zippers,zirngast,zix,zjwhitman,zombie,zombies,zone,zones,zouis,zouma,zuma,zx,å,åà,åáåáåá,åè,åê,åêwas,ìàekdar
0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [42]:
lr = best_clf.best_estimator_

In [43]:
lr.fit( input_df.fillna( 0 ) , op_df )

  y = column_or_1d(y, warn=True)


LogisticRegression(C=0.4, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
          penalty='l2', random_state=None, solver='liblinear', tol=0.0001,
          verbose=0, warm_start=False)

In [48]:
cols = set( input_cols ) - set( test_df.columns )

In [49]:
for col in cols :

    test_df[ col ] = 0


In [50]:
predictions = lr.predict( test_df[ input_cols ] .fillna( 0 ) )

In [52]:
orig_test_df = pd.read_csv( '~/Downloads/nlp-getting-started/test.csv')

In [53]:
1

1

In [54]:
orig_test_df = orig_test_df[ ['id' ] ]

In [51]:
len( predictions)


3263

In [55]:
orig_test_df.shape

(3263, 1)

In [56]:
orig_test_df[ 'target' ] = predictions

In [57]:
orig_test_df.head()

Unnamed: 0,id,target
0,0,1
1,2,1
2,3,1
3,9,0
4,11,1


In [60]:
orig_test_df.to_csv( 'submission_nlp.csv' , index = False )

In [58]:
test_df_ = pd.read_csv( '~/Downloads/nlp-getting-started/test.csv')

In [59]:
test_df_.head()

Unnamed: 0,id,keyword,location,text
0,0,,,Just happened a terrible car crash
1,2,,,"Heard about #earthquake is different cities, s..."
2,3,,,"there is a forest fire at spot pond, geese are..."
3,9,,,Apocalypse lighting. #Spokane #wildfires
4,11,,,Typhoon Soudelor kills 28 in China and Taiwan
