In [1]:
import io
import os
import pickle
import time

import numpy as np
import pandas as pd
import requests

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier

from gensim.utils import tokenize
from gensim.parsing.preprocessing import remove_stopwords
from gensim.corpora import Dictionary
from gensim.models.ldamodel import LdaModel
from nltk.stem.snowball import SnowballStemmer

In [2]:
# Models and associated saved files.
# NOTE: pickle.load can execute arbitrary code from the file it reads, so only
# load artifacts that were produced by this project.
# The context managers make sure the file handles are closed once the objects
# have been deserialised.
with open('./models/random_forest_2.pkl', 'rb') as model_file:
    rf = pickle.load(model_file)  # classifier used by predict_disaster
with open('./models/tfidf_vectorizer_2.pkl', 'rb') as vectorizer_file:
    tf = pickle.load(vectorizer_file)  # text vectorizer used by predict_disaster
topic_model = LdaModel.load('./models/topic_model/trained_model.tmp')
dictionary = Dictionary.load('./models/topic_model/dictionary.tmp')

# Load the stemmer for later use (body_topic stems every token with it).
snow = SnowballStemmer("english")

In [3]:

#=========================FUNCTIONS================================================

#Given an input keyword, calls the API and returns a dataframe of the first 50 results.
#"title", "url", "body", and "datePublished" are probably the only features
#we care about, but some of the others are kept just in case.
def search_keyword(text):
    """Search the news API for ``text`` and return the first page of results.

    Parameters
    ----------
    text : str
        Search phrase, sent as the ``q`` query parameter.

    Returns
    -------
    pandas.DataFrame
        One row per article from page 1 (page size 50). Rows whose provider is
        named "wikipedia" are removed, the "id", "isSafe", "image" and
        "keywords" columns are dropped, and "datePublished" is parsed with
        ``pd.to_datetime``. The original row labels are kept (no index reset).
        An empty DataFrame is returned when the response carries no results.

    Raises
    ------
    requests.HTTPError
        If the API responds with an error status code.
    """
    url = "https://rapidapi.p.rapidapi.com/api/search/NewsSearchAPI"
    querystring = {"pageSize": "50",
                   "q": text,
                   "autoCorrect": "true",
                   "pageNumber": "1",
                   "toPublishedDate": "null",
                   "fromPublishedDate": "null"}
    headers = {
        'x-rapidapi-host': "contextualwebsearch-websearch-v1.p.rapidapi.com",
        # SECURITY: credentials should not live in the notebook. A key supplied
        # through the RAPIDAPI_KEY environment variable takes precedence; the
        # literal is kept only as a backward-compatible fallback and should be
        # rotated and removed.
        'x-rapidapi-key': os.environ.get(
            "RAPIDAPI_KEY",
            "55c35a554dmshab5af4556f598ffp1d3d45jsn8b763429c95f"),
    }
    # A timeout keeps a stalled connection from hanging the kernel forever.
    response = requests.get(url, headers=headers, params=querystring, timeout=30)
    # Fail loudly on HTTP errors instead of hitting a KeyError on the payload.
    response.raise_for_status()

    req_df = pd.DataFrame(response.json().get('value') or [])
    if req_df.empty:
        # No articles: there is no 'provider' column to filter on.
        return req_df

    # Get out of here, wikipedia. Match on the provider name so the filter
    # still works if the provider dict carries extra keys.
    is_wikipedia = req_df['provider'].map(
        lambda provider: isinstance(provider, dict)
        and provider.get('name') == 'wikipedia'
    ).astype(bool)

    return (
        req_df.loc[~is_wikipedia]
        .drop(columns=["id", "isSafe", "image", "keywords"])
        .assign(datePublished=lambda frame: pd.to_datetime(frame['datePublished']))
    )

#given a series of text strings, returns a binary vector with 1 if the story is classified as a disaster,
#0 otherwise
def predict_disaster(text_series):
    """Classify each text as disaster / not disaster.

    Parameters
    ----------
    text_series : iterable of str
        Raw article texts; they are vectorised with the module-level ``tf``
        vectorizer before prediction.

    Returns
    -------
    array-like
        The output of ``rf.predict``, one label per input text (per the
        notebook's convention 1 marks a disaster story and 0 anything else).
    """
    X = tf.transform(text_series)
    # scikit-learn 1.0 introduced get_feature_names_out() and 1.2 removed
    # get_feature_names(); support both so the notebook runs on either side.
    if hasattr(tf, "get_feature_names_out"):
        feature_names = tf.get_feature_names_out()
    else:
        feature_names = tf.get_feature_names()
    # The classifier is fed a dense, column-labelled frame.
    X = pd.DataFrame(X.toarray(), columns=feature_names)
    is_disaster = rf.predict(X)
    return is_disaster

#Given a probability prediction from the LDA model of the form
#[(topic1, prob1), (topic2, prob2),...],
#returns the topic number with the highest assigned probability.
def probs_to_topic(probs):
    """Pick the most probable topic from ``[(topic, prob), ...]`` pairs.

    Returns the topic of the first pair whose probability is the largest and
    strictly greater than zero, or -1 when there is no such pair (for example
    when ``probs`` is empty).
    """
    # Seeding the candidates with (-1, 0) encodes the "must beat zero" rule:
    # max() keeps the earliest entry among equals, so the sentinel wins unless
    # some topic has a strictly larger probability.
    winner = max([(-1, 0), *probs], key=lambda pair: pair[1])
    return winner[0]

#input : for lack of a better assumption, let's assume that the input will be a dataframe that has one article per row,
#        and a feature named "body" holding its unprocessed body text as a string.
#        this could include title text as well, but we didn't want to put too many assumptions on the input

#output: the same dataframe with three columns appended: token list, corpus (where the corpus is the token ids),
#        and predicted category

def body_topic(dataframe):
    """Tokenise each article body and assign it an LDA topic.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        One article per row, with a "body" column holding raw text.

    Returns
    -------
    pandas.DataFrame
        The same object that was passed in (it is modified in place), with
        three columns added:

        * "tokens" - stemmed tokens of the stopword-stripped body,
        * "corpus" - ``dictionary.doc2bow`` of those tokens,
        * "predicted_topic" - result of ``probs_to_topic`` on the model's
          topic distribution for that row.
    """
    stripped_bodies = [remove_stopwords(body) for body in dataframe['body'].values]
    # deacc / lowercase are boolean flags; the string "True" only worked
    # because any non-empty string is truthy.
    token_lists = [
        [snow.stem(token) for token in tokenize(body, deacc=True, lowercase=True)]
        for body in stripped_bodies
    ]
    corpus = [dictionary.doc2bow(tokens) for tokens in token_lists]

    if corpus:
        predicted = [probs_to_topic(topic_probs)
                     for topic_probs in topic_model.get_document_topics(corpus)]
    else:
        # NOTE(review): an empty corpus appears to be treated by the model as
        # a single empty document rather than "no documents", which then
        # breaks probs_to_topic - skip the model call when there are no rows.
        predicted = []

    # Attach the columns only after everything was computed, so a failure
    # above does not leave the caller's frame half-updated.
    dataframe['tokens'] = token_lists
    dataframe['corpus'] = corpus
    dataframe['predicted_topic'] = predicted
    return dataframe

   
# Topic Index Reference (These are not exact rules. Topics classified by the unsupervised trained LDA model)

# 0: Global Warming/Drought/Climate disasters.

# 1: Fires

# 2: Earthquakes/Volcanos/Seismic Events

# 3: Urban/Other (This is a weird one -- I think there were lots of airline accidents in the training data, and any article that talks about the urban ramifications of a disaster tends to get sorted here.)

# 4: Storms/Hurricanes

# 5: Floods/Rains

In [4]:
html_table='''<table border="1" class="dataframe">
  <thead>
    <tr style="text-align: right;">
      <th></th>
      <th>title</th>
      <th>url</th>
      <th>datePublished</th>
      <th>is_disaster</th>
      <th>predicted_topic</th>
    </tr>
  </thead>
  <tbody>
    <tr>
      <th>0</th>
      <td>Her tribe tamed wildfires for generations. Their method might surprise you.</td>
      <td><a href="http://rss.cnn.com/~r/rss/cnn_freevideo/~3/o9QJaMUs7mY/california-wildfires-native-americans-prescribed-burn-eg-orig.cnn" target="_blank">http://rss.cnn.com/~r/rss/cnn_freevideo/~3/o9QJaMUs7mY/california-wildfires-native-americans-prescribed-burn-eg-orig.cnn</a></td>
      <td>2020-10-30 11:38:51.000</td>
      <td>1</td>
      <td>0</td>
    </tr>
    <tr>
      <th>1</th>
      <td>Study of California wildfire costs suggests need for better tracking by state  Chico Enterprise-Record</td>
      <td><a href="https://www.chicoer.com/2020/10/30/study-of-california-wildfire-costs-suggests-need-for-better-tracking-by-state/" target="_blank">https://www.chicoer.com/2020/10/30/study-of-california-wildfire-costs-suggests-need-for-better-tracking-by-state/</a></td>
      <td>2020-10-30 11:25:29.000</td>
      <td>1</td>
      <td>0</td>
    </tr>
    <tr>
      <th>2</th>
      <td>This season is off the charts: Colorado fights the worst wildfires in its recent history</td>
      <td><a href="https://www.theguardian.com/us-news/2020/oct/30/colorado-fires-cameron-peak-east-troublesome" target="_blank">https://www.theguardian.com/us-news/2020/oct/30/colorado-fires-cameron-peak-east-troublesome</a></td>
      <td>2020-10-30 10:00:09.000</td>
      <td>1</td>
      <td>0</td>
    </tr>
    <tr>
      <th>3</th>
      <td>Slate News Quiz: California wildfires, Keith Raniere, World Series.</td>
      <td><a href="https://slate.com/news-and-politics/2020/10/slate-news-quiz-california-wildfires-keith-raniere-world-series.html?via=rss" target="_blank">https://slate.com/news-and-politics/2020/10/slate-news-quiz-california-wildfires-keith-raniere-world-series.html?via=rss</a></td>
      <td>2020-10-30 09:55:00.000</td>
      <td>1</td>
      <td>0</td>
    </tr>
    <tr>
      <th>4</th>
      <td>Australia wildfires probe recommends climate risk forecasts</td>
      <td><a href="https://kdwn.com/2020/10/30/australia-wildfires-probe-recommends-climate-risk-forecasts/" target="_blank">https://kdwn.com/2020/10/30/australia-wildfires-probe-recommends-climate-risk-forecasts/</a></td>
      <td>2020-10-30 05:10:20.000</td>
      <td>1</td>
      <td>0</td>
    </tr>
    <tr>
      <th>5</th>
      <td>Australia wildfires probe recommends climate risk forecasts</td>
      <td><a href="https://www.winnipegfreepress.com/world/australia-wildfires-probe-recommends-climate-risk-forecasts--572920181.html" target="_blank">https://www.winnipegfreepress.com/world/australia-wildfires-probe-recommends-climate-risk-forecasts--572920181.html</a></td>
      <td>2020-10-30 05:02:07.000</td>
      <td>1</td>
      <td>1</td>
    </tr>
    <tr>
      <th>6</th>
      <td>Australia wildfires probe recommends climate risk forecasts</td>
      <td><a href="https://www.thestar.com/news/world/australia/2020/10/30/australia-wildfires-probe-recommends-climate-risk-forecasts.html" target="_blank">https://www.thestar.com/news/world/australia/2020/10/30/australia-wildfires-probe-recommends-climate-risk-forecasts.html</a></td>
      <td>2020-10-30 05:01:22.000</td>
      <td>1</td>
      <td>0</td>
    </tr>
    <tr>
      <th>7</th>
      <td>Wildfire fund raises over $800,000 for evacuees</td>
      <td><a href="https://www.skyhinews.com/news/wildfire-fund-raises-over-800000-for-evacuees/" target="_blank">https://www.skyhinews.com/news/wildfire-fund-raises-over-800000-for-evacuees/</a></td>
      <td>2020-10-29 22:21:20.000</td>
      <td>1</td>
      <td>0</td>
    </tr>
    <tr>
      <th>8</th>
      <td>2 Colorado wildfires have burned nearly 700 structures</td>
      <td><a href="https://www.washingtontimes.com/news/2020/oct/29/2-colorado-wildfires-have-burned-nearly-700-struct/?utm_source=RSS_Feed&amp;utm_medium=RSS" target="_blank">https://www.washingtontimes.com/news/2020/oct/29/2-colorado-wildfires-have-burned-nearly-700-struct/?utm_source=RSS_Feed&amp;utm_medium=RSS</a></td>
      <td>2020-10-29 22:04:16.000</td>
      <td>1</td>
      <td>3</td>
    </tr>
    <tr>
      <th>9</th>
      <td>Scientists Want New Framework for Understanding California Wildfire Costs, Risks</td>
      <td><a href="https://www.insurancejournal.com/news/west/2020/10/29/588768.htm" target="_blank">https://www.insurancejournal.com/news/west/2020/10/29/588768.htm</a></td>
      <td>2020-10-29 20:58:05.000</td>
      <td>1</td>
      <td>0</td>
    </tr>
    <tr>
      <th>10</th>
      <td>Pioche Soldier battles two relentless Nevada enemies: wildfires and pandemic</td>
      <td><a href="https://www.dvidshub.net/news/382041/pioche-soldier-battles-two-relentless-nevada-enemies-wildfires-and-pandemic" target="_blank">https://www.dvidshub.net/news/382041/pioche-soldier-battles-two-relentless-nevada-enemies-wildfires-and-pandemic</a></td>
      <td>2020-10-29 20:24:11.000</td>
      <td>1</td>
      <td>1</td>
    </tr>
    <tr>
      <th>11</th>
      <td>Winemakers taking matters into their own hands to save harvests from wildfires</td>
      <td><a href="https://www.abcactionnews.com/news/national/winemakers-taking-matters-into-their-own-hands-to-save-harvests-from-wildfires" target="_blank">https://www.abcactionnews.com/news/national/winemakers-taking-matters-into-their-own-hands-to-save-harvests-from-wildfires</a></td>
      <td>2020-10-29 19:34:54.000</td>
      <td>1</td>
      <td>0</td>
    </tr>
    <tr>
      <th>12</th>
      <td>Evacuation orders lifted for many Californians who fled wildfires in Orange County</td>
      <td><a href="http://feeds.foxnews.com/~r/foxnews/national/~3/BiNX-LWvhcI/california-wildfire-silverado-fire-blue-ridge-orange-county-irvine-fire-weather" target="_blank">http://feeds.foxnews.com/~r/foxnews/national/~3/BiNX-LWvhcI/california-wildfire-silverado-fire-blue-ridge-orange-county-irvine-fire-weather</a></td>
      <td>2020-10-29 17:19:31.000</td>
      <td>1</td>
      <td>0</td>
    </tr>
    <tr>
      <th>13</th>
      <td>Homeowners doing what they can to protect properties from record-breaking wildfires</td>
      <td><a href="https://www.kgun9.com/news/national/homeowners-doing-what-they-can-to-protect-properties-as-record-wildfire-season-continues" target="_blank">https://www.kgun9.com/news/national/homeowners-doing-what-they-can-to-protect-properties-as-record-wildfire-season-continues</a></td>
      <td>2020-10-29 16:22:38.000</td>
      <td>1</td>
      <td>1</td>
    </tr>
    <tr>
      <th>14</th>
      <td>Ash-tonishing: Colorado Wildfires Send Ash Raining Down</td>
      <td><a href="https://thewestsidegazette.com/ash-tonishing-colorado-wildfires-send-ash-raining-down/" target="_blank">https://thewestsidegazette.com/ash-tonishing-colorado-wildfires-send-ash-raining-down/</a></td>
      <td>2020-10-29 15:05:38.000</td>
      <td>1</td>
      <td>1</td>
    </tr>
    <tr>
      <th>15</th>
      <td>California wildfires' death toll rises to 32</td>
      <td><a href="https://www.mercurynews.com/2020/10/29/california-wildfires-death-toll-rises-to-32/" target="_blank">https://www.mercurynews.com/2020/10/29/california-wildfires-death-toll-rises-to-32/</a></td>
      <td>2020-10-29 14:38:38.000</td>
      <td>1</td>
      <td>3</td>
    </tr>
    <tr>
      <th>16</th>
      <td>Building wildfire adaptive communities will be topic of OSU-Cascades Science Pub</td>
      <td><a href="https://www.mycentraloregon.com/2020/10/29/building-wildfire-adaptive-communities-will-be-topic-of-osu-cascades-science-pub/" target="_blank">https://www.mycentraloregon.com/2020/10/29/building-wildfire-adaptive-communities-will-be-topic-of-osu-cascades-science-pub/</a></td>
      <td>2020-10-29 13:34:05.000</td>
      <td>1</td>
      <td>0</td>
    </tr>
    <tr>
      <th>17</th>
      <td>Lebanon's cedar trees burn as wildfires reach record altitudes</td>
      <td><a href="https://www.dailystar.com.lb//News/Lebanon-News/2020/Oct-29/513665-lebanons-cedar-trees-burn-as-wildfires-reach-record-altitudes.ashx" target="_blank">https://www.dailystar.com.lb//News/Lebanon-News/2020/Oct-29/513665-lebanons-cedar-trees-burn-as-wildfires-reach-record-altitudes.ashx</a></td>
      <td>2020-10-29 13:07:00.000</td>
      <td>1</td>
      <td>0</td>
    </tr>
    <tr>
      <th>18</th>
      <td>Election Season Is Wildfire Season. These Voters Lost Everything.</td>
      <td><a href="https://newrepublic.com/article/159989/election-season-wildfire-season-voters-lost-everything" target="_blank">https://newrepublic.com/article/159989/election-season-wildfire-season-voters-lost-everything</a></td>
      <td>2020-10-29 10:00:00.000</td>
      <td>1</td>
      <td>3</td>
    </tr>
    <tr>
      <th>19</th>
      <td>Election 2020, Zeta, California wildfires: 5 things to know Friday</td>
      <td><a href="http://rssfeeds.usatoday.com/~/637870324/0/usatoday-newstopstories~Trump-and-Biden-campaigns-Tropical-Storm-Zeta-California-wildfires-things-to-know-Thursday/" target="_blank">http://rssfeeds.usatoday.com/~/637870324/0/usatoday-newstopstories~Trump-and-Biden-campaigns-Tropical-Storm-Zeta-California-wildfires-things-to-know-Thursday/</a></td>
      <td>2020-10-29 09:35:23.000</td>
      <td>1</td>
      <td>1</td>
    </tr>
    <tr>
      <th>20</th>
      <td>Southern California's latest wildfires rage with little progress reported</td>
      <td><a href="http://selkirkjournal.com/news/world/southern-californias-latest-wildfires-rage-with-little-progress-reported" target="_blank">http://selkirkjournal.com/news/world/southern-californias-latest-wildfires-rage-with-little-progress-reported</a></td>
      <td>2020-10-29 09:30:00.000</td>
      <td>1</td>
      <td>1</td>
    </tr>
    <tr>
      <th>21</th>
      <td>Podcast: When Wildfires and a Pandemic Collide</td>
      <td><a href="https://undark.org/2020/10/29/podcast-49-wildfire-pandemic-collide/" target="_blank">https://undark.org/2020/10/29/podcast-49-wildfire-pandemic-collide/</a></td>
      <td>2020-10-29 09:26:32.000</td>
      <td>1</td>
      <td>3</td>
    </tr>
    <tr>
      <th>22</th>
      <td>Mobile Voting Helps Wildfire Victims Cast Their Ballots : NPR</td>
      <td><a href="https://www.npr.org/2020/10/29/928316561/votemobile-helps-california-wildfire-victims-and-seniors-cast-their-ballots?utm_medium=RSS&amp;utm_campaign=nprstoriesfromnpr" target="_blank">https://www.npr.org/2020/10/29/928316561/votemobile-helps-california-wildfire-victims-and-seniors-cast-their-ballots?utm_medium=RSS&amp;utm_campaign=nprstoriesfromnpr</a></td>
      <td>2020-10-29 09:05:41.000</td>
      <td>1</td>
      <td>3</td>
    </tr>
    <tr>
      <th>23</th>
      <td>VoteMobile Helps California Wildfire Victims And Seniors Cast Their Ballots</td>
      <td><a href="https://www.wksu.org/npr-news/2020-10-29/votemobile-helps-california-wildfire-victims-and-seniors-cast-their-ballots" target="_blank">https://www.wksu.org/npr-news/2020-10-29/votemobile-helps-california-wildfire-victims-and-seniors-cast-their-ballots</a></td>
      <td>2020-10-29 09:05:41.000</td>
      <td>1</td>
      <td>3</td>
    </tr>
    <tr>
      <th>24</th>
      <td>Smoke, helicopters, fire, suffering: This is life in a wildfire zone</td>
      <td><a href="https://lasvegassun.com//news/2020/oct/29/smoke-helicopters-fire-suffering-this-is-life-in-a/" target="_blank">https://lasvegassun.com//news/2020/oct/29/smoke-helicopters-fire-suffering-this-is-life-in-a/</a></td>
      <td>2020-10-29 09:00:00.000</td>
      <td>1</td>
      <td>3</td>
    </tr>
    <tr>
      <th>25</th>
      <td>Trump and Biden campaigns, Tropical Storm Zeta, California wildfires: 5 things to know Thursday</td>
      <td><a href="https://news.yahoo.com/trump-biden-campaigns-tropical-storm-082813407.html" target="_blank">https://news.yahoo.com/trump-biden-campaigns-tropical-storm-082813407.html</a></td>
      <td>2020-10-29 08:28:13.000</td>
      <td>1</td>
      <td>1</td>
    </tr>
    <tr>
      <th>26</th>
      <td>California Crews Begin To Tame Two Orange County Wildfires As Winds Ebb</td>
      <td><a href="https://www.news18.com/news/world/california-crews-begin-to-tame-two-orange-county-wildfires-as-winds-ebb-3019031.html" target="_blank">https://www.news18.com/news/world/california-crews-begin-to-tame-two-orange-county-wildfires-as-winds-ebb-3019031.html</a></td>
      <td>2020-10-29 03:57:46.000</td>
      <td>1</td>
      <td>1</td>
    </tr>
    <tr>
      <th>27</th>
      <td>Evacuation orders lifted for thousands of Californians who fled wildfires</td>
      <td><a href="https://globalnews.ca/news/7429107/california-wildfires-evacuation-orders-lifted/" target="_blank">https://globalnews.ca/news/7429107/california-wildfires-evacuation-orders-lifted/</a></td>
      <td>2020-10-29 03:23:44.000</td>
      <td>1</td>
      <td>1</td>
    </tr>
    <tr>
      <th>28</th>
      <td>John Cena to Donate Money to First Responders Fighting Cali Wildfires</td>
      <td><a href="http://eonline.com/ap/news/1087304/john-cena-pledges-500-000-to-aid-first-responders-fighting-california-wildfires" target="_blank">http://eonline.com/ap/news/1087304/john-cena-pledges-500-000-to-aid-first-responders-fighting-california-wildfires</a></td>
      <td>2020-10-29 01:05:34.000</td>
      <td>1</td>
      <td>3</td>
    </tr>
    <tr>
      <th>29</th>
      <td>Wildfires force thousands to evacuate near Los Angeles: Here's how the 2020 Western fire season got so extreme</td>
      <td><a href="http://redgreenandblue.org/2020/10/28/wildfires-force-thousands-evacuate-near-los-angeles-heres-2020-western-fire-season-got-extreme/" target="_blank">http://redgreenandblue.org/2020/10/28/wildfires-force-thousands-evacuate-near-los-angeles-heres-2020-western-fire-season-got-extreme/</a></td>
      <td>2020-10-29 01:00:01.000</td>
      <td>1</td>
      <td>0</td>
    </tr>
    <tr>
      <th>30</th>
      <td>California crews begin to tame two Orange County wildfires as winds ebb, United States News &amp; Top Stories</td>
      <td><a href="http://www.straitstimes.com/world/united-states/california-crews-begin-to-tame-two-orange-county-wildfires-as-winds-ebb" target="_blank">http://www.straitstimes.com/world/united-states/california-crews-begin-to-tame-two-orange-county-wildfires-as-winds-ebb</a></td>
      <td>2020-10-29 00:01:56.000</td>
      <td>1</td>
      <td>1</td>
    </tr>
    <tr>
      <th>31</th>
      <td>Northern California Wildfires: Where To Find Updates On Air Quality, Evacuations, And Official Information</td>
      <td><a href="http://capradio.org/articles/2020/10/29/latest-updates-on-northern-california-wildfires/" target="_blank">http://capradio.org/articles/2020/10/29/latest-updates-on-northern-california-wildfires/</a></td>
      <td>2020-10-29 00:00:00.000</td>
      <td>1</td>
      <td>1</td>
    </tr>
    <tr>
      <th>32</th>
      <td>Velodyne Lidar Sensors Power LineVision's V3 Overhead Power Line Monitoring System</td>
      <td><a href="http://business.financialpost.com/pmn/press-releases-pmn/business-wire-news-releases-pmn/velodyne-lidar-sensors-power-linevisions-v3-overhead-power-line-monitoring-system" target="_blank">http://business.financialpost.com/pmn/press-releases-pmn/business-wire-news-releases-pmn/velodyne-lidar-sensors-power-linevisions-v3-overhead-power-line-monitoring-system</a></td>
      <td>2020-10-29 00:00:00.000</td>
      <td>1</td>
      <td>0</td>
    </tr>
    <tr>
      <th>33</th>
      <td>UPDATE 1-California crews begin to tame two Orange County wildfires as winds ebb</td>
      <td><a href="http://uk.reuters.com/article/usa-wildfires-california/update-1-california-crews-begin-to-tame-two-orange-county-wildfires-as-winds-ebb-idukl1n2hj3du" target="_blank">http://uk.reuters.com/article/usa-wildfires-california/update-1-california-crews-begin-to-tame-two-orange-county-wildfires-as-winds-ebb-idukl1n2hj3du</a></td>
      <td>2020-10-28 23:50:36.000</td>
      <td>1</td>
      <td>1</td>
    </tr>
    <tr>
      <th>34</th>
      <td>Southern California zoo evacuates ahead of growing wildfire</td>
      <td><a href="https://www.wkbw.com/news/national/southern-california-zoo-evacuates-ahead-of-growing-wildfire" target="_blank">https://www.wkbw.com/news/national/southern-california-zoo-evacuates-ahead-of-growing-wildfire</a></td>
      <td>2020-10-28 20:40:29.000</td>
      <td>1</td>
      <td>1</td>
    </tr>
    <tr>
      <th>35</th>
      <td>AP Explains: Power shutoffs to prevent California wildfires</td>
      <td><a href="https://www.mymotherlode.com/news/state/1342868/ap-explains-power-shutoffs-to-prevent-california-wildfires.html" target="_blank">https://www.mymotherlode.com/news/state/1342868/ap-explains-power-shutoffs-to-prevent-california-wildfires.html</a></td>
      <td>2020-10-28 19:37:41.000</td>
      <td>1</td>
      <td>1</td>
    </tr>
    <tr>
      <th>36</th>
      <td>Marin Voice: As our trauma mounts around us, its time to realize we must change  Marin Independent Journal</td>
      <td><a href="https://www.marinij.com/2020/10/28/marin-voice-as-our-trauma-mounts-around-us-its-time-to-realize-we-must-change/" target="_blank">https://www.marinij.com/2020/10/28/marin-voice-as-our-trauma-mounts-around-us-its-time-to-realize-we-must-change/</a></td>
      <td>2020-10-28 19:00:28.000</td>
      <td>1</td>
      <td>3</td>
    </tr>
    <tr>
      <th>37</th>
      <td>The Wests Best Offense Against Wildfires Is Failing</td>
      <td><a href="https://www.theatlantic.com/science/archive/2020/10/prescribed-burns-are-failing/616889/?utm_source=feed" target="_blank">https://www.theatlantic.com/science/archive/2020/10/prescribed-burns-are-failing/616889/?utm_source=feed</a></td>
      <td>2020-10-28 18:46:10.000</td>
      <td>1</td>
      <td>0</td>
    </tr>
    <tr>
      <th>38</th>
      <td>Researchers find confusion over masks for wildfire, COVID-19 crises</td>
      <td><a href="https://www.newswise.com/articles/view/740687/?sc=c103" target="_blank">https://www.newswise.com/articles/view/740687/?sc=c103</a></td>
      <td>2020-10-28 18:05:30.000</td>
      <td>1</td>
      <td>0</td>
    </tr>
    <tr>
      <th>39</th>
      <td>Clackamas County deputy who suggested antifa connection to wildfires retires</td>
      <td><a href="https://www.oregonlive.com/clackamascounty/2020/10/clackamas-county-deputy-who-suggested-antifa-connection-to-wildfires-retires.html" target="_blank">https://www.oregonlive.com/clackamascounty/2020/10/clackamas-county-deputy-who-suggested-antifa-connection-to-wildfires-retires.html</a></td>
      <td>2020-10-28 17:43:28.268</td>
      <td>1</td>
      <td>3</td>
    </tr>
    <tr>
      <th>40</th>
      <td>Calmer winds help crews make gains against two Southern California wildfires</td>
      <td><a href="http://theglobeandmail.com/world/article-calmer-winds-help-crews-make-gains-against-two-southern-california/" target="_blank">http://theglobeandmail.com/world/article-calmer-winds-help-crews-make-gains-against-two-southern-california/</a></td>
      <td>2020-10-28 15:50:31.000</td>
      <td>1</td>
      <td>1</td>
    </tr>
    <tr>
      <th>41</th>
      <td>Evacuations continue as California wildfires grow</td>
      <td><a href="https://fox26medford.com/evacuations-continue-as-california-wildfires-grow/" target="_blank">https://fox26medford.com/evacuations-continue-as-california-wildfires-grow/</a></td>
      <td>2020-10-28 15:42:26.000</td>
      <td>1</td>
      <td>1</td>
    </tr>
    <tr>
      <th>42</th>
      <td>Wildfire victims shop for free at Serres Nursery</td>
      <td><a href="https://pamplinmedia.com/pt/9-news/485863-391377-wildfire-victims-shop-for-free-at-serres-nursery" target="_blank">https://pamplinmedia.com/pt/9-news/485863-391377-wildfire-victims-shop-for-free-at-serres-nursery</a></td>
      <td>2020-10-28 15:00:00.000</td>
      <td>1</td>
      <td>3</td>
    </tr>
    <tr>
      <th>43</th>
      <td>Photos: Wind-whipped wildfires scorch California again</td>
      <td><a href="http://auburnpub.com/collection_13f9df2a-2ec5-5796-a87e-d9346bd5e4dc.html" target="_blank">http://auburnpub.com/collection_13f9df2a-2ec5-5796-a87e-d9346bd5e4dc.html</a></td>
      <td>2020-10-28 14:30:00.000</td>
      <td>1</td>
      <td>1</td>
    </tr>
    <tr>
      <th>44</th>
      <td>An exodus: How wildfires have driven migration from Butte County  Silicon Valley</td>
      <td><a href="https://www.siliconvalley.com/2020/10/28/an-exodus-how-fires-have-driven-migration-from-butte-county/" target="_blank">https://www.siliconvalley.com/2020/10/28/an-exodus-how-fires-have-driven-migration-from-butte-county/</a></td>
      <td>2020-10-28 13:59:48.000</td>
      <td>1</td>
      <td>3</td>
    </tr>
    <tr>
      <th>45</th>
      <td>Which mask works? Researchers find confusion over mask use for wildfire, COVID-19 crises</td>
      <td><a href="http://earth.stanford.edu/news/which-mask-works-researchers-find-confusion-over-mask-use-wildfire-covid-19-crises" target="_blank">http://earth.stanford.edu/news/which-mask-works-researchers-find-confusion-over-mask-use-wildfire-covid-19-crises</a></td>
      <td>2020-10-28 13:18:00.000</td>
      <td>1</td>
      <td>0</td>
    </tr>
    <tr>
      <th>46</th>
      <td>California wildfires put 90,000 people under emergency evacuation orders</td>
      <td><a href="https://www.irishtimes.com/news/world/us/california-wildfires-put-90-000-people-under-emergency-evacuation-orders-1.4393131?localLinksEnabled=false" target="_blank">https://www.irishtimes.com/news/world/us/california-wildfires-put-90-000-people-under-emergency-evacuation-orders-1.4393131?localLinksEnabled=false</a></td>
      <td>2020-10-28 08:36:04.000</td>
      <td>1</td>
      <td>1</td>
    </tr>
    <tr>
      <th>47</th>
      <td>In Pictures: Wildfires continue to rage across USs California</td>
      <td><a href="https://www.aljazeera.com/gallery/2020/10/28/in-pictures-californias-latest-wildfires/" target="_blank">https://www.aljazeera.com/gallery/2020/10/28/in-pictures-californias-latest-wildfires/</a></td>
      <td>2020-10-28 07:24:53.000</td>
      <td>1</td>
      <td>1</td>
    </tr>
    <tr>
      <th>48</th>
      <td>What its like to fight wildfires in 2020</td>
      <td><a href="http://feeds.fastcompany.com/~r/fastcoexist/feed/~3/GORSrlVyeLI/i-worked-246-hours-in-15-days-what-its-like-to-fight-wildfires-in-2020" target="_blank">http://feeds.fastcompany.com/~r/fastcoexist/feed/~3/GORSrlVyeLI/i-worked-246-hours-in-15-days-what-its-like-to-fight-wildfires-in-2020</a></td>
      <td>2020-10-28 07:00:34.000</td>
      <td>1</td>
      <td>1</td>
    </tr>
    <tr>
      <th>49</th>
      <td>California utility slow to pull plug before wildfire erupted</td>
      <td><a href="http://gazette.com/ap/national/california-utility-slow-to-pull-plug-before-wildfire-erupted/article_02e438e8-026a-569d-8797-3f5c8ed43c15.html" target="_blank">http://gazette.com/ap/national/california-utility-slow-to-pull-plug-before-wildfire-erupted/article_02e438e8-026a-569d-8797-3f5c8ed43c15.html</a></td>
      <td>2020-10-28 05:22:13.000</td>
      <td>1</td>
      <td>1</td>
    </tr>
  </tbody>
</table>'''

In [5]:
# Parse the saved HTML results table back into a DataFrame.
# The markup is wrapped in StringIO because newer pandas releases deprecate
# passing a literal HTML string to read_html; file-like input works everywhere.
table = pd.read_html(io.StringIO(html_table))[0]

In [6]:
# Preview the first rows to check that the HTML table parsed as expected.
table.head()

Unnamed: 0.1,Unnamed: 0,title,url,datePublished,is_disaster,predicted_topic
0,0,Her tribe tamed wildfires for generations. The...,http://rss.cnn.com/~r/rss/cnn_freevideo/~3/o9Q...,2020-10-30 11:38:51.000,1,0
1,1,Study of California wildfire costs suggests ne...,https://www.chicoer.com/2020/10/30/study-of-ca...,2020-10-30 11:25:29.000,1,0
2,2,This season is off the charts: Colorado fights...,https://www.theguardian.com/us-news/2020/oct/3...,2020-10-30 10:00:09.000,1,0
3,3,"Slate News Quiz: California wildfires, Keith R...",https://slate.com/news-and-politics/2020/10/sl...,2020-10-30 09:55:00.000,1,0
4,4,Australia wildfires probe recommends climate r...,https://kdwn.com/2020/10/30/australia-wildfire...,2020-10-30 05:10:20.000,1,0


In [7]:
# The blank header cell of the HTML table is parsed as an 'Unnamed: 0' column
# that just repeats the row number; discard it.
# Reassigning (instead of inplace=True) with errors='ignore' keeps this cell
# safe to re-run: a second execution no longer raises KeyError.
table = table.drop(columns=['Unnamed: 0'], errors='ignore')

In [10]:
# Keep only the rows flagged as disasters (is_disaster equal to 1).
disaster_mask = table['is_disaster'] == 1
disasters_only = table[disaster_mask]

In [11]:
# Display the rows that were kept by the is_disaster filter.
disasters_only

Unnamed: 0,title,url,datePublished,is_disaster,predicted_topic
0,Her tribe tamed wildfires for generations. The...,http://rss.cnn.com/~r/rss/cnn_freevideo/~3/o9Q...,2020-10-30 11:38:51.000,1,0
1,Study of California wildfire costs suggests ne...,https://www.chicoer.com/2020/10/30/study-of-ca...,2020-10-30 11:25:29.000,1,0
2,This season is off the charts: Colorado fights...,https://www.theguardian.com/us-news/2020/oct/3...,2020-10-30 10:00:09.000,1,0
3,"Slate News Quiz: California wildfires, Keith R...",https://slate.com/news-and-politics/2020/10/sl...,2020-10-30 09:55:00.000,1,0
4,Australia wildfires probe recommends climate r...,https://kdwn.com/2020/10/30/australia-wildfire...,2020-10-30 05:10:20.000,1,0
5,Australia wildfires probe recommends climate r...,https://www.winnipegfreepress.com/world/austra...,2020-10-30 05:02:07.000,1,1
6,Australia wildfires probe recommends climate r...,https://www.thestar.com/news/world/australia/2...,2020-10-30 05:01:22.000,1,0
7,"Wildfire fund raises over $800,000 for evacuees",https://www.skyhinews.com/news/wildfire-fund-r...,2020-10-29 22:21:20.000,1,0
8,2 Colorado wildfires have burned nearly 700 st...,https://www.washingtontimes.com/news/2020/oct/...,2020-10-29 22:04:16.000,1,3
9,Scientists Want New Framework for Understandin...,https://www.insurancejournal.com/news/west/202...,2020-10-29 20:58:05.000,1,0


In [19]:
# Distinct topic ids present among the disaster rows, in order of appearance.
topics = disasters_only.loc[:, 'predicted_topic'].unique()

In [20]:
# Show which topic ids occur in the disaster subset.
topics

array([0, 1, 3], dtype=int64)

In [25]:
def search_keyword(text):
    """Search the news API for ``text`` and return up to 250 results.

    Results are fetched 50 per page; every available page up to the fifth is
    requested, so the frame holds all results or the first 250, whichever is
    smaller.

    Parameters
    ----------
    text : str
        Search phrase, sent as the ``q`` query parameter.

    Returns
    -------
    pandas.DataFrame
        One row per article with a fresh 0..n-1 index. Rows whose provider is
        named "wikipedia" are removed, the "id", "isSafe", "image" and
        "keywords" columns are dropped, and "datePublished" is parsed with
        ``pd.to_datetime``. An empty DataFrame is returned when the API
        reports no results.

    Raises
    ------
    requests.HTTPError
        If a page still answers with an HTTP error status after the retries.
    RuntimeError
        If a page keeps answering with a non-200, non-error status.
    """
    url = "https://rapidapi.p.rapidapi.com/api/search/NewsSearchAPI"
    page_size = 50
    max_pages = 5      # 5 pages x 50 results = the 250-result cap
    max_attempts = 5   # bounded retries per page instead of looping forever
    querystring = {"pageSize": str(page_size),
                   "q": text,
                   "autoCorrect": "true",
                   "pageNumber": "1",
                   "toPublishedDate": "null",
                   "fromPublishedDate": "null"}
    headers = {
        'x-rapidapi-host': "contextualwebsearch-websearch-v1.p.rapidapi.com",
        # SECURITY: credentials should not live in the notebook. A key supplied
        # through the RAPIDAPI_KEY environment variable takes precedence; the
        # literal is kept only as a backward-compatible fallback and should be
        # rotated and removed.
        'x-rapidapi-key': os.environ.get(
            "RAPIDAPI_KEY",
            "55c35a554dmshab5af4556f598ffp1d3d45jsn8b763429c95f"),
    }

    def fetch_page(page_number):
        """Return the decoded JSON of one result page, retrying on non-200."""
        params = dict(querystring, pageNumber=str(page_number))
        for attempt in range(max_attempts):
            if attempt:
                # Brief pause before retrying a failed request.
                time.sleep(1)
            page_req = requests.get(url, headers=headers, params=params, timeout=30)
            if page_req.status_code == 200:
                # Decode only a successful response, so a retried request is
                # never replaced by the stale body of the failed one.
                return page_req.json()
        page_req.raise_for_status()
        raise RuntimeError("page %s returned status %s"
                           % (page_number, page_req.status_code))

    first_page = fetch_page(1)
    total_count = first_page.get('totalCount') or 0
    # Ceiling division: an exact multiple of the page size needs no extra page.
    total_pages = -(-total_count // page_size)
    last_page = min(total_pages, max_pages)

    # Gather every record first and build the frame once, rather than growing
    # it with pd.concat inside the loop.
    records = list(first_page.get('value') or [])
    for page_number in range(2, last_page + 1):  # inclusive of the last page
        records.extend(fetch_page(page_number).get('value') or [])

    req_df = pd.DataFrame(records)
    if req_df.empty:
        # No articles: there is no 'provider' column to filter on.
        return req_df

    # Get out of here, wikipedia. Match on the provider name so the filter
    # still works if the provider dict carries extra keys.
    is_wikipedia = req_df['provider'].map(
        lambda provider: isinstance(provider, dict)
        and provider.get('name') == 'wikipedia'
    ).astype(bool)

    return (
        req_df.loc[~is_wikipedia]
        .drop(columns=["id", "isSafe", "image", "keywords"])
        .assign(datePublished=lambda frame: pd.to_datetime(frame['datePublished']))
        .reset_index(drop=True)
    )

In [26]:
# Smoke-test the paginated search. Note: this performs live API requests.
search_keyword('fire')

Unnamed: 0,title,url,description,body,language,datePublished,provider
0,"The Irvine fire was a recipe for disaster, but...",https://www.stripes.com/news/us/the-irvine-fir...,The fire started on the outskirts of master-pl...,By JOSEPH SERNA | Los Angeles Times | Publishe...,en,2020-10-30 17:53:00,{'name': 'stripes'}
1,Firefighters battle fire at Fontana gas statio...,https://www.dailybulletin.com/2020/10/30/firef...,Firefighters headed to the Valero gas station ...,View Comments\nJoin the Conversation\nWe invit...,en,2020-10-30 16:43:11,{'name': 'dailybulletin'}
2,"Teen smashes FDNY windshield, pulls knife on f...",https://nypost.com/2020/10/30/teen-smashes-fdn...,The encounter unfolded just after 10:15 a.m. o...,NYPD shakes up top spots after naming NYPDs fi...,en,2020-10-30 16:31:37,{'name': 'nypost'}
3,"Blue Ridge, Silverado fires getting contained ...",https://www.ocregister.com/2020/10/30/blue-rid...,"By Thursday evening, all evacuation orders in ...",An Ontario firefighter looks for hot spots fro...,en,2020-10-30 15:28:34,{'name': 'ocregister'}
4,"Tough firefighters, cuddly pets team up for ca...",http://www.nrtoday.com/news/health/tough-firef...,Firefighters and rescue animals teamed up to m...,Save\nFirefighters and rescue animals teamed u...,en,2020-10-30 15:00:00,{'name': 'nrtoday'}
...,...,...,...,...,...,...,...
245,Pigeon stuck in netting near Ipswich Buttermar...,https://www.eadt.co.uk/fire-crews-rescue-trapp...,Firefighters are trying to rescue a pigeon whi...,Fire crew sent to pigeon stuck in netting near...,en,2020-10-19 10:36:55,{'name': 'eadt'}
246,Tennessee fires defensive line coach following...,https://www.nationofblue.com/tennessee-fires-d...,Tennessee has fired defensive line coach Jimmy...,Tennessee has fired defensive line coach Jimmy...,en,2020-10-19 04:35:45,{'name': 'nationofblue'}
247,Fire service called out to fewer fires across ...,http://theargus.co.uk/news/18803957.fire-servi...,FIREFIGHTERS in Oxfordshire were called to a r...,comment\nFIREFIGHTERS in Oxfordshire were call...,en,2020-10-19 04:01:40,{'name': 'theargus'}
248,Firefighters try to contain fire on Mount Kili...,http://thestarphoenix.com/news/world/firefight...,"DAR ES SALAAM Tanzanian residents, students a...","The fire started at the Whona area, a rest cen...",en,2020-10-19 02:37:00,{'name': 'thestarphoenix'}
