Skip to content

Commit

Permalink
#2 Count the news articles by DATE using themes
Browse files Browse the repository at this point in the history
  • Loading branch information
gisfromscratch committed Mar 19, 2020
1 parent 6c54c02 commit 1a0348a
Show file tree
Hide file tree
Showing 2 changed files with 301 additions and 125 deletions.
250 changes: 125 additions & 125 deletions notebooks/Knowledge Graph Queries.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
},
{
"cell_type": "code",
"execution_count": 28,
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -48,7 +48,7 @@
},
{
"cell_type": "code",
"execution_count": 56,
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
Expand Down Expand Up @@ -88,7 +88,7 @@
},
{
"cell_type": "code",
"execution_count": 57,
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -109,7 +109,7 @@
},
{
"cell_type": "code",
"execution_count": 58,
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -125,7 +125,7 @@
},
{
"cell_type": "code",
"execution_count": 59,
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
Expand Down Expand Up @@ -192,7 +192,7 @@
},
{
"cell_type": "code",
"execution_count": 60,
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
Expand Down Expand Up @@ -220,7 +220,7 @@
},
{
"cell_type": "code",
"execution_count": 61,
"execution_count": 7,
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -256,53 +256,53 @@
" <tbody>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>20200314000000-1</td>\n",
" <td>20200314000000</td>\n",
" <td>mykxlg.com</td>\n",
" <td>https://www.mykxlg.com/news/national/on-eve-of...</td>\n",
" <td>Hollywood, California, United States</td>\n",
" <td>34.0983</td>\n",
" <td>-118.327</td>\n",
" <td>20200318001500-1</td>\n",
" <td>20200318001500</td>\n",
" <td>thestar.com</td>\n",
" <td>https://www.thestar.com/news/gta/2020/03/17/ho...</td>\n",
" <td>Leamington, Warwickshire, United Kingdom</td>\n",
" <td>52.3</td>\n",
" <td>-1.53333</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>20200314000000-1</td>\n",
" <td>20200314000000</td>\n",
" <td>mykxlg.com</td>\n",
" <td>https://www.mykxlg.com/news/national/on-eve-of...</td>\n",
" <td>Anaheim, California, United States</td>\n",
" <td>33.8353</td>\n",
" <td>-117.915</td>\n",
" <td>20200318001500-1</td>\n",
" <td>20200318001500</td>\n",
" <td>thestar.com</td>\n",
" <td>https://www.thestar.com/news/gta/2020/03/17/ho...</td>\n",
" <td>Toronto, Ontario, Canada</td>\n",
" <td>43.6667</td>\n",
" <td>-79.4167</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>20200314000000-3</td>\n",
" <td>20200314000000</td>\n",
" <td>newportri.com</td>\n",
" <td>https://www.newportri.com/zz/news/20200313/sch...</td>\n",
" <td>Philadelphia, Pennsylvania, United States</td>\n",
" <td>39.9523</td>\n",
" <td>-75.1638</td>\n",
" <th>1</th>\n",
" <td>20200318001500-1</td>\n",
" <td>20200318001500</td>\n",
" <td>thestar.com</td>\n",
" <td>https://www.thestar.com/news/gta/2020/03/17/ho...</td>\n",
" <td>Wexford, Wexford, Ireland</td>\n",
" <td>52.3342</td>\n",
" <td>-6.4575</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>20200314000000-3</td>\n",
" <td>20200314000000</td>\n",
" <td>newportri.com</td>\n",
" <td>https://www.newportri.com/zz/news/20200313/sch...</td>\n",
" <td>Cleveland, Ohio, United States</td>\n",
" <td>41.4995</td>\n",
" <td>-81.6954</td>\n",
" <th>1</th>\n",
" <td>20200318001500-1</td>\n",
" <td>20200318001500</td>\n",
" <td>thestar.com</td>\n",
" <td>https://www.thestar.com/news/gta/2020/03/17/ho...</td>\n",
" <td>London, London, City Of, United Kingdom</td>\n",
" <td>51.5</td>\n",
" <td>-0.116667</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>20200314000000-3</td>\n",
" <td>20200314000000</td>\n",
" <td>newportri.com</td>\n",
" <td>https://www.newportri.com/zz/news/20200313/sch...</td>\n",
" <td>Valley High School, California, United States</td>\n",
" <td>33.7231</td>\n",
" <td>-117.901</td>\n",
" <th>13</th>\n",
" <td>20200318001500-13</td>\n",
" <td>20200318001500</td>\n",
" <td>kxrb.com</td>\n",
" <td>https://kxrb.com/sioux-falls-among-10-most-rec...</td>\n",
" <td>Sioux Falls, South Dakota, United States</td>\n",
" <td>43.55</td>\n",
" <td>-96.7003</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
Expand All @@ -315,104 +315,104 @@
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>79207</th>\n",
" <td>20200315191500-1533</td>\n",
" <td>20200315191500</td>\n",
" <td>rockymounttelegram.com</td>\n",
" <td>https://www.rockymounttelegram.com/news/nation...</td>\n",
" <td>Iditarod, Alaska, United States</td>\n",
" <td>62.5444</td>\n",
" <td>-158.095</td>\n",
" <th>115739</th>\n",
" <td>20200319163000-232</td>\n",
" <td>20200319163000</td>\n",
" <td>timesreporter.com</td>\n",
" <td>https://www.timesreporter.com/opinion/20200319...</td>\n",
" <td>East Cleveland, Ohio, United States</td>\n",
" <td>41.5331</td>\n",
" <td>-81.579</td>\n",
" </tr>\n",
" <tr>\n",
" <th>79207</th>\n",
" <td>20200315191500-1533</td>\n",
" <td>20200315191500</td>\n",
" <td>rockymounttelegram.com</td>\n",
" <td>https://www.rockymounttelegram.com/news/nation...</td>\n",
" <td>Anchorage, Alaska, United States</td>\n",
" <td>61.2181</td>\n",
" <td>-149.9</td>\n",
" <th>115742</th>\n",
" <td>20200319163000-235</td>\n",
" <td>20200319163000</td>\n",
" <td>sun-sentinel.com</td>\n",
" <td>http://www.sun-sentinel.com/news/nationworld/c...</td>\n",
" <td>Beirut, Beyrouth, Lebanon</td>\n",
" <td>33.8719</td>\n",
" <td>35.5097</td>\n",
" </tr>\n",
" <tr>\n",
" <th>79207</th>\n",
" <td>20200315191500-1533</td>\n",
" <td>20200315191500</td>\n",
" <td>rockymounttelegram.com</td>\n",
" <td>https://www.rockymounttelegram.com/news/nation...</td>\n",
" <td>Kaltag, Alaska, United States</td>\n",
" <td>64.3272</td>\n",
" <td>-158.722</td>\n",
" <th>115742</th>\n",
" <td>20200319163000-235</td>\n",
" <td>20200319163000</td>\n",
" <td>sun-sentinel.com</td>\n",
" <td>http://www.sun-sentinel.com/news/nationworld/c...</td>\n",
" <td>Washington, Washington, United States</td>\n",
" <td>38.8951</td>\n",
" <td>-77.0364</td>\n",
" </tr>\n",
" <tr>\n",
" <th>79207</th>\n",
" <td>20200315191500-1533</td>\n",
" <td>20200315191500</td>\n",
" <td>rockymounttelegram.com</td>\n",
" <td>https://www.rockymounttelegram.com/news/nation...</td>\n",
" <td>Nulato, Alaska, United States</td>\n",
" <td>64.7194</td>\n",
" <td>-158.103</td>\n",
" <th>115742</th>\n",
" <td>20200319163000-235</td>\n",
" <td>20200319163000</td>\n",
" <td>sun-sentinel.com</td>\n",
" <td>http://www.sun-sentinel.com/news/nationworld/c...</td>\n",
" <td>Dover, New Hampshire, United States</td>\n",
" <td>43.1979</td>\n",
" <td>-70.8737</td>\n",
" </tr>\n",
" <tr>\n",
" <th>79208</th>\n",
" <td>20200315191500-1534</td>\n",
" <td>20200315191500</td>\n",
" <td>greenwichtime.com</td>\n",
" <td>https://www.greenwichtime.com/news/coronavirus...</td>\n",
" <td>Farmington, Connecticut, United States</td>\n",
" <td>41.7198</td>\n",
" <td>-72.832</td>\n",
" <th>115743</th>\n",
" <td>20200319163000-236</td>\n",
" <td>20200319163000</td>\n",
" <td>newindianexpress.com</td>\n",
" <td>https://www.newindianexpress.com/cities/delhi/...</td>\n",
" <td>Delhi, Delhi, India</td>\n",
" <td>28.6667</td>\n",
" <td>77.2167</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>204440 rows × 7 columns</p>\n",
"<p>244229 rows × 7 columns</p>\n",
"</div>"
],
"text/plain": [
" GKGRECORDID DATE SourceCommonName \\\n",
"1 20200314000000-1 20200314000000 mykxlg.com \n",
"1 20200314000000-1 20200314000000 mykxlg.com \n",
"3 20200314000000-3 20200314000000 newportri.com \n",
"3 20200314000000-3 20200314000000 newportri.com \n",
"3 20200314000000-3 20200314000000 newportri.com \n",
"... ... ... ... \n",
"79207 20200315191500-1533 20200315191500 rockymounttelegram.com \n",
"79207 20200315191500-1533 20200315191500 rockymounttelegram.com \n",
"79207 20200315191500-1533 20200315191500 rockymounttelegram.com \n",
"79207 20200315191500-1533 20200315191500 rockymounttelegram.com \n",
"79208 20200315191500-1534 20200315191500 greenwichtime.com \n",
" GKGRECORDID DATE SourceCommonName \\\n",
"1 20200318001500-1 20200318001500 thestar.com \n",
"1 20200318001500-1 20200318001500 thestar.com \n",
"1 20200318001500-1 20200318001500 thestar.com \n",
"1 20200318001500-1 20200318001500 thestar.com \n",
"13 20200318001500-13 20200318001500 kxrb.com \n",
"... ... ... ... \n",
"115739 20200319163000-232 20200319163000 timesreporter.com \n",
"115742 20200319163000-235 20200319163000 sun-sentinel.com \n",
"115742 20200319163000-235 20200319163000 sun-sentinel.com \n",
"115742 20200319163000-235 20200319163000 sun-sentinel.com \n",
"115743 20200319163000-236 20200319163000 newindianexpress.com \n",
"\n",
" DocumentIdentifier \\\n",
"1 https://www.mykxlg.com/news/national/on-eve-of... \n",
"1 https://www.mykxlg.com/news/national/on-eve-of... \n",
"3 https://www.newportri.com/zz/news/20200313/sch... \n",
"3 https://www.newportri.com/zz/news/20200313/sch... \n",
"3 https://www.newportri.com/zz/news/20200313/sch... \n",
"... ... \n",
"79207 https://www.rockymounttelegram.com/news/nation... \n",
"79207 https://www.rockymounttelegram.com/news/nation... \n",
"79207 https://www.rockymounttelegram.com/news/nation... \n",
"79207 https://www.rockymounttelegram.com/news/nation... \n",
"79208 https://www.greenwichtime.com/news/coronavirus... \n",
" DocumentIdentifier \\\n",
"1 https://www.thestar.com/news/gta/2020/03/17/ho... \n",
"1 https://www.thestar.com/news/gta/2020/03/17/ho... \n",
"1 https://www.thestar.com/news/gta/2020/03/17/ho... \n",
"1 https://www.thestar.com/news/gta/2020/03/17/ho... \n",
"13 https://kxrb.com/sioux-falls-among-10-most-rec... \n",
"... ... \n",
"115739 https://www.timesreporter.com/opinion/20200319... \n",
"115742 http://www.sun-sentinel.com/news/nationworld/c... \n",
"115742 http://www.sun-sentinel.com/news/nationworld/c... \n",
"115742 http://www.sun-sentinel.com/news/nationworld/c... \n",
"115743 https://www.newindianexpress.com/cities/delhi/... \n",
"\n",
" Location_Name Location_Lat Location_Lon \n",
"1 Hollywood, California, United States 34.0983 -118.327 \n",
"1 Anaheim, California, United States 33.8353 -117.915 \n",
"3 Philadelphia, Pennsylvania, United States 39.9523 -75.1638 \n",
"3 Cleveland, Ohio, United States 41.4995 -81.6954 \n",
"3 Valley High School, California, United States 33.7231 -117.901 \n",
"... ... ... ... \n",
"79207 Iditarod, Alaska, United States 62.5444 -158.095 \n",
"79207 Anchorage, Alaska, United States 61.2181 -149.9 \n",
"79207 Kaltag, Alaska, United States 64.3272 -158.722 \n",
"79207 Nulato, Alaska, United States 64.7194 -158.103 \n",
"79208 Farmington, Connecticut, United States 41.7198 -72.832 \n",
" Location_Name Location_Lat Location_Lon \n",
"1 Leamington, Warwickshire, United Kingdom 52.3 -1.53333 \n",
"1 Toronto, Ontario, Canada 43.6667 -79.4167 \n",
"1 Wexford, Wexford, Ireland 52.3342 -6.4575 \n",
"1 London, London, City Of, United Kingdom 51.5 -0.116667 \n",
"13 Sioux Falls, South Dakota, United States 43.55 -96.7003 \n",
"... ... ... ... \n",
"115739 East Cleveland, Ohio, United States 41.5331 -81.579 \n",
"115742 Beirut, Beyrouth, Lebanon 33.8719 35.5097 \n",
"115742 Washington, Washington, United States 38.8951 -77.0364 \n",
"115742 Dover, New Hampshire, United States 43.1979 -70.8737 \n",
"115743 Delhi, Delhi, India 28.6667 77.2167 \n",
"\n",
"[204440 rows x 7 columns]"
"[244229 rows x 7 columns]"
]
},
"execution_count": 61,
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
Expand Down Expand Up @@ -453,7 +453,7 @@
},
{
"cell_type": "code",
"execution_count": 62,
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
Expand Down
Loading

0 comments on commit 1a0348a

Please sign in to comment.