Skip to content

Commit

Permalink
gisfromscratch#2 removed seaborn dependency
Browse files Browse the repository at this point in the history
  • Loading branch information
Jan Tschada committed Mar 19, 2020
1 parent 6c54c02 commit ae37469
Showing 1 changed file with 46 additions and 15 deletions.
61 changes: 46 additions & 15 deletions notebooks/Knowledge Graph Queries.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,30 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 1,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Collecting gdelt\n",
" Using cached https://files.pythonhosted.org/packages/65/f9/a3d5111c8f17334b1752c32aedaab0d01ab4324bf26417bd41890d5b25d0/gdelt-0.1.10.6.1-py2.py3-none-any.whl\n",
"Requirement already satisfied: requests in c:\\program files\\arcgis\\pro\\bin\\python\\envs\\arcgispro-py3\\lib\\site-packages (from gdelt) (2.22.0)\n",
"Requirement already satisfied: numpy in c:\\program files\\arcgis\\pro\\bin\\python\\envs\\arcgispro-py3\\lib\\site-packages (from gdelt) (1.16.5)\n",
"Requirement already satisfied: pandas>=0.20.3 in c:\\program files\\arcgis\\pro\\bin\\python\\envs\\arcgispro-py3\\lib\\site-packages (from gdelt) (0.25.1)\n",
"Requirement already satisfied: python-dateutil in c:\\program files\\arcgis\\pro\\bin\\python\\envs\\arcgispro-py3\\lib\\site-packages (from gdelt) (2.8.0)\n",
"Requirement already satisfied: certifi>=2017.4.17 in c:\\program files\\arcgis\\pro\\bin\\python\\envs\\arcgispro-py3\\lib\\site-packages (from requests->gdelt) (2019.9.11)\n",
"Requirement already satisfied: idna<2.9,>=2.5 in c:\\program files\\arcgis\\pro\\bin\\python\\envs\\arcgispro-py3\\lib\\site-packages (from requests->gdelt) (2.8)\n",
"Requirement already satisfied: chardet<3.1.0,>=3.0.2 in c:\\program files\\arcgis\\pro\\bin\\python\\envs\\arcgispro-py3\\lib\\site-packages (from requests->gdelt) (3.0.4)\n",
"Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in c:\\program files\\arcgis\\pro\\bin\\python\\envs\\arcgispro-py3\\lib\\site-packages (from requests->gdelt) (1.24.2)\n",
"Requirement already satisfied: pytz>=2017.2 in c:\\program files\\arcgis\\pro\\bin\\python\\envs\\arcgispro-py3\\lib\\site-packages\\pytz-2019.2-py3.6.egg (from pandas>=0.20.3->gdelt) (2019.2)\n",
"Requirement already satisfied: six>=1.5 in c:\\program files\\arcgis\\pro\\bin\\python\\envs\\arcgispro-py3\\lib\\site-packages (from python-dateutil->gdelt) (1.12.0)\n",
"Installing collected packages: gdelt\n",
"Successfully installed gdelt-0.1.10.6\n"
]
}
],
"source": [
"!pip install --user gdelt"
]
Expand All @@ -25,15 +46,14 @@
},
{
"cell_type": "code",
"execution_count": 28,
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"from datetime import date, timedelta\n",
"from gdelt import gdelt as gdelt_client\n",
"import matplotlib.pyplot as plot\n",
"import pandas\n",
"import seaborn\n",
"import tempfile"
]
},
Expand All @@ -48,7 +68,7 @@
},
{
"cell_type": "code",
"execution_count": 56,
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
Expand Down Expand Up @@ -88,9 +108,27 @@
},
{
"cell_type": "code",
"execution_count": 57,
"execution_count": 6,
"metadata": {},
"outputs": [],
"outputs": [
{
"ename": "ReadTimeout",
"evalue": "None: None",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mRemoteTraceback\u001b[0m Traceback (most recent call last)",
"\u001b[1;31mRemoteTraceback\u001b[0m: \n\"\"\"\nTraceback (most recent call last):\n File \"C:\\Program Files\\ArcGIS\\Pro\\bin\\Python\\envs\\arcgispro-py3\\lib\\site-packages\\urllib3\\connectionpool.py\", line 384, in _make_request\n six.raise_from(e, None)\n File \"<string>\", line 2, in raise_from\n File \"C:\\Program Files\\ArcGIS\\Pro\\bin\\Python\\envs\\arcgispro-py3\\lib\\site-packages\\urllib3\\connectionpool.py\", line 380, in _make_request\n httplib_response = conn.getresponse()\n File \"C:\\Program Files\\ArcGIS\\Pro\\bin\\Python\\envs\\arcgispro-py3\\lib\\http\\client.py\", line 1346, in getresponse\n response.begin()\n File \"C:\\Program Files\\ArcGIS\\Pro\\bin\\Python\\envs\\arcgispro-py3\\lib\\http\\client.py\", line 307, in begin\n version, status, reason = self._read_status()\n File \"C:\\Program Files\\ArcGIS\\Pro\\bin\\Python\\envs\\arcgispro-py3\\lib\\http\\client.py\", line 268, in _read_status\n line = str(self.fp.readline(_MAXLINE + 1), \"iso-8859-1\")\n File \"C:\\Program Files\\ArcGIS\\Pro\\bin\\Python\\envs\\arcgispro-py3\\lib\\socket.py\", line 586, in readinto\n return self._sock.recv_into(b)\nsocket.timeout: timed out\n\nDuring handling of the above exception, another exception occurred:\n\nTraceback (most recent call last):\n File \"C:\\Program Files\\ArcGIS\\Pro\\bin\\Python\\envs\\arcgispro-py3\\lib\\site-packages\\requests\\adapters.py\", line 449, in send\n timeout=timeout\n File \"C:\\Program Files\\ArcGIS\\Pro\\bin\\Python\\envs\\arcgispro-py3\\lib\\site-packages\\urllib3\\connectionpool.py\", line 638, in urlopen\n _stacktrace=sys.exc_info()[2])\n File \"C:\\Program Files\\ArcGIS\\Pro\\bin\\Python\\envs\\arcgispro-py3\\lib\\site-packages\\urllib3\\util\\retry.py\", line 368, in increment\n raise six.reraise(type(error), error, _stacktrace)\n File \"C:\\Program Files\\ArcGIS\\Pro\\bin\\Python\\envs\\arcgispro-py3\\lib\\site-packages\\urllib3\\packages\\six.py\", line 686, in reraise\n raise value\n File \"C:\\Program Files\\ArcGIS\\Pro\\bin\\Python\\envs\\arcgispro-py3\\lib\\site-packages\\urllib3\\connectionpool.py\", line 600, in urlopen\n chunked=chunked)\n File \"C:\\Program Files\\ArcGIS\\Pro\\bin\\Python\\envs\\arcgispro-py3\\lib\\site-packages\\urllib3\\connectionpool.py\", line 386, in _make_request\n self._raise_timeout(err=e, url=url, timeout_value=read_timeout)\n File \"C:\\Program Files\\ArcGIS\\Pro\\bin\\Python\\envs\\arcgispro-py3\\lib\\site-packages\\urllib3\\connectionpool.py\", line 306, in _raise_timeout\n raise ReadTimeoutError(self, url, \"Read timed out. (read timeout=%s)\" % timeout_value)\nurllib3.exceptions.ReadTimeoutError: HTTPConnectionPool(host='data.gdeltproject.org', port=80): Read timed out. (read timeout=5)\n\nDuring handling of the above exception, another exception occurred:\n\nTraceback (most recent call last):\n File \"C:\\Program Files\\ArcGIS\\Pro\\bin\\Python\\envs\\arcgispro-py3\\lib\\multiprocessing\\pool.py\", line 119, in worker\n result = (True, func(*args, **kwds))\n File \"C:\\Users\\jts\\AppData\\Roaming\\Python\\Python36\\site-packages\\gdelt\\parallel.py\", line 62, in _mp_worker\n r = requests.get(url, proxies=proxies, timeout=5)\n File \"C:\\Program Files\\ArcGIS\\Pro\\bin\\Python\\envs\\arcgispro-py3\\lib\\site-packages\\requests\\api.py\", line 75, in get\n return request('get', url, params=params, **kwargs)\n File \"C:\\Program Files\\ArcGIS\\Pro\\bin\\Python\\envs\\arcgispro-py3\\lib\\site-packages\\requests\\api.py\", line 60, in request\n return session.request(method=method, url=url, **kwargs)\n File \"C:\\Program Files\\ArcGIS\\Pro\\bin\\Python\\envs\\arcgispro-py3\\lib\\site-packages\\requests\\sessions.py\", line 533, in request\n resp = self.send(prep, **send_kwargs)\n File \"C:\\Program Files\\ArcGIS\\Pro\\bin\\Python\\envs\\arcgispro-py3\\lib\\site-packages\\requests\\sessions.py\", line 646, in send\n r = adapter.send(request, **kwargs)\n File \"C:\\Program Files\\ArcGIS\\Pro\\bin\\Python\\envs\\arcgispro-py3\\lib\\site-packages\\requests\\adapters.py\", line 529, in send\n raise ReadTimeout(e, request=request)\nrequests.exceptions.ReadTimeout: HTTPConnectionPool(host='data.gdeltproject.org', port=80): Read timed out. (read timeout=5)\n\"\"\"",
"\nThe above exception was the direct cause of the following exception:\n",
"\u001b[1;31mReadTimeout\u001b[0m Traceback (most recent call last)",
"\u001b[1;32m<ipython-input-6-d29845994187>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[0mend_date\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mdate\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mtoday\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m-\u001b[0m\u001b[0mtimedelta\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdays\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;36m0\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 2\u001b[0m \u001b[0mstart_date\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mend_date\u001b[0m\u001b[1;33m-\u001b[0m\u001b[0mtimedelta\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdays\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 3\u001b[1;33m \u001b[0mgraph\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mget_graph_range\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mstart_date\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mend_date\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mcoverage\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mTrue\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 4\u001b[0m \u001b[0mcorona_records\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mgraph\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mloc\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mgraph\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m\"V2Themes\"\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mstr\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcontains\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"TAX_DISEASE_CORONAVIRUS\"\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mna\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mFalse\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 5\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32m<ipython-input-4-ab31afe241d6>\u001b[0m in \u001b[0;36mget_graph_range\u001b[1;34m(from_date, to_date, coverage)\u001b[0m\n\u001b[0;32m 16\u001b[0m \u001b[1;32mfor\u001b[0m \u001b[0mday\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mrange\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;36m0\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdate_range\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mdays\u001b[0m \u001b[1;33m+\u001b[0m \u001b[1;36m1\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 17\u001b[0m \u001b[0mdate\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mfrom_date\u001b[0m \u001b[1;33m+\u001b[0m \u001b[0mtimedelta\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdays\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mday\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 18\u001b[1;33m \u001b[0mgraph_temp\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mclient\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mSearch\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdate\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mstrftime\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"%Y %m %d\"\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mtable\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;34m\"gkg\"\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mcoverage\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mcoverage\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 19\u001b[0m \u001b[0mgraph_temp\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mgraph_temp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mastype\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m{\u001b[0m\u001b[1;34m\"DATE\"\u001b[0m\u001b[1;33m:\u001b[0m \u001b[0mstr\u001b[0m\u001b[1;33m}\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 20\u001b[0m \u001b[0mgraph_temp\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m\"DATE\"\u001b[0m\u001b[1;33m]\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mgraph_temp\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m\"DATE\"\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mapply\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;32mlambda\u001b[0m \u001b[0mdateStr\u001b[0m\u001b[1;33m:\u001b[0m \u001b[0mdateStr\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;36m14\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32m~\\AppData\\Roaming\\Python\\Python36\\site-packages\\gdelt\\base.py\u001b[0m in \u001b[0;36mSearch\u001b[1;34m(self, date, table, coverage, translation, output, queryTime, normcols)\u001b[0m\n\u001b[0;32m 634\u001b[0m \u001b[0mpool\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mNoDaemonProcessPool\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mprocesses\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mcpu_count\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 635\u001b[0m downloaded_dfs = list(pool.imap_unordered(_mp_worker,\n\u001b[1;32m--> 636\u001b[1;33m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mdownload_list\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 637\u001b[0m ))\n\u001b[0;32m 638\u001b[0m \u001b[0mpool\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mclose\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32mC:\\Program Files\\ArcGIS\\Pro\\bin\\Python\\envs\\arcgispro-py3\\lib\\multiprocessing\\pool.py\u001b[0m in \u001b[0;36mnext\u001b[1;34m(self, timeout)\u001b[0m\n\u001b[0;32m 733\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0msuccess\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 734\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mvalue\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 735\u001b[1;33m \u001b[1;32mraise\u001b[0m \u001b[0mvalue\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 736\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 737\u001b[0m \u001b[0m__next__\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mnext\u001b[0m \u001b[1;31m# XXX\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;31mReadTimeout\u001b[0m: None: None"
]
}
],
"source": [
"end_date = date.today()-timedelta(days=0)\n",
"start_date = end_date-timedelta(days=1)\n",
Expand All @@ -100,13 +138,6 @@
"report_date = end_date"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Save records to temp"
]
},
{
"cell_type": "code",
"execution_count": 58,
Expand Down Expand Up @@ -486,7 +517,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.4"
"version": "3.6.9"
}
},
"nbformat": 4,
Expand Down

0 comments on commit ae37469

Please sign in to comment.