diff --git a/demos/demos_databases_apis/sql/postgres_alchemy_pypyodbc.ipynb b/demos/demos_databases_apis/sql/postgres_alchemy_pypyodbc.ipynb new file mode 100644 index 0000000000..80133ec1df --- /dev/null +++ b/demos/demos_databases_apis/sql/postgres_alchemy_pypyodbc.ipynb @@ -0,0 +1,414 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# SQL Example\n", + "\n", + "Get & plot data from a SQL DB.\n", + "\n", + "* For drivers, this notebook uses pure Python ODBC (`pypyodbc`) and connects to Postgres\n", + "* Creates a table in the DB if does not exist" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Creds\n", + "\n", + "* Create ODBC connection string\n", + "* We recommend changing `user`/`pwd` to ENV_VARS or files rather than visible hard-coded strings" + ] + }, + { + "cell_type": "code", + "execution_count": 56, + "metadata": {}, + "outputs": [], + "source": [ + "user = \"myadmin\"\n", + "pwd = \"mypassword\"\n", + "server = \"rds-test.zzz.us-west-2.rds.amazonaws.com:5432/postgres\"\n", + "#import os\n", + "#user = os.environ['MY_USER']\n", + "#pwd = os.environ['MY_PWD']\n", + "#server = os.environ['MY_SERVER']\n", + "\n", + "db_string = \"postgres://\" + user + \":\" + pwd + \"@\" + server" + ] + }, + { + "cell_type": "code", + "execution_count": 57, + "metadata": {}, + "outputs": [], + "source": [ + "import graphistry\n", + "#graphistry.register(key='MY_API_KEY', server='my.server.com')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Optional: Install ODBC drivers for Postgres" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Optional: Install a faster custom ODBC driver by logging in to jupyter docker as root:\n", + "\n", + "# $ ssh -i key.pem ubuntu@my_public_ip\n", + "# ubuntu@my_private_ip $ docker exec -it -u root graphistry_notebook_1 bash\n", + "# root@601a8af7ea4c $ apt-get update\n", + "# root@601a8af7ea4c $ apt-get install unixodbc\n", + "# root@601a8af7ea4c $ pip3 install pyodbc" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "# If not already exists...\n", + "\n", + "!pip3 install wheel -q\n", + "!pip3 install pypyodbc -q\n", + "!pip3 install sqlalchemy -q" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Connect to DB" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import sqlalchemy as db\n", + "from sqlalchemy import create_engine, MetaData, Table, Column, Integer, Float, String\n", + "from sqlalchemy.orm import sessionmaker" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [], + "source": [ + "engine = create_engine(db_string)\n", + "Session = sessionmaker(bind=engine)\n", + "session = Session()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Optional: Prepopulate table \"my_table\"" + ] + }, + { + "cell_type": "code", + "execution_count": 58, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['my_table']" + ] + }, + "execution_count": 58, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "engine.table_names()" + ] + }, + { + "cell_type": "code", + "execution_count": 59, + "metadata": {}, + "outputs": [], + "source": [ + "if not engine.dialect.has_table(engine, 'my_table'): # If table don't exist, Create.\n", + " metadata = MetaData(engine)\n", + " mytable = Table('my_table', metadata,\n", + " Column('Id', Integer, primary_key=True, nullable=False), \n", + " Column('Country', String),\n", + " Column('Brand', String),\n", + " Column('Price', Float))\n", + " metadata.create_all()\n", + " engine.execute(\n", + " mytable.insert(),\n", + " [{\"Country\": \"c\" + str(x % 2), \"Brand\": \"b_\" + str(x), \"Price\": x * 2} for x in range(0, 10)]) " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Get data" + ] + }, + { + "cell_type": "code", + "execution_count": 54, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "# rows 10\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
IdCountryBrandPrice
67c0b_612.0
56c1b_510.0
78c1b_714.0
\n", + "
" + ], + "text/plain": [ + " Id Country Brand Price\n", + "6 7 c0 b_6 12.0\n", + "5 6 c1 b_5 10.0\n", + "7 8 c1 b_7 14.0" + ] + }, + "execution_count": 54, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "result = engine.execute(\"SELECT * FROM my_table LIMIT 10\")\n", + "df = pd.DataFrame(result.fetchall(), columns=result.keys())\n", + "print('# rows', len(df))\n", + "df.sample(min(3, len(df)))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Plot\n", + "\n", + "Several variants:\n", + "1. Treat each row & cell value as a node, and connect row<>cell values\n", + "2. Treat each cell value as an edge, and connect all cell values together when they occur on the same row\n", + "3. Treat each cell value as an edge, and specify which columns to to connect values together on" + ] + }, + { + "cell_type": "code", + "execution_count": 65, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "# links 30\n", + "# events 10\n", + "# attrib entities 22\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.6/dist-packages/graphistry/hyper.py:171: FutureWarning: Sorting because non-concatenation axis is not aligned. A future version\n", + "of pandas will change to not sort by default.\n", + "\n", + "To accept the future behavior, pass 'sort=False'.\n", + "\n", + "To retain the current behavior and silence the warning, pass 'sort=True'.\n", + "\n", + " nodes = pd.concat([entities, event_entities], ignore_index=True).reset_index(drop=True)\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " " + ], + "text/plain": [ + "" + ] + }, + "execution_count": 65, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "hg = graphistry.hypergraph(df, ['Id', 'Country', 'Brand'])\n", + "hg['graph'].plot()" + ] + }, + { + "cell_type": "code", + "execution_count": 67, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "# links 30\n", + "# events 10\n", + "# attrib entities 22\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " " + ], + "text/plain": [ + "" + ] + }, + "execution_count": 67, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "hg = graphistry.hypergraph(\n", + " df, \n", + " ['Id', 'Country', 'Brand'],\n", + " direct=True)\n", + "hg['graph'].plot()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "hg = graphistry.hypergraph(\n", + " df, \n", + " ['Id', 'Country', 'Brand'],\n", + " direct=True,\n", + " opts={\n", + " 'EDGES': {\n", + " 'Id': ['Country', 'Brand'],\n", + " 'Brand': ['Country']\n", + " }\n", + " })\n", + "hg['graph'].plot()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.7" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}