diff --git a/credentials.json b/credentials.json new file mode 100644 index 0000000..0e0bec7 --- /dev/null +++ b/credentials.json @@ -0,0 +1,4 @@ +{ + "username":"db_user", + "password":"db_pass" +} \ No newline at end of file diff --git a/diabetes_redsamurai_db.ipynb b/diabetes_redsamurai_db.ipynb new file mode 100644 index 0000000..e3f3d58 --- /dev/null +++ b/diabetes_redsamurai_db.ipynb @@ -0,0 +1,270 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "import json\n", + "import cx_Oracle\n", + "\n", + "%load_ext sql" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'Connected: hr@'" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "with open('credentials.json') as f:\n", + " data = json.load(f)\n", + " username = data['username']\n", + " password = data['password']\n", + "\n", + "%sql oracle+cx_oracle://$username:$password@dbhost:1521/?service_name=ORCLPDB1.localdomain" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " * oracle+cx_oracle://hr:***@dbhost:1521/?service_name=ORCLPDB1.localdomain\n", + "0 rows affected.\n", + "Returning data to local variable result\n" + ] + } + ], + "source": [ + "%%sql result <<\n", + "select TIMES_PREGNANT \"TIMES_PREGNANT\", GLUCOSE \"GLUCOSE\", BLOOD_PRESSURE \"BLOOD_PRESSURE\", SKIN_FOLD_THICK \"SKIN_FOLD_THICK\",\n", + " SERUM_INSULIN \"SERUM_INSULING\", MASS_INDEX \"MASS_INDEX\", DIABETES_PEDIGREE \"DIABETES_PEDIGREE\", \n", + " AGE \"AGE\", CLASS_VAR \"CLASS_VAR\" from PIMA_INDIANS_DIABETES" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
times_pregnantglucoseblood_pressureskin_fold_thickserum_insulingmass_indexdiabetes_pedigreeageclass_var
01106702813534.20.142220
12155522754038.70.24251
2210158359021.80.155220
31120804820038.91.162410
41112710600390.19510
\n", + "
" + ], + "text/plain": [ + " times_pregnant glucose blood_pressure skin_fold_thick serum_insuling \\\n", + "0 1 106 70 28 135 \n", + "1 2 155 52 27 540 \n", + "2 2 101 58 35 90 \n", + "3 1 120 80 48 200 \n", + "4 11 127 106 0 0 \n", + "\n", + " mass_index diabetes_pedigree age class_var \n", + "0 34.2 0.142 22 0 \n", + "1 38.7 0.24 25 1 \n", + "2 21.8 0.155 22 0 \n", + "3 38.9 1.162 41 0 \n", + "4 39 0.19 51 0 " + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = result.DataFrame()\n", + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Number of rows in dataset: 768\n", + "0 500\n", + "1 268\n", + "Name: class_var, dtype: int64\n" + ] + } + ], + "source": [ + "print(f'Number of rows in dataset: {df.shape[0]}')\n", + "print(df[df.columns[8]].value_counts())" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "times_pregnant 0.221898\n", + "glucose 0.466581\n", + "blood_pressure 0.065068\n", + "skin_fold_thick 0.074752\n", + "serum_insuling 0.130548\n", + "age 0.238356\n", + "class_var 1.000000\n", + "Name: class_var, dtype: float64" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "corrs = df.corr()['class_var'].abs()\n", + "columns = corrs[corrs > .01].index\n", + "corrs = corrs.filter(columns)\n", + "corrs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + 
"pygments_lexer": "ipython3", + "version": "3.6.7" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}