Skip to content

Commit

Permalink
ML DB integration
Browse files Browse the repository at this point in the history
  • Loading branch information
redsamurai committed Feb 11, 2019
1 parent 878b0f8 commit c61bb77
Show file tree
Hide file tree
Showing 2 changed files with 274 additions and 0 deletions.
4 changes: 4 additions & 0 deletions credentials.json
@@ -0,0 +1,4 @@
{
"username":"db_user",
"password":"db_pass"
}
270 changes: 270 additions & 0 deletions diabetes_redsamurai_db.ipynb
@@ -0,0 +1,270 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"import json\n",
"import cx_Oracle\n",
"\n",
"%load_ext sql"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'Connected: hr@'"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Read the database login from credentials.json; the file is closed before use.\n",
"with open('credentials.json') as f:\n",
"    data = json.load(f)\n",
"username = data['username']\n",
"password = data['password']\n",
"\n",
"# The sql magic fills $username and $password from the variables set above.\n",
"%sql oracle+cx_oracle://$username:$password@dbhost:1521/?service_name=ORCLPDB1.localdomain"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" * oracle+cx_oracle://hr:***@dbhost:1521/?service_name=ORCLPDB1.localdomain\n",
"0 rows affected.\n",
"Returning data to local variable result\n"
]
}
],
"source": [
"%%sql result <<\n",
"select TIMES_PREGNANT \"TIMES_PREGNANT\", GLUCOSE \"GLUCOSE\", BLOOD_PRESSURE \"BLOOD_PRESSURE\", SKIN_FOLD_THICK \"SKIN_FOLD_THICK\",\n",
" SERUM_INSULIN \"SERUM_INSULIN\", MASS_INDEX \"MASS_INDEX\", DIABETES_PEDIGREE \"DIABETES_PEDIGREE\",\n",
" AGE \"AGE\", CLASS_VAR \"CLASS_VAR\" from PIMA_INDIANS_DIABETES"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>times_pregnant</th>\n",
" <th>glucose</th>\n",
" <th>blood_pressure</th>\n",
" <th>skin_fold_thick</th>\n",
" <th>serum_insuling</th>\n",
" <th>mass_index</th>\n",
" <th>diabetes_pedigree</th>\n",
" <th>age</th>\n",
" <th>class_var</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>106</td>\n",
" <td>70</td>\n",
" <td>28</td>\n",
" <td>135</td>\n",
" <td>34.2</td>\n",
" <td>0.142</td>\n",
" <td>22</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2</td>\n",
" <td>155</td>\n",
" <td>52</td>\n",
" <td>27</td>\n",
" <td>540</td>\n",
" <td>38.7</td>\n",
" <td>0.24</td>\n",
" <td>25</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2</td>\n",
" <td>101</td>\n",
" <td>58</td>\n",
" <td>35</td>\n",
" <td>90</td>\n",
" <td>21.8</td>\n",
" <td>0.155</td>\n",
" <td>22</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>1</td>\n",
" <td>120</td>\n",
" <td>80</td>\n",
" <td>48</td>\n",
" <td>200</td>\n",
" <td>38.9</td>\n",
" <td>1.162</td>\n",
" <td>41</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>11</td>\n",
" <td>127</td>\n",
" <td>106</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>39</td>\n",
" <td>0.19</td>\n",
" <td>51</td>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" times_pregnant glucose blood_pressure skin_fold_thick serum_insuling \\\n",
"0 1 106 70 28 135 \n",
"1 2 155 52 27 540 \n",
"2 2 101 58 35 90 \n",
"3 1 120 80 48 200 \n",
"4 11 127 106 0 0 \n",
"\n",
" mass_index diabetes_pedigree age class_var \n",
"0 34.2 0.142 22 0 \n",
"1 38.7 0.24 25 1 \n",
"2 21.8 0.155 22 0 \n",
"3 38.9 1.162 41 0 \n",
"4 39 0.19 51 0 "
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Turn the query result captured by the %%sql cell into a DataFrame.\n",
"df = result.DataFrame()\n",
"# Preview the first five rows.\n",
"df.head(5)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Number of rows in dataset: {df.shape[0]}\n",
"0 500\n",
"1 268\n",
"Name: class_var, dtype: int64\n"
]
}
],
"source": [
"# f-string, so the row count is interpolated instead of printing the braces literally.\n",
"print(f'Number of rows in dataset: {df.shape[0]}')\n",
"# Class balance of the label column, addressed by name rather than by position.\n",
"print(df['class_var'].value_counts())"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"times_pregnant 0.221898\n",
"glucose 0.466581\n",
"blood_pressure 0.065068\n",
"skin_fold_thick 0.074752\n",
"serum_insuling 0.130548\n",
"age 0.238356\n",
"class_var 1.000000\n",
"Name: class_var, dtype: float64"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# mass_index and diabetes_pedigree are absent from this cell's stored output, which\n",
"# suggests they load as non-numeric (object) columns that corr() leaves out --\n",
"# TODO confirm with df.dtypes. Casting to float keeps every feature in the ranking.\n",
"corrs = df.astype(float).corr()['class_var'].abs()\n",
"# Keep only the features whose absolute correlation with the label exceeds 0.01.\n",
"columns = corrs[corrs > .01].index\n",
"corrs = corrs.filter(columns)\n",
"corrs"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.7"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

0 comments on commit c61bb77

Please sign in to comment.