# Bringing Data from a Local Machine into JupyterLab

### Import relevant modules

In [1]:
import datetime
import getpass
import os
import urllib.parse

import numpy
import pandas
import sqlalchemy

### Create test data (1.5 million rows by 15 columns)

In [2]:
# Seed NumPy's global generator so the random test data is identical on every
# Restart & Run All; without a seed every run produces different values.
SEED = 42
N_ROWS, N_COLS = 1500000, 15
numpy.random.seed(SEED)

# Uniform random floats in [0, 1), shaped (N_ROWS, N_COLS).
data = pandas.DataFrame(numpy.random.random((N_ROWS, N_COLS)))
# The default integer column labels become "column_0", "column_1", ...
data.columns = ["column_" + str(label) for label in data.columns]
data.shape

(1500000, 15)

## Inspect Data (Optional)

### Preview Data using `head()`

In [4]:
# Display the first rows of the DataFrame (head() defaults to 5 rows).
data.head()

Unnamed: 0,column_0,column_1,column_2,column_3,column_4,column_5,column_6,column_7,column_8,column_9,column_10,column_11,column_12,column_13,column_14
0,0.907253,0.912571,0.644448,0.796385,0.059683,0.902312,0.535854,0.408944,0.711681,0.998766,0.043149,0.577451,0.081842,0.826245,0.707497
1,0.731465,0.857206,0.310385,0.903593,0.258464,0.445979,0.317269,0.80865,0.48364,0.429431,0.774635,0.164513,0.859866,0.713552,0.367858
2,0.35366,0.904998,0.289056,0.785458,0.382347,0.125183,0.695719,0.515036,0.757254,0.555438,0.434418,0.726036,0.1181,0.240817,0.722243
3,0.574601,0.180775,0.148469,0.718849,0.35621,0.918186,0.292899,0.829739,0.379515,0.661392,0.110387,0.836565,0.838703,0.577015,0.062667
4,0.962297,0.786531,0.174909,0.287946,0.196679,0.671062,0.677766,0.212806,0.697945,0.988899,0.115131,0.247872,0.315066,0.311611,0.17027


### View nulls, record counts, data types

In [5]:
# Print a summary of the frame: index range, per-column non-null counts,
# dtypes and memory usage.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1500000 entries, 0 to 1499999
Data columns (total 15 columns):
column_0     1500000 non-null float64
column_1     1500000 non-null float64
column_2     1500000 non-null float64
column_3     1500000 non-null float64
column_4     1500000 non-null float64
column_5     1500000 non-null float64
column_6     1500000 non-null float64
column_7     1500000 non-null float64
column_8     1500000 non-null float64
column_9     1500000 non-null float64
column_10    1500000 non-null float64
column_11    1500000 non-null float64
column_12    1500000 non-null float64
column_13    1500000 non-null float64
column_14    1500000 non-null float64
dtypes: float64(15)
memory usage: 171.7 MB


In [6]:
# Show the dtype of every column as a Series indexed by column name.
data.dtypes

column_0     float64
column_1     float64
column_2     float64
column_3     float64
column_4     float64
column_5     float64
column_6     float64
column_7     float64
column_8     float64
column_9     float64
column_10    float64
column_11    float64
column_12    float64
column_13    float64
column_14    float64
dtype: object

In [7]:
# Summary statistics per column: count, mean, std, min, quartiles and max.
data.describe()

Unnamed: 0,column_0,column_1,column_2,column_3,column_4,column_5,column_6,column_7,column_8,column_9,column_10,column_11,column_12,column_13,column_14
count,1500000.0,1500000.0,1500000.0,1500000.0,1500000.0,1500000.0,1500000.0,1500000.0,1500000.0,1500000.0,1500000.0,1500000.0,1500000.0,1500000.0,1500000.0
mean,0.4998647,0.5001555,0.5000448,0.4994558,0.5004158,0.4998051,0.4998985,0.4998021,0.5001593,0.4998391,0.4999596,0.5002259,0.4997802,0.5000005,0.5000221
std,0.2887589,0.2885093,0.2886951,0.2886774,0.2886812,0.2886077,0.2886447,0.2886782,0.2887161,0.2886229,0.2884207,0.2885691,0.2886339,0.2886592,0.2886726
min,3.186563e-07,3.057718e-07,7.111131e-07,6.75803e-07,1.983624e-07,2.432259e-07,7.402702e-07,2.502763e-07,5.152003e-08,1.755968e-06,3.449479e-08,3.217525e-07,1.56386e-06,3.763773e-07,9.765108e-07
25%,0.2497959,0.2504716,0.2503982,0.2494077,0.2503638,0.2498553,0.2498044,0.24976,0.2503981,0.2497844,0.250096,0.2502173,0.2497544,0.249947,0.2500332
50%,0.4997462,0.5000416,0.4994739,0.4990483,0.5010365,0.4997113,0.4999903,0.49998,0.5002569,0.4997194,0.4997551,0.5005027,0.4994691,0.4999375,0.5003021
75%,0.7498618,0.7496953,0.7503617,0.7492383,0.7505282,0.7497859,0.7498914,0.7497006,0.7502396,0.749836,0.7495035,0.7500763,0.7496588,0.7499023,0.7497909
max,0.9999992,1.0,0.9999997,0.9999981,0.9999979,0.9999991,0.9999993,0.9999996,0.9999983,0.9999992,0.9999998,0.9999995,0.9999999,1.0,0.9999994


# Pushing Data out to Azure Postgres Database
Adapted from https://docs.microsoft.com/en-us/azure/postgresql/connect-python

### Create an engine by passing in a connection string in the format `databasedialect://user:password@host/databasename`

In [8]:
# Read the connection settings from the environment so that no password is
# stored in the notebook. The user, host and database fall back to the values
# this notebook was written with; the password is prompted for when the
# PGPASSWORD variable is not set.
db_user = os.environ.get("PGUSER", "danielcorcoran")
db_password = os.environ.get("PGPASSWORD") or getpass.getpass("Postgres password: ")
db_host = os.environ.get("PGHOST", "localhost")
db_name = os.environ.get("PGDATABASE", "test_db")

# Percent-encode user and password so characters such as "@" or "/" cannot
# break the URL structure `dialect://user:password@host/database`.
connection_string = "postgresql://{user}:{password}@{host}/{database}".format(
    user=urllib.parse.quote_plus(db_user),
    password=urllib.parse.quote_plus(db_password),
    host=db_host,
    database=db_name,
)
engine = sqlalchemy.create_engine(connection_string)
# Open a connection now; it is used (and closed) by the cell that calls to_sql().
con = engine.connect()

  """)


### Show table names in database

In [9]:
# Engine.table_names() is deprecated in SQLAlchemy 1.4 and removed in 2.0;
# the inspector's get_table_names() is the supported way to list the tables.
print(sqlalchemy.inspect(engine).get_table_names())

['testkeys', 'leada', 'persons', 'switch_types', 'numbers', 'anotherone', 'planets']


### Set the name of the table to be created in Postgres

In [10]:
# Name of the database table that the DataFrame is written to by to_sql().
table_name = "test_data"

### Push pandas dataframe using `to_sql()` method

In [11]:
# Time the upload so readers know what this cell costs.
start_time = datetime.datetime.now()

try:
    # chunksize writes the rows in batches instead of all 1.5 million at once.
    # if_exists keeps its default ("fail"): an existing table is never overwritten.
    data.to_sql(table_name, con, chunksize=10000)
finally:
    # Always release the connection, even when to_sql() raises.
    con.close()

print("Finished in:", datetime.datetime.now() - start_time)

Finished in: 0:06:59.898712
