# Bringing in Data from local to jupyter lab

### Import pandas and numpy modules

In [1]:
import os

import numpy
import pandas

### Create test data

In [2]:
# Fix the seed of numpy's global generator so that Restart & Run All
# produces the same test data every time.
RANDOM_SEED = 42
numpy.random.seed(RANDOM_SEED)

# Size of the synthetic dataset: 1.5 million rows of 15 uniform floats in [0, 1).
N_ROWS, N_COLUMNS = 1500000, 15

# Name the columns column_0 ... column_14 at construction time rather than
# renaming them on the frame afterwards.
data = pandas.DataFrame(
    numpy.random.random((N_ROWS, N_COLUMNS)),
    columns=["column_" + str(position) for position in range(N_COLUMNS)],
)
data.shape

(1500000, 15)

### Store the column headers as a list and as a string; these are used to build the CREATE TABLE and INSERT statements

In [3]:
# Keep the column labels as a plain Python list (consumed by the CREATE TABLE
# builder further down) together with the text form of that list.
column_headers = data.columns.tolist()
str_column_headers = repr(column_headers)

In [4]:
# Display the column labels to confirm the column_<n> naming.
data.columns

Index(['column_0', 'column_1', 'column_2', 'column_3', 'column_4', 'column_5',
       'column_6', 'column_7', 'column_8', 'column_9', 'column_10',
       'column_11', 'column_12', 'column_13', 'column_14'],
      dtype='object')

### Preview Data using `head()`

In [5]:
# Show the first five rows as a quick sanity check of the generated values.
data.head()

Unnamed: 0,column_0,column_1,column_2,column_3,column_4,column_5,column_6,column_7,column_8,column_9,column_10,column_11,column_12,column_13,column_14
0,0.819488,0.450859,0.196155,0.243797,0.066023,0.928607,0.432842,0.599661,0.733865,0.708273,0.212042,0.234705,0.248776,0.354679,0.088534
1,0.99721,0.018708,0.365672,0.102139,0.083723,0.997455,0.216228,0.415498,0.635426,0.614981,0.068651,0.817596,0.017903,0.454197,0.016334
2,0.801925,0.703716,0.202838,0.316367,0.262624,0.738596,0.382407,0.012787,0.741975,0.147594,0.881965,0.225401,0.920552,0.432822,0.657287
3,0.208031,0.353359,0.802278,0.507176,0.333599,0.95379,0.1957,0.430857,0.267356,0.461204,0.393286,0.508555,0.262098,0.24134,0.317473
4,0.644049,0.153404,0.102444,0.918676,0.337622,0.284506,0.272867,0.251775,0.285069,0.451261,0.210539,0.363574,0.443225,0.074742,0.612883


### View nulls, record counts, data types (OPTIONAL)

In [6]:
# Summarise the row count, per-column non-null counts, dtypes and memory usage.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1500000 entries, 0 to 1499999
Data columns (total 15 columns):
column_0     1500000 non-null float64
column_1     1500000 non-null float64
column_2     1500000 non-null float64
column_3     1500000 non-null float64
column_4     1500000 non-null float64
column_5     1500000 non-null float64
column_6     1500000 non-null float64
column_7     1500000 non-null float64
column_8     1500000 non-null float64
column_9     1500000 non-null float64
column_10    1500000 non-null float64
column_11    1500000 non-null float64
column_12    1500000 non-null float64
column_13    1500000 non-null float64
column_14    1500000 non-null float64
dtypes: float64(15)
memory usage: 171.7 MB


In [7]:
# List the dtype of every column.
data.dtypes

column_0     float64
column_1     float64
column_2     float64
column_3     float64
column_4     float64
column_5     float64
column_6     float64
column_7     float64
column_8     float64
column_9     float64
column_10    float64
column_11    float64
column_12    float64
column_13    float64
column_14    float64
dtype: object

In [8]:
# Per-column summary statistics: count, mean, std, min, quartiles and max.
data.describe()

Unnamed: 0,column_0,column_1,column_2,column_3,column_4,column_5,column_6,column_7,column_8,column_9,column_10,column_11,column_12,column_13,column_14
count,1500000.0,1500000.0,1500000.0,1500000.0,1500000.0,1500000.0,1500000.0,1500000.0,1500000.0,1500000.0,1500000.0,1500000.0,1500000.0,1500000.0,1500000.0
mean,0.4997524,0.5000459,0.4997502,0.4997542,0.5001176,0.4994023,0.5002283,0.500205,0.4997998,0.4996814,0.5002229,0.4996751,0.4998075,0.4998782,0.4998318
std,0.2887587,0.2889125,0.2885478,0.2887467,0.2885466,0.2886784,0.2887687,0.2886714,0.2886862,0.2885735,0.2887036,0.2887935,0.2887127,0.2884556,0.2885787
min,1.841688e-07,1.585019e-06,8.127546e-07,1.027961e-06,6.550512e-08,1.424598e-07,6.876375e-07,2.671631e-06,3.93592e-07,9.181753e-07,6.442542e-07,1.169348e-06,6.454211e-07,1.482122e-07,1.789391e-07
25%,0.2496337,0.2493686,0.2498302,0.2495828,0.2503162,0.2493055,0.2501599,0.2503655,0.2496831,0.2498913,0.2502656,0.2495536,0.2499088,0.2500544,0.2499365
50%,0.4996744,0.5001412,0.4998787,0.4996898,0.4999824,0.499125,0.500165,0.5000156,0.4995079,0.4989926,0.5000936,0.49971,0.4997475,0.5001158,0.4995762
75%,0.7499829,0.7504984,0.7496383,0.7497816,0.7498976,0.7492122,0.7507346,0.7506833,0.7496217,0.749564,0.7507291,0.74982,0.7495729,0.7493433,0.7497265
max,0.9999998,0.9999999,0.9999993,0.9999988,1.0,0.9999999,0.9999995,0.9999992,0.9999996,1.0,0.9999993,0.9999973,1.0,0.9999998,0.9999994


# Pushing Data out to Azure Postgres Database
Adapted from https://docs.microsoft.com/en-us/azure/postgresql/connect-python

### Import psycopg2 module 

In [9]:
import psycopg2

  """)


### Set parameters for connection string

In [10]:
# Connection parameters for the Azure Database for PostgreSQL server.
# Replace the placeholder values with those of your own server.
host = "mydemoserver.postgres.database.azure.com"
user = "mylogin@mydemoserver"
dbname = "mypgsqldb"
# Read the password from the PGPASSWORD environment variable so that a real
# secret never has to be typed into (and saved with) the notebook; the
# placeholder is only a fallback for when the variable is not set.
password = os.environ.get("PGPASSWORD", "<server_admin_password>")
sslmode = "require"

### Construct Connection string

In [11]:
# Azure connection string assembled from the parameters above; left commented
# out in favour of the local placeholder string below.
#connection_string = "host={0} user={1} dbname={2} password={3} sslmode={4}".format(host, user, dbname, password, sslmode)
# Placeholder connection string that points at a database on localhost.
connection_string_placeholder = "host=localhost dbname=test_db user=danielcorcoran"

# Open the database connection used by every later cell.
conn = psycopg2.connect(connection_string_placeholder) 
print("Connection established")

Connection established


### Instantiate `cursor()` object

In [12]:
# Cursor used to execute the SQL statements in the cells below.
cursor = conn.cursor()

### Set Table Name to be created in postgres

In [13]:
# Name of the Postgres table that is dropped, re-created and filled below.
table_name = "test_data"

### Drop table if it exists

In [14]:
# Remove any previous copy of the table so the CREATE TABLE further down
# starts from a clean slate.
# table_name is interpolated straight into the SQL text, so it must be a
# trusted identifier.
cursor.execute("DROP TABLE IF EXISTS {};".format(table_name))

print("Finished dropping table (if existed)")

Finished dropping table (if existed)


In [15]:
import datetime
# Start time of the load; the elapsed time is printed in the cleanup cell.
s = datetime.datetime.now()

### Build Create Table query string

In [16]:
# Template for the CREATE TABLE statement: table name, then column definitions.
full_string = "CREATE TABLE {} ({});"

# Build one "<name> VARCHAR(255)" definition per DataFrame column.
# str() is applied before the space replacement so that non-string column
# labels (e.g. integers) no longer raise a TypeError, and the definitions are
# joined with "," instead of concatenating and trimming a trailing comma.
# NOTE(review): every column is created as VARCHAR(255) even though the test
# data is float64 - consider a numeric column type if the values are going to
# be queried as numbers.
partial_string = ",".join(
    str(column_name).replace(" ", "_") + " VARCHAR(255)"
    for column_name in column_headers
)

### Create Table 

In [17]:
# Fill the template with the table name and the column definitions, then run it.
create_table_string = full_string.format(table_name, partial_string)

cursor.execute(create_table_string)

print("Finished creating table")

Finished creating table


### Insert the data into the table

In [18]:
# Template for a single multi-row INSERT: table name, then the
# comma-separated row tuples.
insert_string_skeleton = "INSERT INTO {} VALUES {};"
# Holder for the row tuples; it is populated in the next cell.
partial_insert_string = ""

In [19]:
# Render every DataFrame row as a SQL tuple of quoted strings, e.g.
# ('0.819488', '0.450859', ...), and join the tuples with commas.
#
# The text is rebuilt from scratch in a single pass, which fixes three
# problems of a row-by-row accumulation loop:
#   * re-running the cell no longer appends the rows a second time (glued on
#     without a separating comma);
#   * rows are read positionally with itertuples(), so the frame does not need
#     a 0..n-1 integer index and the slow per-row .loc lookup is avoided;
#   * str.join replaces repeated string concatenation and the trailing-comma
#     trim.
#
# NOTE(review): values are quoted with Python's repr(), which is not SQL
# escaping - a value containing a single quote would break the statement.
# That is fine for the numeric test data here; for general data prefer
# psycopg2.extras.execute_values or COPY.
partial_insert_string = ",".join(
    "(" + ", ".join(repr(str(value)) for value in record) + ")"
    for record in data.itertuples(index=False, name=None)
)

In [20]:
# Combine the template, the table name and the row tuples into the final INSERT statement.
complete_insert_string = insert_string_skeleton.format(table_name, partial_insert_string)

In [21]:
# Send all rows to Postgres in one INSERT statement.
cursor.execute(complete_insert_string)

### Cleanup

In [22]:
# Commit the pending statements, then release the cursor and the connection.
conn.commit()
cursor.close()
conn.close()

# Elapsed wall-clock time since the timer `s` was started.
print(datetime.datetime.now() - s)

0:09:10.868493
