# Reading and inserting data from PostgreSQL using Python

### Import modules

In [1]:
import psycopg2

import pandas

  """)


## Pulling into pandas dataframe from postgres table

### Create connection to database

In [2]:
connection = psycopg2.connect("host=localhost dbname=test_db user=danielcorcoran")

### Create cursor object

In [3]:
cursor = connection.cursor()

### `execute()` method can pull data from database

In [4]:
cursor.execute("SELECT * FROM persons WHERE id > 5")

In [5]:
all_records = cursor.fetchall()

### Store metadata

In [6]:
# Collect the `name` attribute of every entry in cursor.description,
# preserving the order in which the entries appear.
columns = [column.name for column in cursor.description]

### Move data into pandas dataframe

In [7]:
data = pandas.DataFrame(all_records)

In [8]:
data

Unnamed: 0,0,1
0,34.0,jimmy fred
1,45.0,tommy grouch
2,23.0,jane doe
3,65.0,michael haspen
4,34.0,jimmy fred
5,45.0,tommy grouch
6,23.0,jane doe
7,65.0,michael haspen
8,34.0,jimmy fred
9,45.0,tommy grouch


### Rename columns

In [9]:
data.columns = columns

In [10]:
data

Unnamed: 0,id,name
0,34.0,jimmy fred
1,45.0,tommy grouch
2,23.0,jane doe
3,65.0,michael haspen
4,34.0,jimmy fred
5,45.0,tommy grouch
6,23.0,jane doe
7,65.0,michael haspen
8,34.0,jimmy fred
9,45.0,tommy grouch


## Inserting, from pandas dataframe to postgres table

### Create connection to database

In [11]:
connection = psycopg2.connect("host=localhost dbname=test_db user=postgres")

### Create cursor object

In [12]:
cursor = connection.cursor()

### Go through pandas dataframe and insert values into postgres

In [13]:
table_name = "persons"

# Only the table name (a constant defined just above) is formatted into the
# SQL text. Row values are bound as query parameters so that psycopg2 does the
# quoting/escaping; pasting them into the string by hand breaks on values such
# as "O'Brien" and is open to SQL injection.
insert_sql = "INSERT INTO {} VALUES (%s, %s)".format(table_name)

# itertuples() walks the rows by position, so the loop does not depend on the
# dataframe having the default 0..n-1 index labels.
for id_value, name_value in data[["id", "name"]].itertuples(index=False,
                                                            name=None):
    row_values = (id_value, name_value)
    # mogrify() returns the statement with the parameters bound (as bytes on
    # Python 3); it is used here only to echo what is about to be executed.
    print(cursor.mogrify(insert_sql, row_values).decode("utf-8", "replace"))
    cursor.execute(insert_sql, row_values)

INSERT INTO persons VALUES (34.0, 'jimmy fred')
INSERT INTO persons VALUES (45.0, 'tommy grouch')
INSERT INTO persons VALUES (23.0, 'jane doe')
INSERT INTO persons VALUES (65.0, 'michael haspen')
INSERT INTO persons VALUES (34.0, 'jimmy fred')
INSERT INTO persons VALUES (45.0, 'tommy grouch')
INSERT INTO persons VALUES (23.0, 'jane doe')
INSERT INTO persons VALUES (65.0, 'michael haspen')
INSERT INTO persons VALUES (34.0, 'jimmy fred')
INSERT INTO persons VALUES (45.0, 'tommy grouch')
INSERT INTO persons VALUES (23.0, 'jane doe')
INSERT INTO persons VALUES (65.0, 'michael haspen')
INSERT INTO persons VALUES (34.0, 'jimmy fred')
INSERT INTO persons VALUES (45.0, 'tommy grouch')
INSERT INTO persons VALUES (23.0, 'jane doe')
INSERT INTO persons VALUES (65.0, 'michael haspen')


### Commit the transaction to persist the changes in Postgres

In [14]:
connection.commit()

### Close connection and cursor

In [15]:
cursor.close()

In [16]:
connection.close()

## Testing Insert Speeds

### Create test table

In [17]:
connection = psycopg2.connect("host=localhost dbname=test_db user=postgres")

In [18]:
cursor = connection.cursor()

In [19]:
# Create the `numbers` table with five FLOAT8 columns (column1..column5).
# NOTE(review): presumably this raises if `numbers` already exists (e.g. when
# the cell is re-run) -- confirm, and drop the table first if needed.
cursor.execute("""CREATE TABLE numbers(
              column1 FLOAT8,
              column2 FLOAT8,
              column3 FLOAT8,
              column4 FLOAT8,
              column5 FLOAT8)
              """)

In [20]:
connection.commit()

### Create test data: 1 million records by 5 columns

In [21]:
import numpy; import pandas

In [22]:
randomdata = numpy.random.random((1000000,5))

In [23]:
data = pandas.DataFrame(randomdata)

In [24]:
data.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000000 entries, 0 to 999999
Data columns (total 5 columns):
0    1000000 non-null float64
1    1000000 non-null float64
2    1000000 non-null float64
3    1000000 non-null float64
4    1000000 non-null float64
dtypes: float64(5)
memory usage: 38.1 MB


In [25]:
# Label the five columns column1 .. column5.
data.columns = ["column{}".format(position) for position in range(1, 6)]

In [26]:
data.head()

Unnamed: 0,column1,column2,column3,column4,column5
0,0.25753,0.25608,0.306458,0.463369,0.668492
1,0.586905,0.933924,0.430284,0.764037,0.936063
2,0.232172,0.422021,0.325718,0.95689,0.54727
3,0.934145,0.089804,0.368024,0.028631,0.158348
4,0.476079,0.21436,0.57314,0.109177,0.457492


In [27]:
from datetime import datetime

In [28]:
s = datetime.now()

# Build the statement once with one placeholder per dataframe column. Values
# are bound as parameters on every execute() instead of being pasted into the
# SQL text via str(tuple(...)), which produces invalid SQL for values such as
# NaN or inf.
placeholders = ", ".join(["%s"] * data.shape[1])
insert_sql = "INSERT INTO numbers VALUES ({})".format(placeholders)

#process
# itertuples() yields each row as a plain tuple without a per-row data.loc
# lookup, so the measured time reflects the inserts rather than pandas
# indexing overhead.
for index, row_values in enumerate(data.itertuples(index=False, name=None)):

    # Progress report every 10,000 rows.
    if index % 10000 == 0:
        print("processsing index ", index)

    cursor.execute(insert_sql, row_values)

# Elapsed wall-clock time for the whole insert loop.
print(datetime.now() - s)

processsing index  0
processsing index  10000
processsing index  20000
processsing index  30000
processsing index  40000
processsing index  50000
processsing index  60000
processsing index  70000
processsing index  80000
processsing index  90000
processsing index  100000
processsing index  110000
processsing index  120000
processsing index  130000
processsing index  140000
processsing index  150000
processsing index  160000
processsing index  170000
processsing index  180000
processsing index  190000
processsing index  200000
processsing index  210000
processsing index  220000
processsing index  230000
processsing index  240000
processsing index  250000
processsing index  260000
processsing index  270000
processsing index  280000
processsing index  290000
processsing index  300000
processsing index  310000
processsing index  320000
processsing index  330000
processsing index  340000
processsing index  350000
processsing index  360000
processsing index  370000
processsing index  380000


In [29]:
connection.commit()

In [30]:
cursor.execute("SELECT * FROM numbers")

In [31]:
numpytable_records = cursor.fetchall()

In [32]:
data = pandas.DataFrame(numpytable_records)