# Bringing in Data from local to jupyter lab

### Import pandas module

In [1]:
import pandas

### Read in data from path

In [2]:
data_path = "../data/planets.csv"

data = pandas.read_csv(data_path)

### Preview Data using `head()`

In [3]:
data.head()

Unnamed: 0,method,number,orbital_period,mass,distance,year
0,Radial Velocity,1,269.3,7.1,77.4,2006
1,Radial Velocity,1,874.774,2.21,56.95,2008
2,Radial Velocity,1,763.0,2.6,19.84,2011
3,Radial Velocity,1,326.03,19.4,110.62,2007
4,Radial Velocity,1,516.22,10.5,119.47,2009


In [4]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1035 entries, 0 to 1034
Data columns (total 6 columns):
method            1035 non-null object
number            1035 non-null int64
orbital_period    992 non-null float64
mass              513 non-null float64
distance          808 non-null float64
year              1035 non-null int64
dtypes: float64(3), int64(2), object(1)
memory usage: 48.6+ KB


In [5]:
data.dtypes

method             object
number              int64
orbital_period    float64
mass              float64
distance          float64
year                int64
dtype: object

In [13]:
column_headers = list(data.columns)
str_column_headers = str(column_headers)

In [7]:
data.describe()

Unnamed: 0,number,orbital_period,mass,distance,year
count,1035.0,992.0,513.0,808.0,1035.0
mean,1.785507,2002.917596,2.638161,264.069282,2009.070531
std,1.240976,26014.728304,3.818617,733.116493,3.972567
min,1.0,0.090706,0.0036,1.35,1989.0
25%,1.0,5.44254,0.229,32.56,2007.0
50%,1.0,39.9795,1.26,55.25,2010.0
75%,2.0,526.005,3.04,178.5,2012.0
max,7.0,730000.0,25.0,8500.0,2014.0


# Pushing Data out to Azure Postgres Database
Adapted from https://docs.microsoft.com/en-us/azure/postgresql/connect-python

### Import psycopg2 module 

In [None]:
import psycopg2

### Set parameters for connection string

In [None]:
# Update connection string information obtained from the portal
host = "mydemoserver.postgres.database.azure.com"
user = "mylogin@mydemoserver"
dbname = "mypgsqldb"
password = "<server_admin_password>"
sslmode = "require"

### Construct Connection string

In [None]:
conn_string = "host={0} user={1} dbname={2} password={3} sslmode={4}".format(host, user, dbname, password, sslmode)
conn = psycopg2.connect(conn_string) 
print "Connection established"

### Instantiate `cursor()` object

In [None]:
cursor = conn.cursor()

### Set Table Name to be created in postgres

In [19]:
table_name = "planets"

### Drop table if it exists

In [None]:
cursor.execute("DROP TABLE IF EXISTS {};".format(table_name))

print "Finished dropping table (if existed)"

### Build Create Table query string

In [20]:
full_string = "CREATE TABLE {} {};"

partial_string = "("

for index in range(len(column_headers)):
    
    column_name = column_headers[index]
    
    if index != len(column_headers)-1:
        partial_string = partial_string + column_name + " VARCHAR(255),"
    else:
        partial_string = partial_string + column_name + " VARCHAR(255))"

### Create Table 

In [None]:
create_table_string = full_string.format(table_name, partial_string)

cursor.execute(create_table_string)

print "Finished creating table"

### Insert 

In [56]:
insert_string_skeleton = "INSERT INTO {} VALUES {};"
partial_insert_string = ""

In [57]:
for index in range(data.shape[0]):

    single_record_list = list(data.loc[index])

    single_record_list_strings = list(map(lambda item: str(item),x))

    row_to_insert = str(single_record_list_strings).replace("[","(").replace("]",")") + ","

    partial_insert_string = partial_insert_string + row_to_insert

partial_insert_string = partial_insert_string[:len(partial_insert_string)-1]

In [58]:
complete_insert_string = insert_string_skeleton.format(table_name, partial_insert_string)

In [None]:
cursor.execute(complete_insert_string)

### Cleanup

In [None]:
conn.commit()
cursor.close()
conn.close()