In [3]:
import pandas as pd
from more_sqlalchemy import get_sql_types

#### Delete and start fresh

In [2]:
# On Windows, go delete the db from the folder
!rm ./databases/slide_example.db

#### Make a connection to a new database

In [3]:
from sqlalchemy import create_engine
engine = create_engine('sqlite:///databases/slide_example.db', echo=False)

#### Read in the table from a csv

In [4]:
table1 = pd.read_csv('./data/Table1.csv')
table1.head()

Unnamed: 0,D,G
0,5,a
1,3,a
2,4,b
3,2,b
4,3,c


#### Create a SQL type dictionary

In [5]:
sql_types = get_sql_types(table1)
sql_types

{'D': sqlalchemy.sql.sqltypes.Integer, 'G': sqlalchemy.sql.sqltypes.String}

#### Create and execute the SQL CREATE TABLE schema

In [6]:
schema = pd.io.sql.get_schema(table1, # dataframe
                              'Table1', # name in SQL db
                              keys=None, # primary key
                              con=engine, # connection
                              dtype=sql_types # SQL types
)
print(schema)
engine.execute(schema)


CREATE TABLE "Table1" (
	"D" INTEGER, 
	"G" VARCHAR
)




<sqlalchemy.engine.result.ResultProxy at 0x11d26f668>

#### Append the data to the new table

In [7]:
table1.to_sql('Table1', 
              con=engine, 
              dtype=sql_types, 
              index=False,
              if_exists='append')

In [4]:
def create_table(df, table_name, con, keys=None):
    sql_types = get_sql_types(df)
    schema = pd.io.sql.get_schema(df,
                                  table_name,
                                  keys=keys,
                                  con=con,
                                  dtype=sql_types)
    con.execute(schema)
    df.to_sql(table_name, 
              con=con, 
              dtype=sql_types, 
              index=False,
              if_exists='append')

In [9]:
t1 = pd.read_csv('./data/T1.csv')
t1.head()

Unnamed: 0,G1,X
0,a,5
1,a,2
2,b,3
3,b,1


In [10]:
create_table(t1, 'T1', engine)

In [11]:
t2 = pd.read_csv('./data/T2.csv')
t2.head()

Unnamed: 0,G2,Y
0,a,3
1,a,2
2,b,4
3,b,1
4,c,1


In [12]:
create_table(t2, 'T2', engine)

#### Test the results

In [13]:
%load_ext pyensae

In [14]:
%SQL_connect ./databases/slide_example.db

<pyensae.sql.sql_interface_database.InterfaceSQLDatabase at 0x11d2ae320>

In [15]:
%SQL_tables

['T1', 'T2', 'Table1']

In [16]:
%SQL_schema Table1

{0: ('D', int), 1: ('G', str)}

In [19]:
%%SQL
SELECT *
FROM Table1

Unnamed: 0,D,G
0,5,a
1,3,a
2,4,b
3,2,b
4,3,c
5,5,c
6,4,c


In [17]:
%SQL_schema T1

{0: ('G1', str), 1: ('X', int)}

In [20]:
%%SQL
SELECT *
FROM T1

Unnamed: 0,G1,X
0,a,5
1,a,2
2,b,3
3,b,1


In [18]:
%SQL_schema T2

{0: ('G2', str), 1: ('Y', int)}

In [21]:
%%SQL
SELECT *
FROM T2

Unnamed: 0,G2,Y
0,a,3
1,a,2
2,b,4
3,b,1
4,c,1


#### Delete and start fresh

In [57]:
# On Windows, go delete the db from the folder
!rm ./databases/uber_samples.db

#### Make a connection to a new database

In [58]:
from sqlalchemy import create_engine
import pandas as pd
engine = create_engine('sqlite:///databases/uber_samples.db', echo=False)

In [59]:
from glob import glob

In [60]:
files = glob('./data/*uber*.csv')
files

['./data/uber-raw-data-jun14-sample.csv',
 './data/uber-raw-data-apr14-sample.csv',
 './data/uber-raw-data-janjune-15-sample.csv',
 './data/uber-raw-data-may14-sample.csv',
 './data/uber-raw-data-sep14-sample.csv',
 './data/uber-raw-data-aug14-sample.csv',
 './data/uber-raw-data-jul14-sample.csv']

In [61]:
import re
uber_date_re = re.compile(r'\./data/uber-raw-data-([a-z0-9\-]+)-sample.csv')
table_name = lambda f: uber_date_re.match(f).group(1).replace('-', '')
date_cols = lambda f: ['Date/Time'] if table_name(f) != 'janjune15' else ['Pickup_date'] 

In [62]:
from unpythonic import begin
uber_dfs = {begin(print('processing {0}'.format(f)), table_name(f)):pd.read_csv(f, parse_dates=date_cols(f)) for f in files}

processing ./data/uber-raw-data-jun14-sample.csv
processing ./data/uber-raw-data-apr14-sample.csv
processing ./data/uber-raw-data-janjune-15-sample.csv
processing ./data/uber-raw-data-may14-sample.csv
processing ./data/uber-raw-data-sep14-sample.csv
processing ./data/uber-raw-data-aug14-sample.csv
processing ./data/uber-raw-data-jul14-sample.csv


In [63]:
for name, df in uber_dfs.items():
    print('Creating {0}'.format(name))
    create_table(df, name, engine)

Creating jun14
Creating apr14
Creating janjune15
Creating may14
Creating sep14
Creating aug14
Creating jul14


In [30]:
from dfply import *
df = pd.read_csv(files[0])
pd.to_datetime(df['Date/Time'])

0       2014-06-19 16:49:00
1       2014-06-12 21:25:00
2       2014-06-15 22:23:00
3       2014-06-14 20:34:00
4       2014-06-13 14:36:00
5       2014-06-03 08:40:00
6       2014-06-15 17:08:00
7       2014-06-02 16:19:00
8       2014-06-03 07:16:00
9       2014-06-29 00:18:00
10      2014-06-26 08:42:00
11      2014-06-18 08:48:00
12      2014-06-23 20:21:00
13      2014-06-05 15:46:00
14      2014-06-06 13:15:00
15      2014-06-28 00:14:00
16      2014-06-20 23:24:00
17      2014-06-26 12:56:00
18      2014-06-26 14:12:00
19      2014-06-18 13:23:00
20      2014-06-06 11:01:00
21      2014-06-30 07:26:00
22      2014-06-12 17:51:00
23      2014-06-07 13:01:00
24      2014-06-29 15:43:00
25      2014-06-28 12:13:00
26      2014-06-04 21:15:00
27      2014-06-05 23:15:00
28      2014-06-25 23:13:00
29      2014-06-16 23:23:00
                ...        
99970   2014-06-24 18:18:00
99971   2014-06-26 18:03:00
99972   2014-06-18 01:06:00
99973   2014-06-14 08:32:00
99974   2014-06-10 1

In [64]:
taxi_zone = pd.read_csv('./data/taxi-zone-lookup.csv')
taxi_zone

Unnamed: 0,LocationID,Borough,Zone
0,1,EWR,Newark Airport
1,2,Queens,Jamaica Bay
2,3,Bronx,Allerton/Pelham Gardens
3,4,Manhattan,Alphabet City
4,5,Staten Island,Arden Heights
5,6,Staten Island,Arrochar/Fort Wadsworth
6,7,Queens,Astoria
7,8,Queens,Astoria Park
8,9,Queens,Auburndale
9,10,Queens,Baisley Park


In [65]:
create_table(taxi_zone, 'taxi_zone_lookup', engine)

In [66]:
base = pd.DataFrame({'base_code':['B02512','B02598','B02617','B02682','B02764','B02765','B02835','B02836'],
                     'base_name':['Unter', 'Hinter', 'Weiter', 'Schmecken', 'Danach-NY', 'Grun', 'Dreist', 'Drinnen']})
base
            

Unnamed: 0,base_code,base_name
0,B02512,Unter
1,B02598,Hinter
2,B02617,Weiter
3,B02682,Schmecken
4,B02764,Danach-NY
5,B02765,Grun
6,B02835,Dreist
7,B02836,Drinnen


In [67]:
create_table(base, 'base_lookup', engine)

In [68]:
%load_ext pyensae
%SQL_connect ./databases/uber_samples.db
%SQL_tables

The pyensae extension is already loaded. To reload it, use:
  %reload_ext pyensae


['apr14',
 'aug14',
 'base_lookup',
 'janjune15',
 'jul14',
 'jun14',
 'may14',
 'sep14',
 'taxi_zone_lookup']

In [69]:
%SQL_schema apr14

{0: ('Date/Time', datetime.datetime),
 1: ('Lat', float),
 2: ('Lon', float),
 3: ('Base', str)}

In [70]:
%%SQL
SELECT *
FROM apr14
LIMIT 5

Unnamed: 0,Date/Time,Lat,Lon,Base
0,2014-04-18 21:38:00.000000,40.7359,-73.9852,B02682
1,2014-04-23 15:19:00.000000,40.7642,-73.9543,B02598
2,2014-04-10 07:15:00.000000,40.7138,-74.0103,B02598
3,2014-04-11 15:23:00.000000,40.7847,-73.9698,B02682
4,2014-04-07 17:26:00.000000,40.646,-73.7767,B02598


In [71]:
%SQL_schema janjune15

{0: ('Dispatching_base_num', str),
 1: ('Pickup_date', datetime.datetime),
 2: ('Affiliated_base_num', str),
 3: ('locationID', int)}

In [72]:
%%SQL
SELECT *
FROM janjune15
LIMIT 5

Unnamed: 0,Dispatching_base_num,Pickup_date,Affiliated_base_num,locationID
0,B02598,2015-01-18 09:25:38.000000,B02598,249
1,B02617,2015-05-17 11:44:00.000000,B02617,33
2,B02617,2015-05-17 11:46:00.000000,B02617,186
3,B02598,2015-01-19 05:32:05.000000,B02598,48
4,B02598,2015-01-19 22:28:20.000000,B02598,40


In [73]:
%SQL_schema base_lookup

{0: ('base_code', str), 1: ('base_name', str)}

In [74]:
%%SQL
SELECT *
FROM base_lookup
LIMIT 5

Unnamed: 0,base_code,base_name
0,B02512,Unter
1,B02598,Hinter
2,B02617,Weiter
3,B02682,Schmecken
4,B02764,Danach-NY


In [75]:
%SQL_schema taxi_zone_lookup

{0: ('LocationID', int), 1: ('Borough', str), 2: ('Zone', str)}

In [76]:
%%SQL
SELECT *
FROM taxi_zone_lookup
LIMIT 5

Unnamed: 0,LocationID,Borough,Zone
0,1,EWR,Newark Airport
1,2,Queens,Jamaica Bay
2,3,Bronx,Allerton/Pelham Gardens
3,4,Manhattan,Alphabet City
4,5,Staten Island,Arden Heights
