In [2]:
import pandas as pd
import pyodbc
import os

def start(connection_string):
    """Open an ODBC connection configured for UTF-8 and return it with a cursor.

    Data read from the driver as ``SQL_CHAR`` or ``SQL_WCHAR`` is decoded as
    UTF-8, text sent to the driver is encoded as UTF-8, and the returned
    cursor has ``fast_executemany`` switched on.

    Args:
        connection_string: ODBC connection string handed to ``pyodbc.connect``.

    Returns:
        A ``(connection, cursor)`` tuple.

    Raises:
        Any exception raised by ``pyodbc.connect`` or by the configuration
        calls. If configuration fails after the connection was opened, the
        connection is closed before the exception propagates.
    """
    cnxn = pyodbc.connect(connection_string)
    try:
        for sql_type in (pyodbc.SQL_CHAR, pyodbc.SQL_WCHAR):
            cnxn.setdecoding(sql_type, encoding='utf-8')
        cnxn.setencoding(encoding='utf-8')
        crsr = cnxn.cursor()
        crsr.fast_executemany = True
    except Exception:
        # Do not leave a half-configured connection open behind the caller.
        cnxn.close()
        raise
    return cnxn, crsr

In [3]:
# The connection string comes from the environment so that no credentials are
# stored in the notebook. Fail early with a readable message when it is unset.
connection_string = os.environ.get('INFO20003_CNXN')
if not connection_string:
    raise RuntimeError(
        "Environment variable INFO20003_CNXN must hold an ODBC connection string"
    )

# The insert snippet further down refers to the cursor as `crsr`.
cnxn, crsr = start(connection_string)
crs = crsr  # keep the name this cell originally bound

In [4]:
# Columns kept from the raw trip sample.
COLS = [
    'passenger_count', 'trip_distance', 'payment_type', 'fare_amount',
    'extra', 'mta_tax', 'tip_amount', 'tolls_amount',
    'improvement_surcharge', 'total_amount',
]

# SQL column definitions: every column after the first is FLOAT and
# passenger_count is INT. A bare `FLOAT` is used because `FLOAT()` with empty
# parentheses is not a valid type specification.
dtypes = [f'{k} FLOAT' for k in COLS[1:]] + ['passenger_count INT']

# Read the sample, keep only the COLS columns (in COLS order), and turn the
# row index into an explicit `idx` column.
df = (
    pd.read_csv("sample.csv")[COLS]
    .reset_index()
    .rename(columns={'index': 'idx'})
)
df.tail()

Unnamed: 0,idx,passenger_count,trip_distance,payment_type,fare_amount,extra,mta_tax,tip_amount,tolls_amount,improvement_surcharge,total_amount
99995,99995,1,0.75,2,6.5,0.5,0.5,0.0,0.0,0.3,7.8
99996,99996,1,2.4,2,11.0,0.5,0.5,0.0,0.0,0.3,12.3
99997,99997,1,0.8,1,6.0,0.5,0.5,1.45,0.0,0.3,8.75
99998,99998,1,4.73,1,18.5,0.5,0.5,3.96,0.0,0.3,23.76
99999,99999,2,0.8,1,4.5,0.5,0.5,1.16,0.0,0.3,6.96


Creating a table with Python and `pyodbc`
```python
# DDL for the destination table. `IF NOT EXISTS` means re-running this
# statement leaves an already-created table untouched.
# `idx` is the primary key and is declared AUTO_INCREMENT; every other column
# is FLOAT except passenger_count, which is INT.
# NOTE(review): payment_type is stored as FLOAT although the sample rows shown
# above hold small integer codes (1, 2) — confirm this is intended.
create_q = """
CREATE TABLE IF NOT EXISTS nyc_dataset(
    idx INT NOT NULL AUTO_INCREMENT,
    trip_distance FLOAT,
    payment_type FLOAT,
    fare_amount FLOAT,
    extra FLOAT,
    mta_tax FLOAT,
    tip_amount FLOAT,
    tolls_amount FLOAT,
    improvement_surcharge FLOAT,
    total_amount FLOAT,
    passenger_count INT,
    PRIMARY KEY ( idx )
);
"""
# Run the DDL through the connection object.
cnxn.execute(create_q)
```

Inserting values into the table
```python
# Both the column list and the placeholder list are derived from the frame,
# so the statement always carries exactly one `?` per inserted column.
insert_cols = ','.join(df.columns)
placeholders = ','.join(['?'] * len(df.columns))

insert_q = f"""
INSERT INTO nyc_dataset
    ({insert_cols})
    VALUES
    ({placeholders})
"""

# `.tolist()` turns NumPy scalars into native Python numbers; each row is then
# passed to the driver as a plain tuple.
rows = [tuple(row) for row in df.values.tolist()]

# NOTE(review): `idx` in the frame comes from reset_index() and so starts at 0,
# while the table declares idx AUTO_INCREMENT. MySQL normally treats an
# inserted 0 as "generate the next value" — confirm the first row is stored
# with the intended idx.
if rows:  # executemany refuses an empty parameter list
    crsr.executemany(insert_q, rows)
    cnxn.commit()
```

In [9]:
# Pull the whole table back into pandas to inspect what is stored.
q = 'SELECT * FROM nyc_dataset'
df = pd.read_sql(sql=q, con=cnxn)
df.tail()

Unnamed: 0,idx,trip_distance,payment_type,fare_amount,extra,mta_tax,tip_amount,tolls_amount,improvement_surcharge,total_amount,passenger_count
99994,99995,0.75,2.0,6.5,0.5,0.5,0.0,0.0,0.3,7.8,1
99995,99996,2.4,2.0,11.0,0.5,0.5,0.0,0.0,0.3,12.3,1
99996,99997,0.8,1.0,6.0,0.5,0.5,1.45,0.0,0.3,8.75,1
99997,99998,4.73,1.0,18.5,0.5,0.5,3.96,0.0,0.3,23.76,1
99998,99999,0.8,1.0,4.5,0.5,0.5,1.16,0.0,0.3,6.96,2


In [11]:
# Keep only trips whose distance and total amount are both strictly positive.
where_q = """
select * from nyc_dataset
where trip_distance > 0 and total_amount > 0
"""
df = pd.read_sql(sql=where_q, con=cnxn)
df.tail()

Unnamed: 0,idx,trip_distance,payment_type,fare_amount,extra,mta_tax,tip_amount,tolls_amount,improvement_surcharge,total_amount,passenger_count
99492,99995,0.75,2.0,6.5,0.5,0.5,0.0,0.0,0.3,7.8,1
99493,99996,2.4,2.0,11.0,0.5,0.5,0.0,0.0,0.3,12.3,1
99494,99997,0.8,1.0,6.0,0.5,0.5,1.45,0.0,0.3,8.75,1
99495,99998,4.73,1.0,18.5,0.5,0.5,3.96,0.0,0.3,23.76,1
99496,99999,0.8,1.0,4.5,0.5,0.5,1.16,0.0,0.3,6.96,2
