### SQL Assignment - sqlalchemy

#### Problem Statement:

Read the following data set:
https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.data

Task:
1. Create an sqlalchemy engine using a sample from the data set
2. Write two basic update queries
3. Write two delete queries
4. Write two filter queries
5. Write two function queries

In [88]:
import pandas as pd
from numpy import genfromtxt
from time import time
from datetime import datetime
from sqlalchemy import Column, Integer, Float, Date, String, MetaData
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker

In [4]:
url1 = 'https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.data'
url2 = 'https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.names'
cols = ["age", "type_employer", "fnlwgt", "education", 
                "education_num","marital", "occupation", "relationship", "race","sex",
                "capital_gain", "capital_loss", "hr_per_week","country", "income"]
df = pd.read_csv(url1,sep = ',',names = cols)
df.head()

Unnamed: 0,age,type_employer,fnlwgt,education,education_num,marital,occupation,relationship,race,sex,capital_gain,capital_loss,hr_per_week,country,income
0,39,State-gov,77516,Bachelors,13,Never-married,Adm-clerical,Not-in-family,White,Male,2174,0,40,United-States,<=50K
1,50,Self-emp-not-inc,83311,Bachelors,13,Married-civ-spouse,Exec-managerial,Husband,White,Male,0,0,13,United-States,<=50K
2,38,Private,215646,HS-grad,9,Divorced,Handlers-cleaners,Not-in-family,White,Male,0,0,40,United-States,<=50K
3,53,Private,234721,11th,7,Married-civ-spouse,Handlers-cleaners,Husband,Black,Male,0,0,40,United-States,<=50K
4,28,Private,338409,Bachelors,13,Married-civ-spouse,Prof-specialty,Wife,Black,Female,0,0,40,Cuba,<=50K


In [5]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 32561 entries, 0 to 32560
Data columns (total 15 columns):
age              32561 non-null int64
type_employer    32561 non-null object
fnlwgt           32561 non-null int64
education        32561 non-null object
education_num    32561 non-null int64
marital          32561 non-null object
occupation       32561 non-null object
relationship     32561 non-null object
race             32561 non-null object
sex              32561 non-null object
capital_gain     32561 non-null int64
capital_loss     32561 non-null int64
hr_per_week      32561 non-null int64
country          32561 non-null object
income           32561 non-null object
dtypes: int64(6), object(9)
memory usage: 2.6+ MB


In [11]:
df.to_csv("adult.csv")

In [89]:
from sqlalchemy import create_engine

engine = create_engine('sqlite:///adult.db', echo=True)

In [90]:
Base = declarative_base()

In [91]:
class adult(Base):
...     __tablename__ = 'adult_data'
...
...     id = Column(Integer, primary_key=True)
...     age = Column(Integer)
...     
...     fnlwgt = Column(String(20))
...     education = Column(String(20))
...     sex = Column(String(20))
...     income = Column(String(20))
...
...     def __repr__(self):
...        return "<adult_data(age='%d', education='%s',fnlwgt='%d',sex='%s',income='%s')>" % (
...                             self.age, self.education, self.fnlwgt, self.sex, self.income)

In [92]:
Session = sessionmaker()
Session.configure(bind=engine)
session = Session()

In [93]:
Base.metadata.create_all(engine)

2018-06-13 15:00:58,617 INFO sqlalchemy.engine.base.Engine SELECT CAST('test plain returns' AS VARCHAR(60)) AS anon_1
2018-06-13 15:00:58,626 INFO sqlalchemy.engine.base.Engine ()
2018-06-13 15:00:58,631 INFO sqlalchemy.engine.base.Engine SELECT CAST('test unicode returns' AS VARCHAR(60)) AS anon_1
2018-06-13 15:00:58,634 INFO sqlalchemy.engine.base.Engine ()
2018-06-13 15:00:58,638 INFO sqlalchemy.engine.base.Engine PRAGMA table_info("adult_data")
2018-06-13 15:00:58,640 INFO sqlalchemy.engine.base.Engine ()


In [94]:
adult.__table__

Table('adult_data', MetaData(bind=None), Column('id', Integer(), table=<adult_data>, primary_key=True, nullable=False), Column('age', Integer(), table=<adult_data>), Column('fnlwgt', String(length=20), table=<adult_data>), Column('education', String(length=20), table=<adult_data>), Column('sex', String(length=20), table=<adult_data>), Column('income', String(length=20), table=<adult_data>), schema=None)

In [95]:
ed_adult = adult(age=23, fnlwgt=77516, education='Bachelors',sex='Male' , income='<=50k')

In [96]:
print(ed_adult)

<adult_data(age='23', education='Bachelors',fnlwgt='77516',sex='Male',income='<=50k')>


In [97]:
session.add(ed_adult)

In [98]:
our_cand = session.query(adult).first()

2018-06-13 15:01:14,937 INFO sqlalchemy.engine.base.Engine BEGIN (implicit)
2018-06-13 15:01:14,942 INFO sqlalchemy.engine.base.Engine INSERT INTO adult_data (age, fnlwgt, education, sex, income) VALUES (?, ?, ?, ?, ?)
2018-06-13 15:01:14,944 INFO sqlalchemy.engine.base.Engine (23, 77516, 'Bachelors', 'Male', '<=50k')
2018-06-13 15:01:14,955 INFO sqlalchemy.engine.base.Engine SELECT adult_data.id AS adult_data_id, adult_data.age AS adult_data_age, adult_data.fnlwgt AS adult_data_fnlwgt, adult_data.education AS adult_data_education, adult_data.sex AS adult_data_sex, adult_data.income AS adult_data_income 
FROM adult_data
 LIMIT ? OFFSET ?
2018-06-13 15:01:14,958 INFO sqlalchemy.engine.base.Engine (1, 0)


In [99]:
our_cand

<adult_data(age='23', education='Bachelors',fnlwgt='77516',sex='Male',income='<=50k')>

In [104]:
session.add_all([
...     adult(age=40, fnlwgt=74728, education='Bachelors',sex='Male' , income='>=50k'),
...     adult(age=39, fnlwgt=75165, education='Degree',sex='Female' , income='>=50k')])

In [105]:
our_user = session.query(adult).filter_by(age=39).first() 

2018-06-13 15:08:38,188 INFO sqlalchemy.engine.base.Engine INSERT INTO adult_data (age, fnlwgt, education, sex, income) VALUES (?, ?, ?, ?, ?)
2018-06-13 15:08:38,273 INFO sqlalchemy.engine.base.Engine (23, 77516, 'Bachelors', 'Male', '<=50k')
2018-06-13 15:08:38,279 INFO sqlalchemy.engine.base.Engine INSERT INTO adult_data (age, fnlwgt, education, sex, income) VALUES (?, ?, ?, ?, ?)
2018-06-13 15:08:38,285 INFO sqlalchemy.engine.base.Engine (39, 75165, 'Degree', 'Female', '>=50k')
2018-06-13 15:08:38,290 INFO sqlalchemy.engine.base.Engine INSERT INTO adult_data (age, fnlwgt, education, sex, income) VALUES (?, ?, ?, ?, ?)
2018-06-13 15:08:38,295 INFO sqlalchemy.engine.base.Engine (40, 74728, 'Bachelors', 'Male', '>=50k')
2018-06-13 15:08:38,301 INFO sqlalchemy.engine.base.Engine INSERT INTO adult_data (age, fnlwgt, education, sex, income) VALUES (?, ?, ?, ?, ?)
2018-06-13 15:08:38,306 INFO sqlalchemy.engine.base.Engine (39, 75165, 'Degree', 'Female', '>=50k')
2018-06-13 15:08:38,311 IN

In [109]:
for user in session.query(adult).filter(~adult.age.in_([39, 40])):
    print(user)

2018-06-13 15:14:12,246 INFO sqlalchemy.engine.base.Engine SELECT adult_data.id AS adult_data_id, adult_data.age AS adult_data_age, adult_data.fnlwgt AS adult_data_fnlwgt, adult_data.education AS adult_data_education, adult_data.sex AS adult_data_sex, adult_data.income AS adult_data_income 
FROM adult_data 
WHERE adult_data.age NOT IN (?, ?)
2018-06-13 15:14:12,252 INFO sqlalchemy.engine.base.Engine (39, 40)
<adult_data(age='23', education='Bachelors',fnlwgt='77516',sex='Male',income='<=50k')>


TypeError: %d format: a number is required, not str

In [34]:
def Load_Data(file_name):
    data = genfromtxt(file_name, delimiter=',', skip_header=1, converters={0: lambda s: str(s)})
    return data.tolist()

Base = declarative_base()

class adult_data(Base):
    __tablename__ = 'adult'
    __table_args__ = {'sqlite_autoincrement': True}
    #tell SQLAlchemy the name of column and its attributes:
    id = Column(Integer, primary_key=True, nullable=False) 
    age = Column(Integer)
    type_employer = Column(String)
    fnlwgt = Column(Integer)
    education = Column(String)
    sex = Column(String)
    income = Column(String)

if __name__ == "__main__":
    t = time()

    #Create the database
    engine = create_engine('sqlite:///csv_test.db')
    Base.metadata.create_all(engine)

    #Create the session
    session = sessionmaker()
    session.configure(bind=engine)
    s = session()

    try:
        file_name = "adult.csv" #sample CSV file used:  http://www.google.com/finance/historical?q=NYSE%3AT&ei=W4ikVam8LYWjmAGjhoHACw&output=csv
        data = Load_Data(file_name) 

        for i in data:
            record = adult(**{
                'id': i[1],
                'age' : i[2],
                'type_employer' : i[3],
                'fnlwgt' : i[4],
                'education' : i[5],
                'sex' : i[6],
                'income':i[7]
            })
            s.add(record) #Add all the records

        s.commit() #Attempt to commit all the records
    except:
        s.rollback() #Rollback the changes on error
    finally:
        s.close() #Close the connection
    print ("Time elapsed: " + str(time() - t) + " s.") #0.091s

Time elapsed: 1.8291831016540527 s.


In [110]:
our_user = session.query(adult).filter_by(age=39).first() 

2018-06-13 15:16:24,322 INFO sqlalchemy.engine.base.Engine SELECT adult_data.id AS adult_data_id, adult_data.age AS adult_data_age, adult_data.fnlwgt AS adult_data_fnlwgt, adult_data.education AS adult_data_education, adult_data.sex AS adult_data_sex, adult_data.income AS adult_data_income 
FROM adult_data 
WHERE adult_data.age = ?
 LIMIT ? OFFSET ?
2018-06-13 15:16:24,325 INFO sqlalchemy.engine.base.Engine (39, 1, 0)


In [None]:
our_user