# Creating and Manipulating your own Databases

## Creating databases and tables

With SQLite, the database lives in a single file: the URL passed to create_engine() names that file, and it is created if it does not already exist. Once the database has been created, raw SQL or a tool like Alembic is needed to update it.

### Creating tables with SQLAlchemy

In [10]:
from sqlalchemy import Table, Column, String, Integer, Float, Boolean, create_engine, MetaData
# Describe the "data" table on a fresh MetaData catalog, then issue the
# CREATE TABLE against the SQLite file behind the engine.
engine = create_engine("sqlite:///data.sqlite")
metadata = MetaData()

data_columns = [
    Column("name", String(255)),
    Column("count", Integer()),
    Column("amount", Float()),
    Column("valid", Boolean()),
]
data = Table("data", metadata, *data_columns)

metadata.create_all(engine)
print(repr(data))

Table('data', MetaData(bind=None), Column('name', String(length=255), table=<data>), Column('count', Integer(), table=<data>), Column('amount', Float(), table=<data>), Column('valid', Boolean(), table=<data>), schema=None)


### Constraints and data defaults

In [12]:
# Redefine the "data" table, this time with a uniqueness constraint on "name"
# and insert-time defaults for "count" and "valid".
metadata = MetaData()
engine = create_engine("sqlite:///data.sqlite")
connection = engine.connect()
data = Table(
    "data",
    metadata,
    Column("name", String(255), unique=True),
    Column("count", Integer(), default=1),
    Column("amount", Float()),
    Column("valid", Boolean(), default=False),
)
# create_all() skips tables that already exist, so a "data" table already
# present in data.sqlite would keep its old definition (no unique constraint).
# Drop it first so the constraints declared here actually reach the database.
metadata.drop_all(engine)
metadata.create_all(engine)
print(repr(metadata.tables["data"]))


Table('data', MetaData(bind=None), Column('name', String(length=255), table=<data>), Column('count', Integer(), table=<data>, default=ColumnDefault(1)), Column('amount', Float(), table=<data>), Column('valid', Boolean(), table=<data>, default=ColumnDefault(False)), schema=None)


Now that the tables are defined, it's time to learn how to insert data into them!

## Inserting data into a table

### Inserting a single row

In [13]:
from sqlalchemy import insert, select

# Build the INSERT from a dict of column values, run it, and report how many
# rows were affected.
new_row = {"name": "Anna", "count": 1, "amount": 1000.00, "valid": True}
insert_stmt = insert(data).values(**new_row)
results = connection.execute(insert_stmt)
print(results.rowcount)

# Fetch the row back by name to confirm what was stored.
select_stmt = select([data]).where(data.columns.name == "Anna")
inserted_row = connection.execute(select_stmt).first()
print(inserted_row)

1
('Anna', 1, 1000.0, True)


### Inserting multiple records at once

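When inserting multiple records at once, you do not use the .values() method. Instead, pass a list of dictionaries (one per row) as the second argument to connection.execute().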

In [14]:
# One dict per row, keyed by the column names of the "data" table.
column_names = ('name', 'count', 'amount', 'valid')
row_values = [
    ("Anne", 1, 1000.00, True),
    ("Taylor", 1, 750.00, False),
]
values_list = [dict(zip(column_names, row)) for row in row_values]

# A bare insert() plus a list of parameter dicts inserts all rows in a single
# execute() call.
stmt = insert(data)
results = connection.execute(stmt, values_list)
print(results.rowcount)

2


### Loading a CSV into a table


One way to load a CSV file into a table would be to read the file line by line, create a dictionary from each line, and then use insert().

But there is a faster way using pandas.

In [26]:
import pandas as pd
# Create the empty "censusdb" table that the CSV rows will be appended to.
engine = create_engine("sqlite:///censusdb.sqlite")
metadata = MetaData()
connection = engine.connect()
censusdb = Table(
    "censusdb",
    metadata,
    Column("state", String(30)),
    Column("sex", String(1)),
    Column("age", Integer()),
    Column("pop2000", Integer()),
    Column("pop2008", Integer()),
)
metadata.create_all(engine)

# The first CSV column becomes the DataFrame index; it is not written to the
# database because to_sql() is called with index=False.
census_df = pd.read_csv("census.csv", index_col=0)
# Rename the remaining columns to the table's column names so the append
# lines up with the existing table.
census_df.columns = ['state', 'sex', 'age', 'pop2000', 'pop2008']
# The target name must match the table created above. The previous spelling
# "censusbd" made pandas create a second table and left "censusdb" empty.
census_df.to_sql(name="censusdb", con=connection, if_exists="append", index=False)

In [38]:
# Reflect the table definition from the SQLite file instead of declaring its
# columns by hand, then pull every row into a DataFrame.
# NOTE(review): this reflects a table named "census", while the loading cell
# in this document creates "censusdb" — confirm which name exists in the file.
metadata = MetaData()
engine = create_engine("sqlite:///censusdb.sqlite")
reflect_options = {"autoload": True, "autoload_with": engine}
census = Table("census", metadata, **reflect_options)

stmt = select([census])
results = engine.execute(stmt).fetchall()
pd.DataFrame(results)

Unnamed: 0,0,1,2,3,4
0,Illinois,M,0,89600,95012
1,Illinois,M,1,88445,91829
2,Illinois,M,2,88729,89547
3,Illinois,M,3,88868,90037
4,Illinois,M,4,91947,91111
...,...,...,...,...,...
8767,Texas,F,81,35378,44418
8768,Texas,F,82,33852,41838
8769,Texas,F,83,30076,40489
8770,Texas,F,84,27961,36821


## Updating data in a table

Correlated update: Using a select statement to get the value to be used in an update

### Updating individual records

In [35]:
from sqlalchemy import update
# Open the database and reflect the existing state_fact table.
engine = create_engine("sqlite:///censusupdate.sqlite")
metadata = MetaData()
connection = engine.connect()
state_fact = Table("state_fact", metadata, autoload=True, autoload_with=engine)

# The same filter picks the row before and after the update, so build it once.
is_new_york = state_fact.columns.name == "New York"
select_stmt = select([state_fact]).where(is_new_york)

# The record before the update.
results = connection.execute(select_stmt).fetchall()
print(results)
print(results[0]["fips_state"])

# Set fips_state to 0 for the New York row only.
update_stmt = update(state_fact).where(is_new_york).values(fips_state=0)
update_results = connection.execute(update_stmt)

# Run the same query again to show the updated record.
results = connection.execute(select_stmt).fetchall()
print(results)
print(results[0]["fips_state"])

[('32', 'New York', 'NY', 'USA', 'state', '10', 'current', 'occupied', '', '36', 'N.Y.', 'II', '1', 'Northeast', '2', 'Mid-Atlantic', '2')]
36
[('32', 'New York', 'NY', 'USA', 'state', '10', 'current', 'occupied', '', '0', 'N.Y.', 'II', '1', 'Northeast', '2', 'Mid-Atlantic', '2')]
0


### Updating multiple records

Updating multiple records works exactly the same way as updating a single record (as long as you are updating them all with the same value).

In [54]:
# Selecting only the notes column makes it easy to compare its contents
# before and after the update.
select_stmt = select([state_fact.columns.notes])

results = connection.execute(select_stmt).fetchall()
print(results)

# One UPDATE whose where clause matches many rows changes all of them;
# rowcount reports how many were touched.
in_the_west = state_fact.columns.census_region_name == "West"
stmt = update(state_fact).values(notes="The Wild West")
stmt_west = stmt.where(in_the_west)
results = connection.execute(stmt_west)
print(results.rowcount)

results = connection.execute(select_stmt).fetchall()
print(results)

[('',), ('',), ('',), ('',), ('',), ('',), ('',), ('',), ('',), ('',), ('',), ('',), ('',), ('',), ('',), ('',), ('',), ('',), ('',), ('',), ('',), ('',), ('',), ('',), ('',), ('',), ('',), ('',), ('',), ('',), ('',), ('',), ('',), ('',), ('',), ('',), ('',), ('',), ('',), ('',), ('',), ('',), ('',), ('',), ('',), ('',), ('',), ('',), ('',), ('',), ('',)]
13
[('',), ('',), ('',), ('The Wild West',), ('',), ('',), ('The Wild West',), ('',), ('The Wild West',), ('The Wild West',), ('',), ('',), ('The Wild West',), ('',), ('',), ('The Wild West',), ('',), ('',), ('',), ('',), ('',), ('',), ('',), ('The Wild West',), ('',), ('',), ('',), ('',), ('',), ('The Wild West',), ('',), ('',), ('',), ('',), ('',), ('',), ('The Wild West',), ('The Wild West',), ('The Wild West',), ('',), ('',), ('',), ('',), ('',), ('The Wild West',), ('',), ('',), ('',), ('',), ('The Wild West',), ('',)]


### Correlated updates

Records can also be updated with data from a select statement. It works by defining a select statement that returns the value you want to update the record with and assigning that select statement as the value in update.

In [114]:
# taking data from state_fact

# engine = create_engine("sqlite:///census.sqlite")
# metadata = MetaData()
# connection = engine.connect()
# state_fact = Table("state_fact", metadata, autoload=True, autoload_with=engine)

# st = select([state_fact])
# r = connection.execute(st).fetchall()
# df = pd.DataFrame(r)
# df.columns = state_fact.columns.keys()
# df = df[["name", "fips_state"]]
# df.to_csv("state_fact.csv")

# Creating one database that holds both tables so they can be used together,
# since you cannot connect 2 tables from different databases.

# engine = create_engine("sqlite:///correlated.sqlite")
# metadata = MetaData()
# connection = engine.connect()
# state_fact = Table("state_fact", metadata,
#                 Column("name", String(length=256)),
#                 Column("fips_state", String(length=256)))

# flat_census = Table("flat_census", metadata,
#                 Column("state_name", String(length=256)),
#                 Column("fips_code", String(length=256)))

# metadata.create_all(engine)

# loading csv files to the database
# flat_censusdf= pd.read_csv("flat_census.csv",index_col=0)
# flat_censusdf.to_sql(name="flat_census", con=connection, if_exists="append", index=False,index_label='fips_code')

# state_factdf= pd.read_csv("state_fact.csv", index_col=0)
# state_factdf.to_sql(name="state_fact", con=connection, if_exists="append", index=False)

# Checking the records in the database.
# flat_census = Table("flat_census", metadata, autoload=True, autoload_with=engine)

# s = select([flat_census])
# r = connection.execute(s).fetchall()
# r

# state_fact = Table("state_fact", metadata, autoload=True, autoload_with=engine)

# s = select([state_fact])
# r = connection.execute(s).fetchall()
# r

# engine = create_engine("sqlite:///correlated1.sqlite")
# metadata = MetaData()
# connection = engine.connect()
# flat_census = Table("flat_census", metadata, autoload=True, autoload_with=engine)
# state_fact = Table("state_fact", metadata, autoload=True, autoload_with=engine)

# empty values of the table
# stmt = select([flat_census.columns.state_name])
# result = connection.execute(stmt).fetchall()
# print(result)

# NOTE(review): flat_census (and a state_fact/connection bound to the same
# database) are only set up in the commented-out code earlier in this cell —
# confirm that setup has been run before executing this.

# Correlated subquery: for each flat_census row, look up the state name whose
# fips_state equals that row's fips_code.
codes_match = state_fact.columns.fips_state == flat_census.columns.fips_code
fips_stmt = select([state_fact.columns.name]).where(codes_match)

# Use the subquery as the new value of state_name.
update_stmt = update(flat_census).values(state_name=fips_stmt)
results = connection.execute(update_stmt)
print(results.rowcount)

# values after updating
st = select([flat_census])
results = connection.execute(st).fetchall()
print(results)

[('Illinois', '17'), ('New Jersey', '34'), ('North Dakota', '38'), ('Oregon', '41'), ('Washington DC', '11'), ('Wisconsin', '55'), ('Arizona', '4'), ('Arkansas', '5'), ('Colorado', '8'), ('Hawaii', '15'), ('Kansas', '20'), ('Louisiana', '22'), ('Montana', '30'), ('Nebraska', '31'), ('Oklahoma', '40'), ('Idaho', '16'), ('Massachusetts', '25'), ('Michigan', '26'), ('Missouri', '29'), ('North Carolina', '37'), ('Ohio', '39'), ('Rhode Island', '44'), ('South Carolina', '45'), ('Wyoming', '56'), ('Indiana', '18'), ('Pennsylvania', '42'), ('South Dakota', '46'), ('Tennessee', '47'), ('Vermont', '50'), ('Alaska', '2'), ('Delaware', '10'), ('Kentucky', '21'), ('Mississippi', '28'), ('Virginia', '51'), ('Florida', '12'), ('Maryland', '24'), ('Nevada', '32'), ('Washington', '53'), ('California', '6'), ('Connecticut', '9'), ('Georgia', '13'), ('Iowa', '19'), ('Maine', '23'), ('New Hampshire', '33'), ('New Mexico', '35'), ('Texas', '48'), ('Alabama', '1'), ('Minnesota', '27'), ('New York', '36'), 

## Deleting data from a database

The delete statement targets a table and uses a where clause to determine which row to delete.

Drop deletes the table from the database.

### Deleting all the records from a table


In [4]:
from sqlalchemy import delete
# Reflect the census table from the database that is about to be emptied.
engine = create_engine("sqlite:///censusdeleteall.sqlite")
metadata = MetaData()
connection = engine.connect()
census = Table("census", metadata, autoload=True, autoload_with=engine)

# A delete() with no where clause removes every row; rowcount reports how many.
delete_stmt = delete(census)
results = connection.execute(delete_stmt)
print(results.rowcount)

# Selecting everything afterwards shows what is left in the table.
select_stmt = select([census])
remaining_rows = connection.execute(select_stmt).fetchall()
print(remaining_rows)

8772
[]


Deleting without a where clause empties a table of all of its records, for example so you can reload the data.

### Deleting specific records

In [12]:
from sqlalchemy import func
from sqlalchemy import and_
engine = create_engine("sqlite:///census_specific.sqlite")
metadata = MetaData()
connection = engine.connect()
census = Table("census", metadata, autoload=True, autoload_with=engine)

# The count and the delete must target exactly the same rows, so both share
# one filter expression.
male_aged_36 = and_(census.columns.sex == "M", census.columns.age == 36)

# Count the matching rows first so the delete's rowcount can be checked
# against it.
count_stmt = select([func.count(census.columns.sex)]).where(male_aged_36)
to_delete = connection.execute(count_stmt).scalar()

delete_stmt = delete(census).where(male_aged_36)
results = connection.execute(delete_stmt)
print(results.rowcount, to_delete)

51 51


You may frequently be required to remove specific records from a table, like in this case.

### Deleting a table completely

You can drop an individual table from a database with the table's .drop() method, or drop all tables in a database with the metadata's .drop_all() method.

You can check to see if a table exists on an engine with the .exists(engine) method.


In [21]:
engine = create_engine("sqlite:///censusdrop.sqlite")
metadata = MetaData()
connection = engine.connect()

# Reflect both tables so the metadata knows about them before dropping.
reflect_options = dict(autoload=True, autoload_with=engine)
census = Table("census", metadata, **reflect_options)
state_fact = Table("state_fact", metadata, **reflect_options)

# Drop a single table, checking for its existence before and after.
existed_before = state_fact.exists(engine)
print(existed_before)
state_fact.drop(engine)
existed_after = state_fact.exists(engine)
print(existed_after)

# Drop every table registered on the metadata.
metadata.drop_all(engine)
print(census.exists(engine))

True
False
False


In [25]:
# List the names of the tables currently in the database behind `engine`.
engine.table_names()

[]