In [1]:
import pandas as pd
import sqlite3

In [2]:
ss = pd.read_csv('state_stats.csv')
ud = pd.read_csv('ufo_data.csv', low_memory = False)

In [3]:
ss.head(3)

Unnamed: 0,name,drug_deaths,abbreviation,sightings,avg_sighting_duration,code,census_area
0,Alabama,724,al,691,1393.408828,1,50645.326
1,Alaska,107,ak,354,4231.830508,2,570640.95
2,Arizona,1725,az,2689,5949.009338,4,113594.084


In [4]:
ud.head(3)

Unnamed: 0,datetime,city,state,country,shape,duration,duration_hours,comments,date_posted,latitude,longitude,year,month
0,10/10/1949 20:30,san marcos,tx,us,cylinder,2700,45 minutes,This event took place in early fall around 194...,4/27/2004,29.8831,-97.9411,1949.0,10.0
1,10/10/1949 21:00,lackland afb,tx,,light,7200,1-2 hrs,1949 Lackland AFB&#44 TX. Lights racing acros...,12/16/2005,29.3842,-98.5811,1949.0,10.0
2,10/10/1955 17:00,chester (uk/england),,gb,circle,20,20 seconds,Green/Orange circular disc over Chester&#44 En...,1/21/2008,53.2,-2.9167,1955.0,10.0


In [5]:
ud.dropna(inplace=True)
ud

Unnamed: 0,datetime,city,state,country,shape,duration,duration_hours,comments,date_posted,latitude,longitude,year,month
0,10/10/1949 20:30,san marcos,tx,us,cylinder,2700,45 minutes,This event took place in early fall around 194...,4/27/2004,29.8831,-97.9411,1949.0,10.0
3,10/10/1956 21:00,edna,tx,us,circle,20,1/2 hour,My older brother and twin sister were leaving ...,1/17/2004,28.9783,-96.6458,1956.0,10.0
4,10/10/1960 20:00,kaneohe,hi,us,light,900,15 minutes,AS a Marine 1st Lt. flying an FJ4B fighter/att...,1/22/2004,21.4181,-157.8036,1960.0,10.0
5,10/10/1961 19:00,bristol,tn,us,sphere,300,5 minutes,My father is now 89 my brother 52 the girl wit...,4/27/2007,36.595,-82.1889,1961.0,10.0
7,10/10/1965 23:45,norwalk,ct,us,disk,1200,20 minutes,A bright orange color changing to reddish colo...,10/2/1999,41.1175,-73.4083,1965.0,10.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
80327,9/9/2013 21:15,nashville,tn,us,light,600,10 minutes,Round from the distance/slowly changing colors...,9/30/2013,36.1658,-86.7844,2013.0,9.0
80328,9/9/2013 22:00,boise,id,us,circle,1200,20 minutes,Boise&#44 ID&#44 spherical&#44 20 min&#44 10 r...,9/30/2013,43.6136,-116.2025,2013.0,9.0
80329,9/9/2013 22:00,napa,ca,us,other,1200,hour,Napa UFO&#44,9/30/2013,38.2972,-122.2844,2013.0,9.0
80330,9/9/2013 22:20,vienna,va,us,circle,5,5 seconds,Saw a five gold lit cicular craft moving fastl...,9/30/2013,38.9011,-77.2656,2013.0,9.0


In [6]:
db_conn = sqlite3.connect('../database.sqlite')

In [7]:
c = db_conn.cursor()

In [8]:
c.execute('DROP TABLE IF EXISTS ufo_data;')

c.execute(
    
"""
CREATE TABLE ufo_data(
id SERIAL PRIMARY KEY,
datetime VARCHAR,
city VARCHAR,
state VARCHAR,
country VARCHAR,
shape VARCHAR,
duration FLOAT,
duration_hours VARCHAR,
comments VARCHAR,
date_posted VARCHAR,
latitude FLOAT,
longitude FLOAT,
year FLOAT,
month FLOAT,
Foreign Key (state) REFERENCES state_stats(abbreviation));

"""
    
)

<sqlite3.Cursor at 0x23e5ed3c9d0>

In [9]:
c.execute('DROP TABLE IF EXISTS state_stats;')

c.execute(
    
"""

CREATE TABLE state_stats(
name VARCHAR,
drug_deaths INT,
abbreviation VARCHAR,
sightings INT,
avg_sighting_duration FLOAT,
code VARCHAR,
census_area FLOAT,
PRIMARY KEY (abbreviation));

"""
    
)

<sqlite3.Cursor at 0x23e5ed3c9d0>

In [10]:
ud.to_sql('ufo_data', db_conn, if_exists='append', index=False)
ss.to_sql('state_stats', db_conn, if_exists='append', index=False)

In [11]:
# Test 1 with Pandas

pd.read_sql("SELECT * FROM ufo_data LIMIT 3", db_conn)

Unnamed: 0,id,datetime,city,state,country,shape,duration,duration_hours,comments,date_posted,latitude,longitude,year,month
0,,10/10/1949 20:30,san marcos,tx,us,cylinder,2700.0,45 minutes,This event took place in early fall around 194...,4/27/2004,29.8831,-97.9411,1949.0,10.0
1,,10/10/1956 21:00,edna,tx,us,circle,20.0,1/2 hour,My older brother and twin sister were leaving ...,1/17/2004,28.9783,-96.6458,1956.0,10.0
2,,10/10/1960 20:00,kaneohe,hi,us,light,900.0,15 minutes,AS a Marine 1st Lt. flying an FJ4B fighter/att...,1/22/2004,21.4181,-157.8036,1960.0,10.0


In [12]:
# Test 2 with Pandas

pd.read_sql("SELECT * FROM state_stats LIMIT 3", db_conn)

Unnamed: 0,name,drug_deaths,abbreviation,sightings,avg_sighting_duration,code,census_area
0,Alabama,724,al,691,1393.408828,1,50645.326
1,Alaska,107,ak,354,4231.830508,2,570640.95
2,Arizona,1725,az,2689,5949.009338,4,113594.084


In [13]:
# Test with SQLAlchemy

# Python SQL toolkit and Object Relational Mapper
import sqlalchemy
from sqlalchemy.ext.automap import automap_base
from sqlalchemy.orm import Session
from sqlalchemy import create_engine, func
from scipy.stats import ttest_ind

In [14]:
engine = create_engine("sqlite:///../database.sqlite")

In [15]:
# reflect an existing database into a new model
Base = automap_base()
# reflect the tables
Base.prepare(engine, reflect=True)

In [16]:
# We can view all of the classes that automap found
Base.classes.keys()

['state_stats', 'ufo_data']

In [17]:
ufo_data = Base.classes.ufo_data
state_stats = Base.classes.state_stats

In [18]:
session = Session(engine)

In [19]:
sightings = session.query(state_stats.sightings)

In [20]:
for s in sightings[:10]:
    print(s)

(691,)
(354,)
(2689,)
(666,)
(9655,)
(1505,)
(968,)
(183,)
(99,)
(4200,)
