In [51]:
import sqlite3
from sqlalchemy import create_engine 
import re
import requests
import pandas as pd # dataframes
from math import pi
import datetime
from itertools import chain
from collections import namedtuple
from bokeh.io import show, output_notebook
from bokeh.plotting import figure, output_file, show
from bokeh.models import HoverTool, FuncTickFormatter, FixedTicker, ColumnDataSource, SingleIntervalTicker, LinearAxis


In [2]:
#Load the injection well records from CSV into a pandas dataframe
injection_wells = pd.read_csv(filepath_or_buffer="data/InjectionWells.csv")

In [76]:
#Load the earthquake records from CSV into a pandas dataframe
ok_quakes = pd.read_csv(filepath_or_buffer="data/okQuakes.csv")

In [None]:
#Show the dtype pandas inferred for each column of injection_wells
injection_wells.dtypes

In [77]:
#Show the dtype pandas inferred for each column of ok_quakes
ok_quakes.dtypes

time                object
latitude           float64
longitude          float64
depth              float64
mag                float64
magType             object
nst                float64
gap                float64
dmin               float64
rms                float64
net                 object
id                  object
updated             object
place               object
type                object
horizontalError    float64
depthError         float64
magError           float64
magNst             float64
status              object
locationSource      object
magSource           object
dtype: object

In [None]:
#Preview the first rows of injection_wells to check its layout
injection_wells.head()

In [78]:
#Preview the first rows of ok_quakes to check its layout
ok_quakes.head()

Unnamed: 0,time,latitude,longitude,depth,mag,magType,nst,gap,dmin,rms,...,updated,place,type,horizontalError,depthError,magError,magNst,status,locationSource,magSource
0,1973-03-17T07:43:05.500Z,36.087,-106.168,6.0,4.5,mb,,,,,...,2014-11-06T23:21:10.078Z,New Mexico,earthquake,,,,,reviewed,us,us
1,1973-05-25T14:40:13.900Z,33.917,-90.775,6.0,,,,,,,...,2014-11-06T23:21:12.859Z,Mississippi,earthquake,,,,,reviewed,s,us
2,1973-09-19T13:28:20.500Z,37.16,-104.594,5.0,,,,,,,...,2014-11-06T23:21:20.295Z,Colorado,earthquake,,,,,reviewed,us,us
3,1973-09-23T03:58:54.900Z,37.148,-104.571,5.0,4.2,mb,,,,,...,2014-11-06T23:21:20.346Z,Colorado,earthquake,,,,,reviewed,us,us
4,1974-02-15T13:33:49.200Z,36.5,-100.693,24.0,4.5,mb,,,,,...,2014-11-06T23:21:22.859Z,Oklahoma,earthquake,,,,,reviewed,us,us


In [80]:
#Parse the 'Approval Date' column (month/day/year text) into a datetime column, in place
injection_wells['Approval Date'] = pd.to_datetime(
    injection_wells['Approval Date'],
    format='%m/%d/%Y',
)

In [5]:
#Open (or create) the SQLite database file, then take two cursors on that one connection
conn = sqlite3.connect('injection_wells.db')
cur, cursor = conn.cursor(), conn.cursor()

In [6]:
#Write injection_wells to the Injection_Wells table, replacing the table if it is already there;
#the dataframe index is stored as a column too
injection_wells.to_sql(
    name="Injection_Wells",
    con=conn,
    if_exists='replace',
    index=True,
)

  chunksize=chunksize, dtype=dtype)


In [79]:
#Write ok_quakes to the OK_Quakes table, replacing the table if it is already there;
#the dataframe index is stored as a column too
ok_quakes.to_sql(
    name="OK_Quakes",
    con=conn,
    if_exists='replace',
    index=True,
)

In [None]:
#Optional cleanup: uncomment the line below to drop the Year table if it already exists
#cur.execute("DROP TABLE Year;")

In [9]:
#sql query to create new table from query
#The database lives in a file, so a Year table from an earlier run is still there on the next run.
#Drop it first; otherwise CREATE TABLE fails with "table Year already exists" when this cell is re-executed.
cur.execute("drop table if exists Year;")
#Year holds one row per well: its name and the four-digit year text taken from the approval date
cur.execute("create table Year as select  WellName,strftime('%Y', `Approval Date`) as approval_year from Injection_Wells;")


    
  

<sqlite3.Cursor at 0x1a353ba71f0>

In [10]:
#testing new table from query: list the column definitions SQLite reports for the Year table

tablecolumnnames = cur.execute("pragma table_info(Year);").fetchall()
print(tablecolumnnames)

[(0, 'WellName', 'TEXT', 0, None, 0), (1, 'approval_year', '', 0, None, 0)]


In [74]:
#sql query of year table for year count list
#Each row is (approval_year, number of rows with that year); count(approval_year) ignores NULLs,
#so the group of wells without an approval year is reported with a count of 0.
#ORDER BY makes the chronological order explicit: without it SQLite does not promise any row order,
#and the line plot built from this list needs the years in sequence. NULL sorts first.
cur.execute("select approval_year, count(approval_year) as year_count from Year group by approval_year order by approval_year;")
year_count=cur.fetchall()
print(year_count)

[(None, 0), ('1936', 1), ('1945', 1), ('1946', 1), ('1947', 4), ('1948', 1), ('1949', 8), ('1950', 13), ('1951', 2), ('1952', 8), ('1953', 34), ('1954', 34), ('1955', 45), ('1956', 64), ('1957', 71), ('1958', 65), ('1959', 179), ('1960', 126), ('1961', 157), ('1962', 134), ('1963', 95), ('1964', 64), ('1965', 96), ('1966', 80), ('1967', 92), ('1968', 276), ('1969', 95), ('1970', 99), ('1971', 145), ('1972', 192), ('1973', 79), ('1974', 77), ('1975', 76), ('1976', 77), ('1977', 79), ('1978', 76), ('1979', 123), ('1980', 142), ('1981', 132), ('1982', 173), ('1983', 242), ('1984', 243), ('1985', 141), ('1986', 182), ('1987', 199), ('1988', 175), ('1989', 159), ('1990', 268), ('1991', 195), ('1992', 254), ('1993', 208), ('1994', 143), ('1995', 190), ('1996', 212), ('1997', 175), ('1998', 177), ('1999', 134), ('2000', 175), ('2001', 153), ('2002', 167), ('2003', 194), ('2004', 170), ('2005', 165), ('2006', 200), ('2007', 275), ('2008', 234), ('2009', 246), ('2010', 293), ('2011', 438), ('20

In [46]:
#Build a dataframe from the (year, count) tuples, naming the columns up front, then preview it

df = pd.DataFrame(year_count, columns=['Year', 'Count'])

df.head()

Unnamed: 0,Year,Count
0,,0
1,1936.0,1
2,1945.0,1
3,1946.0,1
4,1947.0,4


In [75]:
#Bokeh visualization
#Pull the year and count of year into their own list to pass to bokeh for the different axis and plot values.
#The query returns the year as text (strftime output) and None for wells without an approval date,
#so rows with no year are skipped and the rest are converted to int to match the numeric x axis.
#Sorting by year keeps the line running left to right whatever order the rows arrived in.
year_points = sorted((int(row[0]), row[1]) for row in year_count if row[0] is not None)
year = [point[0] for point in year_points]
count = [point[1] for point in year_points]

#Set to open charts in the notebook
output_notebook()

#The default axes are switched off here; custom axes with their own tickers are added below
p = figure(plot_width=1200, plot_height=600, x_axis_type=None, y_axis_type=None,
           title="Injection wells approved per year")

#alpha is an opacity and must lie between 0 and 1 (1.0 = fully opaque)
p.line(x=year, y=count, color='navy', alpha=1.0)

#formatting the y and x axis start ticks
p.y_range.start = 0
p.x_range.start = 1936
p.x_range.end = 2018

#formatting the x axis ticks and tick labels
xticker = SingleIntervalTicker(interval=2, num_minor_ticks=2)
xaxis = LinearAxis(ticker=xticker, axis_label="Approval year")
p.add_layout(xaxis, 'below')
p.xaxis.major_label_orientation = "vertical"

#formatting the y axis left ticks and tick labels
yticker = SingleIntervalTicker(interval=20, num_minor_ticks=2)
yaxis = LinearAxis(ticker=yticker, axis_label="Number of wells")
p.add_layout(yaxis, 'left')

show(p)