# Import Dependencies

In [3]:
import numpy
import pandas as pd
import sqlite3
from sqlalchemy import create_engine,func
from sqlalchemy.orm import Session
from sqlalchemy import Column, Integer, String, Float
from sqlalchemy.ext.declarative import declarative_base

# Use Pandas to convert the CSV to SQLite

In [4]:
# Load the pothole service requests and keep only the columns used by the
# queries further down.
df = pd.read_csv('../data/pothole_datasd.csv')
df = df[['service_request_id', 'lat', 'lng', 'status', 'case_origin', 'council_district','comm_plan_name']]

# Write the frame to a local SQLite file, replacing any earlier copy of the
# table. The connection is closed once the table is written so the handle is
# not held open for the rest of the session.
connection = sqlite3.connect('potholes.sqlite')
try:
    df.to_sql('potholes', con=connection, if_exists='replace')
finally:
    connection.close()

FileNotFoundError: [Errno 2] File ../data/pothole_datasd.csv does not exist: '../data/pothole_datasd.csv'

# SQLAlchemy Prep: Create custom Python Class
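Note: Tried using `Base = automap_base()`, but reflection did not work — most likely because automap only maps tables that have a primary key, and the table written by `DataFrame.to_sql` has none. The mapped class is therefore declared by hand.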

Source: [https://github.com/davidjaimes/sqlalchemy-challenge/blob/master/climate.ipynb](https://github.com/davidjaimes/sqlalchemy-challenge/blob/master/climate.ipynb)

In [None]:
Base = declarative_base()


class Potholes(Base):
    """ORM mapping for one row of the ``potholes`` table.

    The mapped columns mirror the ones selected from the CSV before it is
    written to SQLite.
    """

    __tablename__ = 'potholes'

    # The request id alone is the primary key. Marking council_district as a
    # second primary-key column would make the key composite, which does not
    # describe the data: a district is an attribute of a request, not part of
    # its identity.
    # NOTE(review): assumes service_request_id is unique per row - confirm
    # against the dataset.
    service_request_id = Column(Integer, primary_key=True)
    lat = Column(Float)
    lng = Column(Float)
    status = Column(String)
    case_origin = Column(String)
    council_district = Column(Integer)
    comm_plan_name = Column(String)

# Create database connection and Begin Queries

In [None]:
# Open the SQLite file through SQLAlchemy, create the mapped table if it is
# missing, and start the session that the queries below run on.
engine = create_engine("sqlite:///potholes.sqlite")
Base.metadata.create_all(bind=engine)
session = Session(engine)

### Example 1: Flatten SQL query.

In [None]:
# Fetch every latitude and show the first result as it comes back.
latitudes = session.query(Potholes.lat).all()
print(latitudes[0])

# Problem: each result is a one-element row (tuple-like), not a bare number.
# Solution: build a NumPy array from the rows and collapse it to one dimension.
latitudes = numpy.array(latitudes).ravel()
print(latitudes[0])

### Example 2: Filter SQL query.

In [None]:
# Number of requests whose status is 'In Process'.
new_requests = session.query(Potholes).filter_by(status='In Process')
new_requests.count()

In [None]:
# Number of requests whose status is 'Closed'.
new_requests = session.query(Potholes).filter_by(status='Closed')
new_requests.count()

In [None]:
# Number of requests whose status is 'New'.
new_requests = session.query(Potholes).filter_by(status='New')
new_requests.count()

In [None]:
# Number of requests whose status is 'Referred'.
new_requests = session.query(Potholes).filter_by(status='Referred')
new_requests.count()

In [None]:
# Number of requests whose case origin is 'Phone'.
new_requests = session.query(Potholes).filter_by(case_origin='Phone')
new_requests.count()

In [None]:
# Number of requests whose case origin is 'Web'.
new_requests = session.query(Potholes).filter_by(case_origin='Web')
new_requests.count()

In [None]:
# Number of requests whose case origin is 'Email'.
new_requests = session.query(Potholes).filter_by(case_origin='Email')
new_requests.count()

In [None]:
# How many distinct council districts appear in this dataset?
def GetDistricts():
    """Print how many distinct ``council_district`` groups the table has.

    The query groups the rows by district and counts the resulting groups;
    nothing is returned.
    """
    per_district = session.query(Potholes.council_district).group_by(
        Potholes.council_district
    )
    district_total = per_district.count()
    print(f"{district_total} Districts total.")


GetDistricts()

In [None]:
# What are the most active districts?
# List the districts and their request counts in descending order.
# The request id is counted rather than the grouping column: COUNT(column)
# skips NULLs, so requests with no district would otherwise show up as a
# group with 0 requests.
district_request_count = func.count(Potholes.service_request_id)
active_districts = (
    session.query(Potholes.council_district, district_request_count)
    .group_by(Potholes.council_district)
    .order_by(district_request_count.desc())
    .all()
)
active_districts

In [None]:
# What are the most active referral channels?
# List the case origins and their request counts in descending order.
# The request id is counted rather than the grouping column: COUNT(column)
# skips NULLs, so requests with no case origin would otherwise show up as a
# group with 0 requests.
referral_request_count = func.count(Potholes.service_request_id)
active_referrals = (
    session.query(Potholes.case_origin, referral_request_count)
    .group_by(Potholes.case_origin)
    .order_by(referral_request_count.desc())
    .all()
)
active_referrals

In [None]:
# What are the most active places (comm_plan_name)?
# NOTE(review): comm_plan_name presumably holds the community plan name -
# confirm against the dataset documentation.
# List the names and their request counts in descending order.
# The request id is counted rather than the grouping column: COUNT(column)
# skips NULLs, so requests with no name would otherwise show up as a group
# with 0 requests.
place_request_count = func.count(Potholes.service_request_id)
active_places = (
    session.query(Potholes.comm_plan_name, place_request_count)
    .group_by(Potholes.comm_plan_name)
    .order_by(place_request_count.desc())
    .all()
)
active_places

In [None]:
# Count service requests for every (council district, status) pair.
status_groups = df.groupby(["council_district", "status"])
district_status = status_groups["service_request_id"].count().reset_index()
district_status

In [None]:
# Count service requests for every (council district, case origin) pair.
referral_groups = df.groupby(["council_district", "case_origin"])
district_referral = referral_groups["service_request_id"].count().reset_index()
district_referral

In [None]:
# Sanity check: read the whole potholes table back through the engine.
all_rows_sql = "select * from potholes"
pd.read_sql(all_rows_sql, con=engine)