# 2. According to Second Substitute Bill BL2018-1202 (as amended) (https://web.archive.org/web/20181019234657/https://www.nashville.gov/Metro-Clerk/Legislative/Ordinances/Details/7d2cf076-b12c-4645-a118-b530577c5ee8/2015-2019/BL2018-1202.aspx), all permitted operators will first clean data before providing or reporting data to Metro. Data processing and cleaning shall include:  
## * Removal of staff servicing and test trips  
## * Removal of trips below one minute  
## * Trip lengths are capped at 24 hours  
## Are the scooter companies in compliance with the second and third parts of this rule?

### ------------------------------------------------------------------------------------------------------------------------------------

In [None]:
from sqlalchemy import create_engine, text
import pandas as pd

#### Setting up the engine to run SQL queries

In [None]:
# Database that holds the trips and scooters tables queried in this notebook.
database_name = "scooters"

# SQLAlchemy connection URL for the PostgreSQL server on localhost, port 5432.
connection_string = (
    f"postgresql://postgres:postgres@localhost:5432/{database_name}"
)

In [None]:
# Build the SQLAlchemy engine from the connection string; the query cells
# below open their connections from it via engine.connect().
engine = create_engine(connection_string)

### Checking to see if companies are in compliance with 'removal of trips below one minute'

In [None]:
# Select every trip whose recorded duration is one minute or less.
# NOTE(review): tripduration appears to be in minutes (the 24-hour check
# compares it against 24 * 60) -- confirm against the table schema.
trip_query = '''
SELECT *
FROM trips
WHERE tripduration <= 1.0;
'''

# Run the query on a connection that is closed when the block exits and
# load the matching rows into a DataFrame.
with engine.connect() as conn:
    trips_below_1min = pd.read_sql(sql=text(trip_query), con=conn)

In [None]:
# Display the trips of one minute or less returned by the query.
trips_below_1min

In [None]:
# Tally, for each company, how many of its trips fall foul of rule 2
# (count of non-null sumdid values among the short trips).
(
    trips_below_1min
    .groupby(['companyname'])['sumdid']
    .count()
)

### Number of entries that are non-compliant with rule 2

#### Bird: 9,321
#### Gotcha: 564
#### Lime: 2,125
#### Lyft: 4,530

### Checking to see if companies are in compliance with 'trip lengths are capped at 24 hours'

In [None]:
# Select every trip whose recorded duration exceeds 24 hours
# (24 * 60, i.e. the limit expressed in the same unit as tripduration).
triplength_query = '''
SELECT *
FROM trips
WHERE tripduration > (24 * 60);
'''

# Run the query on a connection that is closed when the block exits and
# load the matching rows into a DataFrame.
with engine.connect() as conn:
    trips_above_24hour = pd.read_sql(sql=text(triplength_query), con=conn)

In [None]:
# Display the trips longer than 24 hours returned by the query.
trips_above_24hour

In [None]:
# Tally, for each company, how many of its trips fall foul of rule 3
# (count of non-null sumdid values among the over-long trips).
(
    trips_above_24hour
    .groupby(['companyname'])['sumdid']
    .count()
)

### Number of entries that are non-compliant with rule 3

#### Bolt Mobility: 6,908
#### Lyft: 2
#### SPIN: 28

### Checking to see which companies were in compliance with both rules

In [None]:
# List the companies that pass both duration checks used in this notebook.
#
# A company is kept only when none of its trips is one minute or less (the
# rule-2 check) and none is longer than 24 hours (the rule-3 check). The
# offenders are derived from the trips table itself instead of being typed in
# by hand from earlier outputs, so the result stays correct if the data
# changes or a company name was mistyped.
#
# COUNT(*) FILTER (...) counts only the rows matching the condition; a count
# of zero means the company has no violating trips. Rows with a NULL company
# name are skipped, as they were by the original NOT IN filter.
compliant_query = '''
SELECT companyname
FROM trips
WHERE companyname IS NOT NULL
GROUP BY companyname
HAVING COUNT(*) FILTER (
    WHERE tripduration <= 1.0
       OR tripduration > (24 * 60)
) = 0;
'''

with engine.connect() as connection:
    compliant_companies = pd.read_sql(text(compliant_query), con = connection)

In [None]:
# Display the companies returned by the compliance query.
compliant_companies

### JUMP is the only company that is compliant with rules 2 and 3

# The code below joins the trips and scooters tables and brings in only the data that meets the 'cleaned' requirements (the query is limited to a 5-row preview).

In [None]:
# Join trips to scooters on matching sumdid and publication timestamp
# (trips.pubtimestamp = scooters.pubdatetime), keeping only trips that pass
# both duration checks used in this notebook:
#   * longer than one minute (the rule-2 check flags tripduration <= 1), and
#   * no longer than 24 hours (the rule-3 check flags tripduration > 24 * 60).
# The upper bound is inclusive so that a trip of exactly 24 hours -- which the
# rule-3 check does not flag, since trips are "capped at" 24 hours -- is kept
# rather than silently dropped.
#
# LIMIT 5 keeps this to a small preview; no ORDER BY is given, so which five
# rows come back is up to the database.
cleaning_query = '''
SELECT *
FROM trips
INNER JOIN scooters
ON trips.sumdid = scooters.sumdid
    AND trips.pubtimestamp = scooters.pubdatetime
WHERE tripduration <= (24 * 60)
    AND tripduration > 1
LIMIT 5;
'''

with engine.connect() as connection:
    cleaned_df = pd.read_sql(text(cleaning_query), con = connection)

In [None]:
# Show a subset of the joined columns from the cleaned preview.
cleaned_df[[
    'sumdid', 'companyname', 'chargelevel',
    'latitude', 'longitude',
    'startlatitude', 'startlongitude',
    'endlatitude', 'endlongitude',
    'pubtimestamp', 'pubdatetime',
    'create_dt', 'startdate', 'starttime',
]]