#### IMPORTS

In [5]:
import pandas as pd # 2.2.0
from datetime import date

### CSV -> DF

In [6]:
# Will fail, if csv are missing
df = pd.read_csv("green_tripdata_2019-09.csv.gz", low_memory=False)
df.lpep_pickup_datetime = pd.to_datetime(df.lpep_pickup_datetime)
df.lpep_dropoff_datetime = pd.to_datetime(df.lpep_dropoff_datetime)
df_zone = pd.read_csv("taxi+_zone_lookup.csv")

### Question 3. Count records

In [7]:
a = date(2019,9,18)

df.query("(lpep_pickup_datetime.dt.date == @a) & (lpep_dropoff_datetime.dt.date == @a)").shape[0]

15612

### Question 4. Largest trip for each day

In [8]:
(df.loc[df.lpep_pickup_datetime.dt.date.isin([date(2019,9,18), date(2019,9,16), date(2019,9,26), date(2019,9,21)]), ["lpep_pickup_datetime", "trip_distance"]]
 .groupby(df.lpep_pickup_datetime.dt.date)
 .agg({"trip_distance":"max"}))

Unnamed: 0_level_0,trip_distance
lpep_pickup_datetime,Unnamed: 1_level_1
2019-09-16,114.3
2019-09-18,70.28
2019-09-21,135.53
2019-09-26,341.64


### Question 5. Three biggest pick up Boroughs

In [9]:
a = date(2019,9,18)
(df.query("lpep_pickup_datetime.dt.date == @a").merge(df_zone, left_on="PULocationID", right_on="LocationID", how="outer")[["PULocationID", "Borough", "total_amount"]]
 .groupby("Borough")
 .agg({"total_amount": "sum"})
 .sort_values("total_amount", ascending=False)
 .head(3))

Unnamed: 0_level_0,total_amount
Borough,Unnamed: 1_level_1
Brooklyn,96333.24
Manhattan,92271.3
Queens,78671.71


### Question 6. Largest tip

In [10]:
# merge PU with zone
a = df[["lpep_pickup_datetime", "PULocationID", "DOLocationID", "tip_amount"]].merge(df_zone, left_on="PULocationID", right_on="LocationID", how="outer").rename(columns={"Borough": "PUBorough", "Zone": "PUZone", "service_zone": "PUservice_zone"}).drop(columns="LocationID")
# merge DO with zone
a = a.merge(df_zone, left_on="DOLocationID", right_on="LocationID", how="outer").rename(columns={"Borough": "DOBorough", "Zone": "DOZone", "service_zone": "DOservice_zone"}).drop(columns="LocationID")

a.query("lpep_pickup_datetime.dt.month == 9 & lpep_pickup_datetime.dt.year == 2019 & PUZone == 'Astoria'").sort_values("tip_amount", ascending=False)

Unnamed: 0,lpep_pickup_datetime,PULocationID,DOLocationID,tip_amount,PUBorough,PUZone,PUservice_zone,DOBorough,DOZone,DOservice_zone
232720,2019-09-08 18:10:40,7.0,132.0,62.31,Queens,Astoria,Boro Zone,Queens,JFK Airport,Airports
433137,2019-09-15 02:01:47,7.0,260.0,30.00,Queens,Astoria,Boro Zone,Queens,Woodside,Boro Zone
240991,2019-09-25 10:24:32,7.0,137.0,28.00,Queens,Astoria,Boro Zone,Manhattan,Kips Bay,Yellow Zone
445128,2019-09-03 04:25:59,7.0,264.0,25.00,Queens,Astoria,Boro Zone,Unknown,NV,
403011,2019-09-12 20:36:36,7.0,239.0,20.00,Queens,Astoria,Boro Zone,Manhattan,Upper West Side South,Yellow Zone
...,...,...,...,...,...,...,...,...,...,...
217935,2019-09-10 15:16:08,7.0,129.0,0.00,Queens,Astoria,Boro Zone,Queens,Jackson Heights,Boro Zone
217936,2019-09-10 17:25:41,7.0,129.0,0.00,Queens,Astoria,Boro Zone,Queens,Jackson Heights,Boro Zone
217937,2019-09-10 18:47:25,7.0,129.0,0.00,Queens,Astoria,Boro Zone,Queens,Jackson Heights,Boro Zone
217938,2019-09-10 19:38:17,7.0,129.0,0.00,Queens,Astoria,Boro Zone,Queens,Jackson Heights,Boro Zone
