In [2]:
import numpy as np

# Load dataset (skip header row)
taxi = np.genfromtxt(
    'nyc_taxis.csv',
    delimiter=',',
    skip_header=1
)

# -----------------------------
# 1. Average Taxi Speed
# speed = distance / time (in hours)
# -----------------------------
distance = taxi[:, 7]          # trip distance
time_seconds = taxi[:, 8]      # trip duration in seconds

# Avoid division by zero
valid_trips = time_seconds > 0
speed = distance[valid_trips] / (time_seconds[valid_trips] / 3600)

mean_speed = speed.mean()
print("Average taxi speed:", round(mean_speed, 2), "units/hour")

# -----------------------------
# 2. Total rides in February
# -----------------------------
feb_rides = taxi[taxi[:, 1] == 2]
print("Total number of rides in February:", feb_rides.shape[0])

# -----------------------------
# 3. Rides with fare greater than 50
# -----------------------------
high_fare_rides = taxi[taxi[:, -3] > 50]
print("Number of rides with fare > 50:", high_fare_rides.shape[0])

# -----------------------------
# 4. Drop-off at JFK Airport
# (location code = 2)
# -----------------------------
jfk_drop = taxi[taxi[:, 6] == 2]
print("Number of drop-offs at JFK Airport:", jfk_drop.shape[0])

# -----------------------------
# 5. Pickup at JFK Airport
# (location code = 2)
# -----------------------------
jfk_pickup = taxi[taxi[:, 5] == 2]
print("Number of pickups at JFK Airport:", jfk_pickup.shape[0])


Average taxi speed: 32.24 units/hour
Total number of rides in February: 13333
Number of rides with fare > 50: 16
Number of drop-offs at JFK Airport: 11832
Number of pickups at JFK Airport: 29329
