In [None]:
import torch
import torch.nn as nn
import numpy as np
import pandas as pd
import matplotlib as plt

In [None]:
# Load the taxi fare records from a CSV path given relative to the working directory.
df = pd.read_csv('data/NYCTaxiFares.csv')
# Preview the first rows of the loaded frame.
df.head()

In [None]:
# Print summary statistics (as returned by describe()) for the fare_amount column.
print(df['fare_amount'].describe())

In [None]:
def haversine_distance(df, lat1, long1, lat2, long2, radius_km=6371):
    """
    Calculates the haversine (great-circle) distance between 2 sets of GPS coordinates in df.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame (or any mapping of name -> array-like) holding the coordinate
        columns, expressed in decimal degrees.
    lat1, long1 : str
        Column names of the starting latitude and longitude.
    lat2, long2 : str
        Column names of the ending latitude and longitude.
    radius_km : float, optional
        Radius of the sphere in kilometers. Defaults to 6371, the average
        radius of Earth.

    Returns
    -------
    Element-wise distances in kilometers, aligned with the input columns.
    Missing coordinates propagate as NaN.
    """
    phi1 = np.radians(df[lat1])
    phi2 = np.radians(df[lat2])

    # Differences are taken in degrees and converted afterwards, which keeps
    # precision for the small coordinate deltas typical of short trips.
    delta_phi = np.radians(df[lat2] - df[lat1])
    delta_lambda = np.radians(df[long2] - df[long1])

    a = np.sin(delta_phi / 2) ** 2 + np.cos(phi1) * np.cos(phi2) * np.sin(delta_lambda / 2) ** 2
    # Floating-point rounding can push `a` a hair outside [0, 1] for
    # (near-)antipodal points, which would make sqrt(1 - a) return NaN.
    a = np.clip(a, 0.0, 1.0)
    c = 2 * np.arctan2(np.sqrt(a), np.sqrt(1 - a))

    return radius_km * c  # in kilometers when radius_km is in kilometers

In [None]:
# Store the pickup-to-dropoff haversine distance as a new 'dist_km' column.
df['dist_km'] = haversine_distance(
    df,
    lat1='pickup_latitude',
    long1='pickup_longitude',
    lat2='dropoff_latitude',
    long2='dropoff_longitude',
)
# Preview the frame with the new column.
df.head()

In [None]:
# Print the frame's column dtypes and non-null counts.
df.info()

In [None]:
# Parse the pickup_datetime column with pd.to_datetime, overwriting the original column.
df['pickup_datetime'] = pd.to_datetime(df['pickup_datetime'])
# Re-check the dtypes after the conversion.
df.info()

In [None]:
# Preview the first rows after the pickup_datetime conversion.
df.head()

In [None]:
# Grab the pickup timestamp stored under index label 0, then show the full
# value followed by its hour component, one per line.
mytime = df.loc[0, 'pickup_datetime']
print(mytime, mytime.hour, sep='\n')

In [None]:
# Shift every pickup timestamp back by a fixed four hours and store it as 'EDTdate'.
# NOTE(review): presumably this converts UTC to Eastern Daylight Time (per the
# column name) — a constant offset is only valid while daylight saving is in
# effect; confirm the data's date range and source time zone.
df['EDTdate'] = df['pickup_datetime'] - pd.Timedelta(hours=4)

In [None]:
# Preview the first rows, now including the EDTdate column.
df.head()

In [None]:
# Hour of day (0-23) taken from the shifted EDTdate timestamp.
df['Hour'] = df['EDTdate'].dt.hour
# Label rows whose hour is below 12 as 'am'; every other row becomes 'pm'.
df['AMorPM'] = np.where(df['Hour']<12, 'am', 'pm')
# Preview the frame with the two new columns.
df.head()

In [None]:
# Abbreviated weekday name of the shifted timestamp (strftime "%a", e.g. 'Mon'
# in an English locale).
df['Weekday'] = df['EDTdate'].dt.strftime("%a")
# Preview the frame with the new Weekday column.
df.head()