# Converting Longitude and Latitude (Distance Measurements) and Final Preparations

In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
# Read the source CSV from disk into the working DataFrame `df`
csv_source = 'uber_USA.csv'
df = pd.read_csv(csv_source)

In [None]:
# Preview the first five rows of the loaded data
df.head(n=5)

Unnamed: 0,Fare Amount ($),Pickup Longitude,Pickup Latitude,Dropoff Longitude,Dropoff Latitude,Number of Passengers,Year,Month,Day of Week,Hour,Pick Up Date,Pick Up Time,AM/PM
0,7.5,-73.999817,40.738354,-73.999512,40.723217,1.0,2015.0,5.0,3.0,19.0,2015-05-07,19:52:06,PM
1,7.7,-73.994355,40.728225,-73.99471,40.750325,1.0,2009.0,7.0,4.0,20.0,2009-07-17,20:04:56,PM
2,12.9,-74.005043,40.74077,-73.962565,40.772647,1.0,2009.0,8.0,0.0,21.0,2009-08-24,21:45:00,PM
3,5.3,-73.976124,40.790844,-73.965316,40.803349,3.0,2009.0,6.0,4.0,8.0,2009-06-26,08:22:21,AM
4,16.0,-73.925023,40.744085,-73.973082,40.761247,5.0,2014.0,8.0,3.0,17.0,2014-08-28,17:47:00,PM


In [None]:
import pandas as pd
import numpy as np

def radians(series):
    """Convert angle(s) given in degrees to radians.

    Thin wrapper around ``np.radians``; ``series`` is passed straight through,
    and whatever ``np.radians`` produces for it is returned.
    """
    in_radians = np.radians(series)
    return in_radians

def haversine_distance(row):
    """Return the Haversine distance in meters between pickup and dropoff.

    Parameters
    ----------
    row : mapping-like
        Any object indexable by the keys 'Pickup Latitude',
        'Pickup Longitude', 'Dropoff Latitude' and 'Dropoff Longitude',
        with coordinate values in degrees (for example a row handed over by
        ``df.apply(..., axis=1)``).

    Returns
    -------
    The distance in meters on a sphere of radius 6371 km. The result is
    NaN if any of the four coordinates is NaN.
    """
    # Radius of the Earth in kilometers
    R = 6371.0

    # Convert latitude and longitude from degrees to radians
    lat1 = np.radians(row['Pickup Latitude'])
    lon1 = np.radians(row['Pickup Longitude'])
    lat2 = np.radians(row['Dropoff Latitude'])
    lon2 = np.radians(row['Dropoff Longitude'])

    # Differences in coordinates
    dlat = lat2 - lat1
    dlon = lon2 - lon1

    # Haversine formula
    a = np.sin(dlat / 2)**2 + np.cos(lat1) * np.cos(lat2) * np.sin(dlon / 2)**2

    # Floating-point rounding can push `a` marginally outside [0, 1] (e.g. for
    # antipodal points); without the clamp, sqrt(1 - a) would then be NaN.
    a = np.clip(a, 0.0, 1.0)
    c = 2 * np.arctan2(np.sqrt(a), np.sqrt(1 - a))

    # Distance in kilometers, converted to meters
    return R * c * 1000

# Compute every distance in a single vectorized call instead of a row-wise
# apply: haversine_distance only performs column lookups and NumPy ufunc
# arithmetic, so passing the whole DataFrame returns a Series of distances
# aligned with df's index.
df['distance_meters'] = haversine_distance(df)

# Now 'df' includes a new column 'distance_meters' which is the distance in meters between the two points


In [None]:
# Conversion factor: one international mile is exactly 1609.344 meters
# (the rounded value 1600 overstates every distance by roughly 0.58%)
meters_in_a_mile = 1609.344

# Create a new column 'distance_miles' by converting 'distance_meters' to miles
df['distance_miles'] = df['distance_meters'] / meters_in_a_mile

# Now 'df' includes the new column 'distance_miles'
df

Unnamed: 0,Fare Amount ($),Pickup Longitude,Pickup Latitude,Dropoff Longitude,Dropoff Latitude,Number of Passengers,Year,Month,Day of Week,Hour,Pick Up Date,Pick Up Time,AM/PM,distance_meters,distance_miles
0,7.5,-73.999817,40.738354,-73.999512,40.723217,1.0,2015.0,5.0,3.0,19.0,2015-05-07,19:52:06,PM,1683.322752,1.052077
1,7.7,-73.994355,40.728225,-73.994710,40.750325,1.0,2009.0,7.0,4.0,20.0,2009-07-17,20:04:56,PM,2457.589884,1.535994
2,12.9,-74.005043,40.740770,-73.962565,40.772647,1.0,2009.0,8.0,0.0,21.0,2009-08-24,21:45:00,PM,5036.377190,3.147736
3,5.3,-73.976124,40.790844,-73.965316,40.803349,3.0,2009.0,6.0,4.0,8.0,2009-06-26,08:22:21,AM,1661.683458,1.038552
4,16.0,-73.925023,40.744085,-73.973082,40.761247,5.0,2014.0,8.0,3.0,17.0,2014-08-28,17:47:00,PM,4475.449963,2.797156
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
23305,7.3,-73.981352,40.729052,-73.992495,40.724092,6.0,2012.0,1.0,1.0,8.0,2012-01-31,08:42:00,AM,1088.981140,0.680613
23306,7.7,-73.995852,40.738777,-74.005760,40.714527,1.0,2012.0,6.0,1.0,21.0,2012-06-05,21:22:00,PM,2822.777777,1.764236
23307,5.3,-73.983031,40.735085,-73.975808,40.733234,1.0,2010.0,11.0,2.0,22.0,2010-11-17,22:02:05,PM,642.453327,0.401533
23308,4.1,-73.986168,40.726357,-73.988960,40.734142,2.0,2009.0,4.0,4.0,8.0,2009-04-24,08:27:00,AM,897.051735,0.560657


In [None]:
# Remove the date/time columns and the intermediate meters column
df = df.drop(columns=['Pick Up Date', 'Pick Up Time', 'distance_meters'])

In [None]:
# Remove the 'AM/PM' column as well
df = df.drop(columns=['AM/PM'])

In [None]:
# dropna() returns a new DataFrame; assign it back so that rows with missing
# values are actually removed from `df` rather than only from the displayed
# copy. The original row index labels are kept.
df = df.dropna()
df

Unnamed: 0,Fare Amount ($),Pickup Longitude,Pickup Latitude,Dropoff Longitude,Dropoff Latitude,Number of Passengers,Year,Month,Day of Week,Hour,distance_miles
0,7.5,-73.999817,40.738354,-73.999512,40.723217,1.0,2015.0,5.0,3.0,19.0,1.052077
1,7.7,-73.994355,40.728225,-73.994710,40.750325,1.0,2009.0,7.0,4.0,20.0,1.535994
2,12.9,-74.005043,40.740770,-73.962565,40.772647,1.0,2009.0,8.0,0.0,21.0,3.147736
3,5.3,-73.976124,40.790844,-73.965316,40.803349,3.0,2009.0,6.0,4.0,8.0,1.038552
4,16.0,-73.925023,40.744085,-73.973082,40.761247,5.0,2014.0,8.0,3.0,17.0,2.797156
...,...,...,...,...,...,...,...,...,...,...,...
23304,5.0,-73.993392,40.727962,-73.993392,40.727962,1.0,2014.0,7.0,6.0,0.0,0.000000
23305,7.3,-73.981352,40.729052,-73.992495,40.724092,6.0,2012.0,1.0,1.0,8.0,0.680613
23306,7.7,-73.995852,40.738777,-74.005760,40.714527,1.0,2012.0,6.0,1.0,21.0,1.764236
23307,5.3,-73.983031,40.735085,-73.975808,40.733234,1.0,2010.0,11.0,2.0,22.0,0.401533


In [None]:
# Write the DataFrame to CSV without the index column, then hand the file to
# the browser through Colab's download helper
csv_output = 'final_uber.csv'
df.to_csv(csv_output, index=False)

from google.colab import files
files.download(csv_output)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
# Write the DataFrame to an Excel workbook without the index column, then hand
# the file to the browser through Colab's download helper
excel_output = 'df.xlsx'
df.to_excel(excel_output, index=False)

from google.colab import files
files.download(excel_output)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>