### This code cleans trip data (checks for duplicates, recalculates and filters duration)

In [1]:
import pandas as pd  
import numpy as np  
import matplotlib.pyplot as plt  
import seaborn as sns
from sqlalchemy import create_engine, text
%matplotlib inline

In [2]:
database_name = 'scooters'

connection_string = f"postgresql://postgres:postgres@localhost:5432/{database_name}"

engine = create_engine(connection_string)

In [84]:
query = '''
SELECT *
FROM trips;
'''
#selects everything from the trips table
with engine.connect() as connection:    
    trips = pd.read_sql(text(query), con = connection)

#convert start and end times/dates into datetimes in new columns
trips['start_dt'] = pd.to_datetime(trips.startdate.astype(str)+ ' ' +trips.starttime.astype(str))
trips['end_dt'] = pd.to_datetime(trips.enddate.astype(str)+ ' ' +trips.endtime.astype(str))

trips.head() 

Unnamed: 0,pubtimestamp,companyname,triprecordnum,sumdid,tripduration,tripdistance,startdate,starttime,enddate,endtime,startlatitude,startlongitude,endlatitude,endlongitude,triproute,create_dt,start_dt,end_dt
0,2019-05-01 00:00:55.423,Bird,BRD2134,Powered9EAJL,3.0,958.00528,2019-05-01,00:00:20.460000,2019-05-01,00:02:52.346666,36.1571,-86.8036,36.1566,-86.8067,"[(36.157235, -86.803612), (36.157235, -86.8036...",2019-05-02 05:30:23.780,2019-05-01 00:00:20.460000,2019-05-01 00:02:52.346666
1,2019-05-01 00:03:33.147,Lyft,LFT5,Powered296631,1.7156,1371.39112,2019-05-01,00:01:50.090000,2019-05-01,00:03:33.026666,36.15797,-86.77896,36.16054,-86.77689,"[(36.15797, -86.77896), (36.15795, -86.77873),...",2019-05-02 07:20:32.757,2019-05-01 00:01:50.090000,2019-05-01 00:03:33.026666
2,2019-05-01 00:05:55.570,Bird,BRD2168,Powered7S2UU,3.0,2296.588,2019-05-01,00:03:47.363333,2019-05-01,00:07:13.596666,36.1547,-86.7818,36.1565,-86.7868,"[(36.155068, -86.782124), (36.156597, -86.78675)]",2019-05-02 05:30:24.530,2019-05-01 00:03:47.363333,2019-05-01 00:07:13.596666
3,2019-05-01 00:05:55.570,Bird,BRD2166,PoweredZIIVX,3.0,1200.78744,2019-05-01,00:04:21.386666,2019-05-01,00:06:59.176666,36.1494,-86.7795,36.1531,-86.7796,"[(36.149741, -86.779344), (36.149741, -86.7793...",2019-05-02 05:30:24.237,2019-05-01 00:04:21.386666,2019-05-01 00:06:59.176666
4,2019-05-01 00:05:55.570,Bird,BRD2165,PoweredJ7MB3,2.0,351.04988,2019-05-01,00:04:27.796666,2019-05-01,00:06:23.150000,36.1778,-86.7866,36.1774,-86.7876,"[(36.177699, -86.786477), (36.177711, -86.7864...",2019-05-02 05:30:24.207,2019-05-01 00:04:27.796666,2019-05-01 00:06:23.150000


In [86]:
#create new column showing length of each trip

trips['trip_length'] = trips.end_dt-trips.start_dt

In [87]:
trips = trips.loc[(trips.trip_length.dt.total_seconds() >= 60) & (trips.trip_length.dt.total_seconds() <= 86400)] #filters out invalid data
trips.shape

(552987, 19)

In [88]:
trips100 = trips.head(100) #sample dataset
trips100

Unnamed: 0,pubtimestamp,companyname,triprecordnum,sumdid,tripduration,tripdistance,startdate,starttime,enddate,endtime,startlatitude,startlongitude,endlatitude,endlongitude,triproute,create_dt,start_dt,end_dt,trip_length
0,2019-05-01 00:00:55.423,Bird,BRD2134,Powered9EAJL,3.000000,958.00528,2019-05-01,00:00:20.460000,2019-05-01,00:02:52.346666,36.15710,-86.80360,36.15660,-86.80670,"[(36.157235, -86.803612), (36.157235, -86.8036...",2019-05-02 05:30:23.780,2019-05-01 00:00:20.460000,2019-05-01 00:02:52.346666,0 days 00:02:31.886666
1,2019-05-01 00:03:33.147,Lyft,LFT5,Powered296631,1.715600,1371.39112,2019-05-01,00:01:50.090000,2019-05-01,00:03:33.026666,36.15797,-86.77896,36.16054,-86.77689,"[(36.15797, -86.77896), (36.15795, -86.77873),...",2019-05-02 07:20:32.757,2019-05-01 00:01:50.090000,2019-05-01 00:03:33.026666,0 days 00:01:42.936666
2,2019-05-01 00:05:55.570,Bird,BRD2168,Powered7S2UU,3.000000,2296.58800,2019-05-01,00:03:47.363333,2019-05-01,00:07:13.596666,36.15470,-86.78180,36.15650,-86.78680,"[(36.155068, -86.782124), (36.156597, -86.78675)]",2019-05-02 05:30:24.530,2019-05-01 00:03:47.363333,2019-05-01 00:07:13.596666,0 days 00:03:26.233333
3,2019-05-01 00:05:55.570,Bird,BRD2166,PoweredZIIVX,3.000000,1200.78744,2019-05-01,00:04:21.386666,2019-05-01,00:06:59.176666,36.14940,-86.77950,36.15310,-86.77960,"[(36.149741, -86.779344), (36.149741, -86.7793...",2019-05-02 05:30:24.237,2019-05-01 00:04:21.386666,2019-05-01 00:06:59.176666,0 days 00:02:37.790000
4,2019-05-01 00:05:55.570,Bird,BRD2165,PoweredJ7MB3,2.000000,351.04988,2019-05-01,00:04:27.796666,2019-05-01,00:06:23.150000,36.17780,-86.78660,36.17740,-86.78760,"[(36.177699, -86.786477), (36.177711, -86.7864...",2019-05-02 05:30:24.207,2019-05-01 00:04:27.796666,2019-05-01 00:06:23.150000,0 days 00:01:55.353334
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
100,2019-05-01 00:37:39.827,Lyft,LFT61,Powered594191,16.260350,11033.46492,2019-05-01,00:21:24.126666,2019-05-01,00:37:39.750000,36.15843,-86.77686,36.16911,-86.76755,"[(36.15843, -86.77686), (36.15838, -86.7769), ...",2019-05-02 07:20:34.553,2019-05-01 00:21:24.126666,2019-05-01 00:37:39.750000,0 days 00:16:15.623334
101,2019-05-01 00:37:41.263,Lyft,LFT62,Powered251145,8.953833,633.20212,2019-05-01,00:28:43.906666,2019-05-01,00:37:41.136666,36.16255,-86.77552,36.16285,-86.77590,"[(36.16255, -86.77552), (36.16259, -86.77561),...",2019-05-02 07:20:34.590,2019-05-01 00:28:43.906666,2019-05-01 00:37:41.136666,0 days 00:08:57.230000
102,2019-05-01 00:39:22.380,Lyft,LFT63,Powered915923,12.783283,13989.50176,2019-05-01,00:26:35.293333,2019-05-01,00:39:22.290000,36.14112,-86.79115,36.14984,-86.76578,"[(36.14112, -86.79115), (36.1412, -86.79111), ...",2019-05-02 07:20:34.620,2019-05-01 00:26:35.293333,2019-05-01 00:39:22.290000,0 days 00:12:46.996667
103,2019-05-01 00:39:41.220,Lyft,LFT64,Powered474035,3.505833,1105.64308,2019-05-01,00:36:10.770000,2019-05-01,00:39:41.120000,36.15129,-86.79662,36.15133,-86.79417,"[(36.15129, -86.79662), (36.15121, -86.79654),...",2019-05-02 07:20:34.650,2019-05-01 00:36:10.770000,2019-05-01 00:39:41.120000,0 days 00:03:30.350000


In [89]:
trips['date'] = trips['start_dt'].dt.date #adds date column for matching
trips

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  trips['date'] = trips['start_dt'].dt.date #adds date column for matching


Unnamed: 0,pubtimestamp,companyname,triprecordnum,sumdid,tripduration,tripdistance,startdate,starttime,enddate,endtime,startlatitude,startlongitude,endlatitude,endlongitude,triproute,create_dt,start_dt,end_dt,trip_length,date
0,2019-05-01 00:00:55.423,Bird,BRD2134,Powered9EAJL,3.000000,958.00528,2019-05-01,00:00:20.460000,2019-05-01,00:02:52.346666,36.157100,-86.803600,36.156600,-86.806700,"[(36.157235, -86.803612), (36.157235, -86.8036...",2019-05-02 05:30:23.780,2019-05-01 00:00:20.460000,2019-05-01 00:02:52.346666,0 days 00:02:31.886666,2019-05-01
1,2019-05-01 00:03:33.147,Lyft,LFT5,Powered296631,1.715600,1371.39112,2019-05-01,00:01:50.090000,2019-05-01,00:03:33.026666,36.157970,-86.778960,36.160540,-86.776890,"[(36.15797, -86.77896), (36.15795, -86.77873),...",2019-05-02 07:20:32.757,2019-05-01 00:01:50.090000,2019-05-01 00:03:33.026666,0 days 00:01:42.936666,2019-05-01
2,2019-05-01 00:05:55.570,Bird,BRD2168,Powered7S2UU,3.000000,2296.58800,2019-05-01,00:03:47.363333,2019-05-01,00:07:13.596666,36.154700,-86.781800,36.156500,-86.786800,"[(36.155068, -86.782124), (36.156597, -86.78675)]",2019-05-02 05:30:24.530,2019-05-01 00:03:47.363333,2019-05-01 00:07:13.596666,0 days 00:03:26.233333,2019-05-01
3,2019-05-01 00:05:55.570,Bird,BRD2166,PoweredZIIVX,3.000000,1200.78744,2019-05-01,00:04:21.386666,2019-05-01,00:06:59.176666,36.149400,-86.779500,36.153100,-86.779600,"[(36.149741, -86.779344), (36.149741, -86.7793...",2019-05-02 05:30:24.237,2019-05-01 00:04:21.386666,2019-05-01 00:06:59.176666,0 days 00:02:37.790000,2019-05-01
4,2019-05-01 00:05:55.570,Bird,BRD2165,PoweredJ7MB3,2.000000,351.04988,2019-05-01,00:04:27.796666,2019-05-01,00:06:23.150000,36.177800,-86.786600,36.177400,-86.787600,"[(36.177699, -86.786477), (36.177711, -86.7864...",2019-05-02 05:30:24.207,2019-05-01 00:04:27.796666,2019-05-01 00:06:23.150000,0 days 00:01:55.353334,2019-05-01
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
565517,2019-08-01 04:53:48.000,JUMP,JMP3,Powered5614e4fc-1971-5e5c-a545-0fd88cd3331d,17.497950,7075.20000,2019-07-31,23:47:43,2019-08-01,00:05:13,36.154633,-86.798340,36.149220,-86.813980,"[('36.154631', '-86.798341'), ('36.154631', '-...",2019-08-02 08:21:27.127,2019-07-31 23:47:43.000000,2019-08-01 00:05:13.000000,0 days 00:17:30,2019-07-31
565518,2019-08-01 04:53:48.000,JUMP,JMP2,Powereda4712099-5e55-5332-996c-d6e5c910535d,3.154917,1320.00000,2019-07-31,23:57:19,2019-08-01,00:00:28,36.153015,-86.783585,36.152843,-86.779594,"[('36.153016', '-86.783586'), ('36.153016', '-...",2019-08-02 08:21:26.773,2019-07-31 23:57:19.000000,2019-08-01 00:00:28.000000,0 days 00:03:09,2019-07-31
565519,2019-08-01 04:53:48.000,JUMP,JMP1,Poweredb8a3a269-d1ca-571f-9b2f-89b7399b5537,18.440650,7920.00000,2019-07-31,23:41:52,2019-08-01,00:00:18,36.153687,-86.784580,36.164158,-86.776900,"[('36.153685', '-86.784578'), ('36.153685', '-...",2019-08-02 08:21:26.803,2019-07-31 23:41:52.000000,2019-08-01 00:00:18.000000,0 days 00:18:26,2019-07-31
565520,2019-08-01 07:04:00.000,JUMP,JMP35,Poweredf077a919-d569-5e70-8ca7-71d179ffacf9,142.345610,20433.60000,2019-07-31,23:26:15,2019-08-01,01:48:35,36.155735,-86.775185,36.173904,-86.785450,"[('36.155736', '-86.775181'), ('36.155531', '-...",2019-08-02 08:24:21.967,2019-07-31 23:26:15.000000,2019-08-01 01:48:35.000000,0 days 02:22:20,2019-07-31


In [90]:
trip_count = trips.groupby(['companyname','sumdid','date']).size().reset_index().rename(columns={0:'tripcount'}) #trip count per day per scooter
trip_count

Unnamed: 0,companyname,sumdid,date,tripcount
0,Bird,Powered11MUW,2019-07-26,1
1,Bird,Powered11XTN,2019-05-26,3
2,Bird,Powered11XTN,2019-05-28,1
3,Bird,Powered11XTN,2019-05-29,1
4,Bird,Powered11XTN,2019-05-30,2
...,...,...,...,...
201240,SPIN,Powered9963670,2019-05-18,2
201241,SPIN,Powered9963670,2019-05-19,4
201242,SPIN,Powered9963670,2019-05-24,1
201243,SPIN,Powered9963670,2019-05-25,2


In [91]:
use = pd.merge(avail,trip_count[['sumdid','date','tripcount']],on=['sumdid','date'], how='left') #merge datasets
use.head()

Unnamed: 0,companyname,sumdid,date,count,tripcount
0,Bird,Powered11MUW,2019-07-26,15,1.0
1,Bird,Powered11MUW,2019-07-27,193,
2,Bird,Powered11MUW,2019-07-28,123,
3,Bird,Powered11MUW,2019-07-29,69,
4,Bird,Powered11MUW,2019-07-30,12,


In [92]:
use['tripcount_filled'] = use['tripcount'].fillna(0) #adds a new column replacing nan with zero

In [93]:
use.describe()

Unnamed: 0,count,tripcount,tripcount_filled
count,413563.0,200608.0,413563.0
mean,177.515984,2.738789,1.328511
std,94.567047,2.663888,2.305599
min,1.0,1.0,0.0
25%,95.0,1.0,0.0
50%,170.0,2.0,0.0
75%,285.0,3.0,2.0
max,289.0,220.0,220.0


In [58]:
use.groupby('companyname')[['tripcount','tripcount_filled']].mean() #show average use per scooter per day by company including and excluding nan values

Unnamed: 0_level_0,tripcount,tripcount_filled
companyname,Unnamed: 1_level_1,Unnamed: 2_level_1
Bird,1.775958,0.915804
Bolt,1.961595,0.892922
Gotcha,2.42575,0.183212
Jump,1.408359,0.041622
Lime,3.582928,2.391354
Lyft,2.614627,1.675956
Spin,1.886672,0.765528


In [94]:
use.value_counts('count').sort_index()

count
1        995
2        761
3        647
4        665
5        660
       ...  
285     6237
286     9915
287    27663
288    63118
289       10
Length: 289, dtype: int64

In [95]:
unused = use.loc[use.tripcount_filled == 0]
unused

Unnamed: 0,companyname,sumdid,date,count,tripcount,tripcount_filled
1,Bird,Powered11MUW,2019-07-27,193,,0.0
2,Bird,Powered11MUW,2019-07-28,123,,0.0
3,Bird,Powered11MUW,2019-07-29,69,,0.0
4,Bird,Powered11MUW,2019-07-30,12,,0.0
5,Bird,Powered11MUW,2019-07-31,89,,0.0
...,...,...,...,...,...,...
413558,Spin,Powered9963670,2019-05-27,100,,0.0
413559,Spin,Powered9963670,2019-05-28,108,,0.0
413560,Spin,Powered9963670,2019-05-29,61,,0.0
413561,Spin,Powered9963670,2019-06-16,11,,0.0


In [96]:
underused = use.loc[(use.tripcount < 3) & (use.tripcount != 0)]
underused

Unnamed: 0,companyname,sumdid,date,count,tripcount,tripcount_filled
0,Bird,Powered11MUW,2019-07-26,15,1.0,1.0
8,Bird,Powered11XTN,2019-05-28,154,1.0,1.0
9,Bird,Powered11XTN,2019-05-29,97,1.0,1.0
10,Bird,Powered11XTN,2019-05-30,141,2.0,2.0
11,Bird,Powered11XTN,2019-05-31,136,1.0,1.0
...,...,...,...,...,...,...
413547,Spin,Powered9963670,2019-05-16,163,1.0,1.0
413549,Spin,Powered9963670,2019-05-18,202,2.0,2.0
413555,Spin,Powered9963670,2019-05-24,167,1.0,1.0
413556,Spin,Powered9963670,2019-05-25,186,2.0,2.0


In [97]:
suff_use = use.loc[use.tripcount >= 3]
suff_use

Unnamed: 0,companyname,sumdid,date,count,tripcount,tripcount_filled
6,Bird,Powered11XTN,2019-05-26,29,3.0,3.0
21,Bird,Powered11XTN,2019-06-10,112,4.0,4.0
29,Bird,Powered11XTN,2019-06-25,182,3.0,3.0
33,Bird,Powered11XTN,2019-06-29,64,3.0,3.0
53,Bird,Powered11XTN,2019-07-21,33,4.0,4.0
...,...,...,...,...,...,...
413493,Spin,Powered9958429,2019-05-31,184,3.0,3.0
413495,Spin,Powered9958429,2019-06-02,165,3.0,3.0
413501,Spin,Powered9958429,2019-06-08,108,3.0,3.0
413503,Spin,Powered9958429,2019-06-10,167,3.0,3.0


In [98]:
test = trips.duplicated(subset=['sumdid','starttime','startdate'], keep='first')
test

0         False
1         False
2         False
3         False
4         False
          ...  
565517    False
565518    False
565519    False
565520    False
565521    False
Length: 552987, dtype: bool

In [99]:
test.value_counts()

False    518246
True      34741
dtype: int64

In [100]:
trips.loc[trips.duplicated(subset=['sumdid','starttime','startdate'], keep='first') == True].value_counts('companyname')

companyname
Lime             33841
Gotcha             823
Lyft                49
JUMP                21
Bird                 5
Bolt Mobility        2
dtype: int64

In [101]:
trips.loc[trips.duplicated(subset=['sumdid','starttime','startdate'], keep='first') == True].value_counts(['sumdid','companyname'])

sumdid                                        companyname  
PoweredWVIAXNCVDNOXP                          Lime             257
PoweredR3QXO6TL2HIAB                          Lime             233
PoweredEGRN7CFK6Q6K2                          Lime             222
PoweredRJ7YYJTHAR5N3                          Lime             209
PoweredDJQATZBXPBUAE                          Lime             183
                                                              ... 
PoweredP7EA7PDZH6H4H                          Lime               1
PoweredP5IOOPFILEFAF                          Lime               1
Powered643348                                 Lyft               1
Powered650754                                 Lyft               1
Powered-3c096a24-c7ce-0c6b-f380-301ef2d96de6  Bolt Mobility      1
Length: 1727, dtype: int64

In [102]:
trips.loc[(trips.duplicated(subset=['sumdid','starttime','startdate'], keep='first') == True) & (trips.sumdid == 'PoweredEGRN7CFK6Q6K2')]

Unnamed: 0,pubtimestamp,companyname,triprecordnum,sumdid,tripduration,tripdistance,startdate,starttime,enddate,endtime,startlatitude,startlongitude,endlatitude,endlongitude,triproute,create_dt,start_dt,end_dt,trip_length,date
100625,2019-05-17 12:51:43.710,Lime,LIM1157,PoweredEGRN7CFK6Q6K2,17.966667,733.6536,2019-05-17,06:32:58,2019-05-17,06:50:56,36.104804,-86.819145,36.108471,-86.834578,"[(36.1048, -86.81914), (36.10482, -86.81917), ...",2019-05-18 12:14:10.350,2019-05-17 06:32:58,2019-05-17 06:50:56,0 days 00:17:58,2019-05-17
100763,2019-05-17 14:01:49.003,Lime,LIM1249,PoweredEGRN7CFK6Q6K2,17.966667,733.6536,2019-05-17,06:32:58,2019-05-17,06:50:56,36.104804,-86.819145,36.108471,-86.834578,"[(36.1048, -86.81914), (36.10482, -86.81917), ...",2019-05-18 12:23:52.403,2019-05-17 06:32:58,2019-05-17 06:50:56,0 days 00:17:58,2019-05-17
100911,2019-05-17 14:01:49.003,Lime,LIM1248,PoweredEGRN7CFK6Q6K2,4.516667,177.3936,2019-05-17,06:08:48,2019-05-17,06:13:19,36.104992,-86.819233,36.104841,-86.819090,"[(36.10499, -86.81923), (36.10481, -86.8191500...",2019-05-18 12:23:52.370,2019-05-17 06:08:48,2019-05-17 06:13:19,0 days 00:04:31,2019-05-17
101192,2019-05-17 14:56:54.050,Lime,LIM1348,PoweredEGRN7CFK6Q6K2,4.516667,177.3936,2019-05-17,06:08:48,2019-05-17,06:13:19,36.104992,-86.819233,36.104841,-86.819090,"[(36.10499, -86.81923), (36.10481, -86.8191500...",2019-05-18 12:32:03.960,2019-05-17 06:08:48,2019-05-17 06:13:19,0 days 00:04:31,2019-05-17
101194,2019-05-17 14:56:54.050,Lime,LIM1349,PoweredEGRN7CFK6Q6K2,17.966667,733.6536,2019-05-17,06:32:58,2019-05-17,06:50:56,36.104804,-86.819145,36.108471,-86.834578,"[(36.1048, -86.81914), (36.10482, -86.81917), ...",2019-05-18 12:32:03.990,2019-05-17 06:32:58,2019-05-17 06:50:56,0 days 00:17:58,2019-05-17
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
215798,2019-05-30 20:50:18.503,Lime,LIM1694,PoweredEGRN7CFK6Q6K2,6.350000,3500.6136,2019-05-30,00:50:29,2019-05-30,00:56:50,36.125224,-86.789938,36.121278,-86.792372,"[(36.12522, -86.78994), (36.12523, -86.78993),...",2019-05-31 07:45:26.800,2019-05-30 00:50:29,2019-05-30 00:56:50,0 days 00:06:21,2019-05-30
216211,2019-05-30 21:50:19.667,Lime,LIM1872,PoweredEGRN7CFK6Q6K2,6.350000,3500.6136,2019-05-30,00:50:29,2019-05-30,00:56:50,36.125224,-86.789938,36.121278,-86.792372,"[(36.12522, -86.78994), (36.12523, -86.78993),...",2019-05-31 07:46:34.360,2019-05-30 00:50:29,2019-05-30 00:56:50,0 days 00:06:21,2019-05-30
216594,2019-05-30 22:50:19.573,Lime,LIM2065,PoweredEGRN7CFK6Q6K2,6.350000,3500.6136,2019-05-30,00:50:29,2019-05-30,00:56:50,36.125224,-86.789938,36.121278,-86.792372,"[(36.12522, -86.78994), (36.12523, -86.78993),...",2019-05-31 07:47:48.973,2019-05-30 00:50:29,2019-05-30 00:56:50,0 days 00:06:21,2019-05-30
217026,2019-05-30 23:50:20.257,Lime,LIM2235,PoweredEGRN7CFK6Q6K2,6.350000,3500.6136,2019-05-30,00:50:29,2019-05-30,00:56:50,36.125224,-86.789938,36.121278,-86.792372,"[(36.12522, -86.78994), (36.12523, -86.78993),...",2019-05-31 07:49:10.100,2019-05-30 00:50:29,2019-05-30 00:56:50,0 days 00:06:21,2019-05-30


In [103]:
with pd.option_context('display.max_rows', None,
                       'display.max_columns', None,
                       'display.precision', 3,
                       ):
    print(trips.loc[trips.duplicated(subset=['sumdid','starttime','startdate'], keep='first') == True].value_counts(['sumdid','companyname','startdate']))

sumdid                                        companyname    startdate 
PoweredWVIAXNCVDNOXP                          Lime           2019-05-17    217
PoweredEGRN7CFK6Q6K2                          Lime           2019-05-17    199
PoweredRJ7YYJTHAR5N3                          Lime           2019-05-17    184
PoweredR3QXO6TL2HIAB                          Lime           2019-05-17    175
PoweredDJQATZBXPBUAE                          Lime           2019-05-17    170
PoweredRRC5FV6DYYHBL                          Lime           2019-05-16    114
PoweredKCXMXKUIP3Z4E                          Lime           2019-05-17    113
PoweredPDGQQ2VKBRR6N                          Lime           2019-05-16    108
PoweredOYKGTYGE4MKYC                          Lime           2019-05-16    106
Powered4FCVKYQ3DTH7H                          Lime           2019-05-17    101
PoweredLNAVSI3QKRPVH                          Lime           2019-05-17    100
Powered3SDFC5NC4XZPZ                          Lime         

In [104]:
trips.shape

(552987, 20)

In [105]:
trips_cleaned = trips.loc[trips.duplicated(subset=['sumdid','starttime','startdate'], keep='first') == False]
trips_cleaned

Unnamed: 0,pubtimestamp,companyname,triprecordnum,sumdid,tripduration,tripdistance,startdate,starttime,enddate,endtime,startlatitude,startlongitude,endlatitude,endlongitude,triproute,create_dt,start_dt,end_dt,trip_length,date
0,2019-05-01 00:00:55.423,Bird,BRD2134,Powered9EAJL,3.000000,958.00528,2019-05-01,00:00:20.460000,2019-05-01,00:02:52.346666,36.157100,-86.803600,36.156600,-86.806700,"[(36.157235, -86.803612), (36.157235, -86.8036...",2019-05-02 05:30:23.780,2019-05-01 00:00:20.460000,2019-05-01 00:02:52.346666,0 days 00:02:31.886666,2019-05-01
1,2019-05-01 00:03:33.147,Lyft,LFT5,Powered296631,1.715600,1371.39112,2019-05-01,00:01:50.090000,2019-05-01,00:03:33.026666,36.157970,-86.778960,36.160540,-86.776890,"[(36.15797, -86.77896), (36.15795, -86.77873),...",2019-05-02 07:20:32.757,2019-05-01 00:01:50.090000,2019-05-01 00:03:33.026666,0 days 00:01:42.936666,2019-05-01
2,2019-05-01 00:05:55.570,Bird,BRD2168,Powered7S2UU,3.000000,2296.58800,2019-05-01,00:03:47.363333,2019-05-01,00:07:13.596666,36.154700,-86.781800,36.156500,-86.786800,"[(36.155068, -86.782124), (36.156597, -86.78675)]",2019-05-02 05:30:24.530,2019-05-01 00:03:47.363333,2019-05-01 00:07:13.596666,0 days 00:03:26.233333,2019-05-01
3,2019-05-01 00:05:55.570,Bird,BRD2166,PoweredZIIVX,3.000000,1200.78744,2019-05-01,00:04:21.386666,2019-05-01,00:06:59.176666,36.149400,-86.779500,36.153100,-86.779600,"[(36.149741, -86.779344), (36.149741, -86.7793...",2019-05-02 05:30:24.237,2019-05-01 00:04:21.386666,2019-05-01 00:06:59.176666,0 days 00:02:37.790000,2019-05-01
4,2019-05-01 00:05:55.570,Bird,BRD2165,PoweredJ7MB3,2.000000,351.04988,2019-05-01,00:04:27.796666,2019-05-01,00:06:23.150000,36.177800,-86.786600,36.177400,-86.787600,"[(36.177699, -86.786477), (36.177711, -86.7864...",2019-05-02 05:30:24.207,2019-05-01 00:04:27.796666,2019-05-01 00:06:23.150000,0 days 00:01:55.353334,2019-05-01
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
565517,2019-08-01 04:53:48.000,JUMP,JMP3,Powered5614e4fc-1971-5e5c-a545-0fd88cd3331d,17.497950,7075.20000,2019-07-31,23:47:43,2019-08-01,00:05:13,36.154633,-86.798340,36.149220,-86.813980,"[('36.154631', '-86.798341'), ('36.154631', '-...",2019-08-02 08:21:27.127,2019-07-31 23:47:43.000000,2019-08-01 00:05:13.000000,0 days 00:17:30,2019-07-31
565518,2019-08-01 04:53:48.000,JUMP,JMP2,Powereda4712099-5e55-5332-996c-d6e5c910535d,3.154917,1320.00000,2019-07-31,23:57:19,2019-08-01,00:00:28,36.153015,-86.783585,36.152843,-86.779594,"[('36.153016', '-86.783586'), ('36.153016', '-...",2019-08-02 08:21:26.773,2019-07-31 23:57:19.000000,2019-08-01 00:00:28.000000,0 days 00:03:09,2019-07-31
565519,2019-08-01 04:53:48.000,JUMP,JMP1,Poweredb8a3a269-d1ca-571f-9b2f-89b7399b5537,18.440650,7920.00000,2019-07-31,23:41:52,2019-08-01,00:00:18,36.153687,-86.784580,36.164158,-86.776900,"[('36.153685', '-86.784578'), ('36.153685', '-...",2019-08-02 08:21:26.803,2019-07-31 23:41:52.000000,2019-08-01 00:00:18.000000,0 days 00:18:26,2019-07-31
565520,2019-08-01 07:04:00.000,JUMP,JMP35,Poweredf077a919-d569-5e70-8ca7-71d179ffacf9,142.345610,20433.60000,2019-07-31,23:26:15,2019-08-01,01:48:35,36.155735,-86.775185,36.173904,-86.785450,"[('36.155736', '-86.775181'), ('36.155531', '-...",2019-08-02 08:24:21.967,2019-07-31 23:26:15.000000,2019-08-01 01:48:35.000000,0 days 02:22:20,2019-07-31
