In [None]:
def load_data(fname):
    """Read the given database into two pandas dataframes.

    Rows of ``vehicles`` whose ``vid`` is the empty string and rows of
    ``predictions`` whose ``tmstmp`` is the empty string are filtered out by
    the SQL queries. Numeric columns are converted with ``errors='coerce'``,
    so values that cannot be parsed become NaN instead of raising.

    Args:
        fname (string): filename of sqlite3 database to read

    Returns:
        (pd.DataFrame, pd.DataFrame): a tuple of two dataframes, the first for
                                      the vehicle data and the second for the
                                      prediction data.
    """
    conn = sqlite3.connect(fname)
    try:
        vdf = pd.read_sql_query("SELECT * from vehicles WHERE vid NOT LIKE '';", conn)
        pdf = pd.read_sql_query("SELECT * from predictions WHERE tmstmp NOT LIKE '';", conn)
    finally:
        # Always release the database handle, even if a query fails.
        conn.close()

    # Vehicle data: numeric identifiers and measurements.
    for col in ('vid', 'hdg', 'pid', 'pdist', 'spd', 'tatripid'):
        vdf[col] = pd.to_numeric(vdf[col], errors='coerce')
    # Coordinates are forced to float even when every value parses as an integer.
    for col in ('lat', 'lon'):
        vdf[col] = pd.to_numeric(vdf[col], errors='coerce').astype(float)
    vdf['tmstmp'] = pd.to_datetime(vdf['tmstmp'])

    # Prediction data: timestamps, numeric identifiers, and the delay flag.
    for col in ('tmstmp', 'prdtm'):
        pdf[col] = pd.to_datetime(pdf[col])
    for col in ('stpid', 'vid', 'dstp', 'tatripid'):
        pdf[col] = pd.to_numeric(pdf[col], errors='coerce')
    # NOTE(review): astype(bool) is truthiness-based, so any non-empty string
    # (including "0" or "False") becomes True. This presumes dly is stored as
    # an integer/boolean value -- confirm against the database schema.
    pdf['dly'] = pdf['dly'].astype(bool)

    return (vdf, pdf)
    



def _split_at_pdist_resets(trip):
    """Cut the rows of one vehicle/pattern pair wherever ``pdist`` decreases.

    Args:
        trip (pd.DataFrame): non-empty rows sharing a single ``vid`` and ``pid``

    Returns:
        (list): dataframes indexed by ``tmstmp``, one per run of rows in which
                ``pdist`` never decreases
    """
    # Only sort when the rows are not already ordered by time and distance.
    already_ordered = (
        trip['tmstmp'].is_monotonic_increasing
        and trip['pdist'].is_monotonic_increasing
    )
    if not already_ordered:
        trip = trip.sort_values(['tmstmp', 'pdist'], ascending=[True, True])

    # A new trip starts at every row whose pdist is lower than the previous
    # row's; comparing the two shifted views finds all such rows at once.
    pdist = trip['pdist'].to_numpy()
    starts = (pdist[1:] < pdist[:-1]).nonzero()[0] + 1
    bounds = [0, *starts.tolist(), len(trip)]
    return [
        trip.iloc[begin:end].set_index('tmstmp')
        for begin, end in zip(bounds[:-1], bounds[1:])
    ]


def split_trips(df):
    """ Splits the dataframe of vehicle data into a list of dataframes for each individual trip.

    Rows are grouped by ``vid`` and then by ``pid`` (both in order of first
    appearance). Within each group the rows are ordered by ``tmstmp`` and
    ``pdist`` and a new trip begins whenever ``pdist`` decreases. Rows whose
    ``vid`` or ``pid`` is NaN belong to no trip and are left out, so every
    returned dataframe is non-empty.

    Args:
        df (pd.DataFrame): A dataframe containing vehicle data

    Returns:
        (list): A list of dataframes, where each dataFrame contains vehicle data for a single trip
    """
    trips = []
    # sort=False keeps first-appearance order; groupby drops NaN keys by
    # default, which avoids emitting empty frames for missing ids.
    for _, by_vehicle in df.groupby('vid', sort=False):
        for _, by_pattern in by_vehicle.groupby('pid', sort=False):
            trips.extend(_split_at_pdist_resets(by_pattern))
    return trips