In [8]:
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import math
from IPython.display import Markdown,display
import datetime
import time
import re
import os



In [16]:
#Takes a dataframe and returns another
#Adds a colmn containing time difference between each line
def delay_col(df):
    df['delay']= pd.Series(np.empty(len(df)), index=df.index)
    for i in range(1,len(df)):
        df['delay'][i]=df['recorded_at'][i]-df['recorded_at'][i-1]
    df['delay'][0]=np.nan
    return df


#Takes a dataframe and time interval and returns a list of dataframes
#Divides each dataframe by ride (Each ride is seperated by the given interval)
def divide_asset_by_time(df,interval):
    df_list = []
    start_line = 0
    for i in range(1,len(df)): 
        if  df['recorded_at'][i]-df['recorded_at'][i-1]> interval:

            df_list.append(df.iloc[start_line:i])
            start_line = i     
        if i == len(df)-1:    
            df_list.append(df.iloc[start_line:len(df)])
    return df_list

#This function takes a dataframe and a string and returns a dataframe
#Fills forward the column in the dataframe
def fill(df,column):
    df[column]=df[column].fillna(method='ffill')
    return df
        
#This function takes a dataframe and returns another
#Input dataframe is a separate ride
#Adds a cloumn for distance travelled between each line
def calcul_dist(df):
    df=df.reset_index()
    df['distance']= pd.Series(np.empty(len(df)), index=df.index)
    for i in range(1,len(df)):
        df['distance'][i] = (df['recorded_at'][i]-df['recorded_at'][i-1]).total_seconds()*df['MDI_OBD_SPEED'][i]/3600
    return df

#This function takes a csv file a time interval a target directory 
#Takes an asset dataframe and :
#Adds a column for time delay between lines/recordings
#Then divides the asset dataframe into seperate rides depending on a given interval
#Forward fills MDI_OBD_SPEED
#Adds a column for fuel consumption between lines/recordings for each ride
#Adds a column for distance traveled between lines/recordings for each ride
#Generates csv files for each ride
def generate_rides_with_distance(file_name,interval,directory,n=None):
    df_asset=pd.read_csv(directory+"/"+file_name,parse_dates=[1],nrows=n,na_values=' ')
    os.makedirs(directory+"/"+file_name.split('.')[0]+"_"+str(n))
    new_dir=directory+"/"+file_name.split('.')[0]+"_"+str(n)
    df_asset['GPS_SPEED']=pd.to_numeric(df_asset['GPS_SPEED'],errors='coerce')
    df_asset['MDI_OBD_SPEED']=pd.to_numeric(df_asset['MDI_OBD_SPEED'],errors='coerce')
    df_asset['MDI_OBD_RPM']=pd.to_numeric(df_asset['MDI_OBD_RPM'],errors='coerce')
    df_asset['MDI_OBD_ENGINE_LOAD']=pd.to_numeric(df_asset['MDI_OBD_ENGINE_LOAD'],errors='coerce')
    df_asset['MDI_OBD_FUEL']=pd.to_numeric(df_asset['MDI_OBD_FUEL'],errors='coerce')
    df_asset['ODO_FULL_METER']=pd.to_numeric(df_asset['ODO_FULL_METER'],errors='coerce')
    df_asset['MDI_DASHBOARD_MILEAGE']=pd.to_numeric(df_asset['MDI_DASHBOARD_MILEAGE'],errors='coerce')
    ride_list=divide_asset_by_time(df_asset,interval)
    for idx,df in enumerate(ride_list):
        df=df.reset_index(drop=True)
        df['distance']= pd.Series(np.empty(len(df)), index=df.index)
        df['delay']= pd.Series(np.empty(len(df)), index=df.index)
        df['fuel']= pd.Series(np.empty(len(df)), index=df.index)
        df['filled']= pd.Series(np.empty(len(df)), index=df.index)
        last_fuel=np.nan
        last_speed=np.nan
        last_speed_time=np.nan
        for i in range(1,len(df)):
            #Condition
            if(not np.isnan(df['MDI_OBD_SPEED'][i])):
            
                df['filled'][i]=False
                last_speed_time=df['recorded_at'][i]
                last_speed=df['MDI_OBD_SPEED'][i]
            else :
                if(not np.isnan(last_speed) and df['recorded_at'][i]-last_speed_time < pd.Timedelta(minutes=1)):
            #Filling
                    df['MDI_OBD_SPEED'][i]=last_speed
                    df['filled'][i]=True
            
            #Condition
            if(not np.isnan(df['MDI_OBD_FUEL'][i])):
            
                if(not np.isnan(last_fuel)):
            #Filling
                    df['fuel'][i]=df['MDI_OBD_FUEL'][i]-last_fuel
                else:
                    df['fuel'][i]=np.nan
                last_fuel=df['MDI_OBD_FUEL'][i]    
            else:
                df['fuel'][i]=np.nan
                
            
            df['delay'][i]=df['recorded_at'][i]-df['recorded_at'][i-1]
            df['distance'][i] = 10*(df['recorded_at'][i]-df['recorded_at'][i-1]).total_seconds()*last_speed/36
        df['fuel'][0]=np.nan    
        df['distance'][0]=np.nan
        df['delay'][0]=np.nan
        df['filled'][0]=np.nan
        df['filled'] = df['filled'].astype('bool')
        df_name=new_dir+"/"+file_name.split('.')[0]+"_"+str(idx)+"."+file_name.split('.')[1]
        df.to_csv(df_name,na_rep=np.nan,index=False)
    return None
        
        
        
        

In [21]:
df_asset=pd.read_csv("data/data_asset_choosed/test.csv",parse_dates=[1])

In [None]:
generate_rides_with_distance("487.csv",pd.Timedelta(minutes=5),"data/data_asset_choosed")

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a