In [1]:
import pandas as pd
import numpy as np
from math import radians, acos, sin, cos, trunc

In [2]:
def distance(lat1,lon1,lat2,lon2):
    if (lat1==lat2 and lon1==lon2):
        return 0
    else:
        R=3961 # Radius of the Earth in miles
        slat = radians(lat1)
        slon = radians(lon1)
        elat = radians(lat2)
        elon = radians(lon2)
        dist = 3961 * acos(sin(slat)*sin(elat) + cos(slat)*cos(elat)*cos(slon - elon))
        return dist

In [3]:
def bikehistory(inputfile_df, month):
    datafile_df = inputfile_df.groupby(['bikeid'])['tripduration'].sum()
    datafile_df = datafile_df.to_frame()
    datafile_df['hours'] = round(datafile_df['tripduration']/3600,0)
    datafile_df.hours=datafile_df.hours.astype('int64')

    bike_history_df=pd.DataFrame(columns=['month','bikeid','mileage','biketrips'])
    for row in range(len(datafile_df)):
        bikeid = datafile_df.iloc[row].name
        bikepath_df = inputfile_df.loc[inputfile_df['bikeid']==bikeid]
        bikepath_df.index=np.arange(len(bikepath_df))+1
        tbike_distance=0
        bike_history={}
        for row2 in range(len(bikepath_df)):
            lat1=bikepath_df.loc[row2+1,'start station latitude']
            lon1=bikepath_df.loc[row2+1,'start station longitude'] 
            lat2=bikepath_df.loc[row2+1,'end station latitude']
            lon2=bikepath_df.loc[row2+1,'end station longitude']
            tbike_distance+=distance(lat1,lon1,lat2,lon2)
        bike_history['month'] = month
        bike_history['bikeid'] = bikeid
        bike_history['mileage'] = round(tbike_distance,2)
        bike_history['biketrips'] = len(bikepath_df)
        bike_history_df=bike_history_df.append(bike_history,ignore_index=True)
    bike_history_df = bike_history_df.merge(datafile_df, on='bikeid')
    bike_history_df = bike_history_df.sort_values(by=['mileage'],ascending=False)
    pd.set_option("display.max_row", None, "display.max_columns", None)
    bike_history_df.bikeid=bike_history_df.bikeid.astype('int64')
    bike_history_df.biketrips=bike_history_df.biketrips.astype('int64')
    bike_history_df.set_index('bikeid', inplace=True)
    return bike_history_df

In [4]:
def top25mileage(bike_history_df, inputfile_df, month):
    top_mileage_df=pd.DataFrame()
    for row in range(25):
        bikeid = bike_history_df.iloc[row].name
        bikepath_df = inputfile_df.loc[inputfile_df['bikeid']==bikeid]
        bikepath_df.index=np.arange(len(bikepath_df))+1
        bikepath_df['month_bikeid'] = str(bikeid) + "-" + month
        top_mileage_df = top_mileage_df.append(bikepath_df, ignore_index=False)
    pd.set_option("display.max_row", None, "display.max_columns", None)
    top_mileage_df["bike_route_order"]=top_mileage_df.index
    top_mileage_df.head(25)
    return top_mileage_df

In [5]:
def getmonth(i):
    switcher={
        1:'January',
        2:'February',
        3:'March',
        4:'April',
        5:'May',
        6:'June',
        7:'July',
        8:'August',
        9:'September',
        10:'October',
        11:'November',
        12:'December'
    }
    return switcher.get(i,"Invalid Month")

In [6]:
configfile="Data/citibike_datafile_names.csv"
datafiles_df=pd.read_csv(configfile)
for file in range(len(datafiles_df)):
    datafile=datafiles_df['filename'][file]
    month=getmonth(int(datafile[4:6]))
    output_mileage_name="Data/" + datafile[0:6] + "_top_mileage.csv"
    output_history_name="Data/" + datafile[0:6] + "_bike_history.csv"
    inputfile_df=pd.read_csv("Data/" + datafile)
    print (output_mileage_name)
    print (output_history_name)
    bike_history_df=bikehistory(inputfile_df,month)
    top_mileage_df=top25mileage(bike_history_df, inputfile_df, month)
    bike_history_df.to_csv(output_history_name,index=True)
    top_mileage_df.to_csv(output_mileage_name, index=False)
print('Processing completed...')

Data/202001_top_mileage.csv
Data/202001_bike_history.csv


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  import sys


Data/202002_top_mileage.csv
Data/202002_bike_history.csv
Data/202003_top_mileage.csv
Data/202003_bike_history.csv
Data/202004_top_mileage.csv
Data/202004_bike_history.csv
Data/202005_top_mileage.csv
Data/202005_bike_history.csv
Data/202006_top_mileage.csv
Data/202006_bike_history.csv
Data/202007_top_mileage.csv
Data/202007_bike_history.csv
Data/202008_top_mileage.csv
Data/202008_bike_history.csv
Data/202009_top_mileage.csv
Data/202009_bike_history.csv
Data/202010_top_mileage.csv
Data/202010_bike_history.csv
Data/202011_top_mileage.csv
Data/202011_bike_history.csv
Data/202012_top_mileage.csv
Data/202012_bike_history.csv
Processing completed...
