In [10]:
import pandas as pd
import numpy as np
import geopy.distance

In [87]:
# Load the March "resume" CSV on its own.
df = pd.read_csv("./ecobicidata/ecobici_mar_resume.csv")

# Month tags that appear in the monthly file names (ecobici_<tag>.csv).
name_of_files = ["jan", "feb", "mar", "apr", "may", "jun", "jul"]

# Stack the monthly files in list order. Row labels are not reset, so each
# month keeps the labels it was read with.
fulldf = pd.concat(
    [pd.read_csv(f"./ecobicidata/ecobici_{month_tag}.csv") for month_tag in name_of_files]
)

In [19]:
def estaciones_df(path="./ecobicidata/estaciones-de-ecobici.csv"):
    """Build the origin ("retiro") and destination ("arribo") station lookup tables.

    Parameters
    ----------
    path : str, optional
        Location of the stations CSV. It must provide the columns ``id``,
        ``name``, ``location_lat``, ``location_lon`` and ``punto_geo``.
        Defaults to the file this notebook has always read.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.DataFrame)
        ``(estaciones_retiro, estaciones_arribo)``. Each table has a join key
        followed by the station name, latitude, longitude and ``punto_geo``,
        with ``_retiro`` / ``_arribo`` appended to those four column names.
        The ``Ciclo_Estacion_Retiro`` key holds ``id`` cast to ``str``; the
        ``Ciclo_EstacionArribo`` key keeps the dtype ``id`` was read with.
    """
    shared_cols = ["name", "location_lat", "location_lon", "punto_geo"]
    stations = pd.read_csv(path)[["id"] + shared_cols]

    def _lookup(key_name, key_values, suffix):
        # Select and rename by column name so the result does not depend on
        # the column order of the CSV.
        table = stations[shared_cols].rename(
            columns={col: f"{col}_{suffix}" for col in shared_cols}
        )
        table.insert(0, key_name, key_values)
        return table

    # The two keys intentionally keep different dtypes (str vs. as-read), as before.
    estaciones_retiro = _lookup("Ciclo_Estacion_Retiro", stations["id"].astype("str"), "retiro")
    estaciones_arribo = _lookup("Ciclo_EstacionArribo", stations["id"], "arribo")
    return estaciones_retiro, estaciones_arribo

# Build both station lookup tables (origin first, destination second).
estaciones_retiro, estaciones_arribo = estaciones_df()

In [90]:
def mergingfiles(month, er, ea):
    """Attach origin- and destination-station columns to every trip row.

    Parameters
    ----------
    month : pandas.DataFrame
        Trip rows containing ``Ciclo_Estacion_Retiro`` and ``Ciclo_EstacionArribo``.
    er : pandas.DataFrame
        Origin-station table keyed by ``Ciclo_Estacion_Retiro``.
    ea : pandas.DataFrame
        Destination-station table keyed by ``Ciclo_EstacionArribo``.

    Returns
    -------
    pandas.DataFrame
        ``month`` left-joined with ``er`` and then ``ea``; trips whose station
        is absent from a lookup table are kept with missing values.
    """
    with_origin = pd.merge(month, er, how="left", on="Ciclo_Estacion_Retiro")
    with_both_ends = pd.merge(with_origin, ea, how="left", on="Ciclo_EstacionArribo")
    return with_both_ends

# Enrich the stacked monthly trips with both station lookup tables.
exportfileI = mergingfiles(fulldf, estaciones_retiro, estaciones_arribo)

In [21]:
def filetoexport(first):
    """Append a trip-distance column and fill missing rider gender.

    Parameters
    ----------
    first : pandas.DataFrame
        Trip rows carrying ``location_lat_retiro``, ``location_lon_retiro``,
        ``location_lat_arribo``, ``location_lon_arribo`` and ``Genero_Usuario``.

    Returns
    -------
    pandas.DataFrame
        A positional selection of the columns of ``first`` plus
        ``location_dist``: the ``geopy.distance.distance(...).km`` value between
        the origin and destination coordinates, or ``NaN`` when any of the four
        coordinates is missing. Missing ``Genero_Usuario`` values become ``"X"``.
    """
    coord_cols = [
        "location_lat_retiro",
        "location_lon_retiro",
        "location_lat_arribo",
        "location_lon_arribo",
    ]
    coords = first[coord_cols].apply(pd.to_numeric, errors="coerce")
    has_all_coords = coords.notna().all(axis=1).to_numpy()

    # A trip with an unknown station gets NaN rather than a distance measured
    # to a placeholder point, which would report a fabricated trip length.
    dist_km = [
        geopy.distance.distance((lat_r, lon_r), (lat_a, lon_a)).km if complete else float("nan")
        for complete, (lat_r, lon_r, lat_a, lon_a) in zip(has_all_coords, coords.to_numpy())
    ]

    # Reuse the index of `first` so the column-wise concat lines rows up
    # one-to-one even when that index is not 0..n-1.
    distances = pd.Series(dist_km, index=first.index, name="location_dist", dtype="float64")

    # Positional selection kept exactly as before; with location_dist appended
    # last it skips positions 14 and 18.
    # NOTE(review): presumably those are the two punto_geo_* columns; confirm
    # against the actual column layout of `first`.
    keep_positions = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 15, 16, 17, 19]
    result = pd.concat([first, distances], axis=1, join="inner").iloc[:, keep_positions].copy()
    result["Genero_Usuario"] = result["Genero_Usuario"].fillna("X")
    return result

# Add trip distances and the gender fill to the merged trips.
lI = filetoexport(exportfileI)

In [190]:
# Treat empty strings as missing, then keep only rows with no missing value
# in any column.
efile = (
    exportfileI
    .replace("", np.nan)
    .dropna(axis=0)
)

# Top Bike
def top_bike(efile=efile):
    """Summarise the bike with the most trips.

    Parameters
    ----------
    efile : pandas.DataFrame, optional
        Trip rows with ``Bici``, ``Mes``, ``Fecha_Retiro``, ``viaje`` and the
        four ``location_*`` coordinate columns. The default is the module-level
        ``efile`` as it existed when this function was defined.

    Returns
    -------
    dict
        ``"trips_per_moth"``: ``{Mes: {"Trips": count}}`` for that bike.
        ``"total_km"``: sum of ``geopy.distance.distance(...).km`` over its
        trips, truncated to ``int``; trips with a missing coordinate are skipped.
        ``"last_trips"``: its 100 last rows when sorted by ``Fecha_Retiro``,
        keyed by ``viaje``.

    Raises
    ------
    IndexError
        If ``efile`` has no rows.
    """
    # Bike id with the highest trip count.
    top_bike_id = efile.groupby("Bici").size().sort_values(ascending=False).index[0]
    bike = efile.loc[efile["Bici"] == top_bike_id]

    trips_per_month = bike.groupby("Mes").size().to_frame("Trips").T.to_dict()

    coord_cols = [
        "location_lat_retiro",
        "location_lon_retiro",
        "location_lat_arribo",
        "location_lon_arribo",
    ]
    # Only trips with all four coordinates contribute; a placeholder coordinate
    # would add a fabricated distance to the total.
    coords = bike[coord_cols].apply(pd.to_numeric, errors="coerce").dropna()
    trip_km = pd.Series(
        [
            geopy.distance.distance((lat_r, lon_r), (lat_a, lon_a)).km
            for lat_r, lon_r, lat_a, lon_a in coords.to_numpy()
        ],
        dtype="float64",
    )
    # Reduce to a scalar before int(): calling int() on a one-element Series
    # is deprecated in recent pandas releases.
    total_km = int(trip_km.sum())

    last_trips = (
        bike.sort_values(by="Fecha_Retiro")
        .tail(100)[
            [
                "Fecha_Retiro",
                "viaje",
                "location_lat_retiro",
                "location_lon_retiro",
                "location_lat_arribo",
                "location_lon_arribo",
            ]
        ]
        .set_index("viaje")
        .transpose()
        .to_dict()
    )

    # NOTE(review): the key is spelled "trips_per_moth"; it is kept as-is
    # because readers of the exported JSON may depend on it. Confirm before
    # renaming.
    topBike = {
        "trips_per_moth": trips_per_month,
        "total_km": total_km,
        "last_trips": last_trips,
    }

    return topBike

# Uses the default argument, i.e. the `efile` that existed when top_bike was defined.
topBike = top_bike()

In [157]:
# Load allyear.csv, using its first column as the row index.
all_yearsample = pd.read_csv(
    "./ecobicidata/allyear.csv",
    index_col=0,
)

In [192]:
# Aggregated statistics exported for the app. Counts come from `efile`;
# distance and per-month duration figures come from `all_yearsample`.
# Note that every "avg_*" entry below is computed with a median, not a mean.
full_year_dataI = {
    # {Mes: {"Trips": number of rows}}
    "trips_per_month": (
        efile[["Mes"]].groupby("Mes").size().to_frame("Trips").T.to_dict()
    ),
    # {Edad_Usuario: {"Trips": number of rows}}
    "age_distribution": (
        efile[["Edad_Usuario"]].groupby("Edad_Usuario").size().to_frame("Trips").T.to_dict()
    ),
    "median_trip_time": efile["time_delta"].median(),
    "total_trips": len(efile),
    "median_trips_per_bike": efile[["Bici"]].groupby("Bici").size().reset_index()[0].median(),
    # The 100 most frequent (name_arribo, name_retiro, viaje) combinations,
    # keyed by viaje; the count is stored under the integer key 0.
    "top_100_trips": (
        efile[["name_arribo", "name_retiro", "viaje"]]
        .groupby(["name_arribo", "name_retiro", "viaje"])
        .size()
        .reset_index()
        .set_index("viaje")
        .sort_values(by=0, ascending=False)
        .head(100)
        .T
        .to_dict()
    ),
    "avg_km_per_trip": round(all_yearsample["location_dist"].median(), 2),
    # {Mes: {"time_delta": median time_delta for that Mes}}
    "avg_time_per_trip_per_year": (
        all_yearsample[["Mes", "time_delta"]].groupby("Mes").median().T.to_dict()
    ),
    # Median of the per-Mes medians.
    "avg_time_delta": (
        all_yearsample[["Mes", "time_delta"]].groupby("Mes").median()["time_delta"].median()
    ),
    "topBike": topBike,
}

In [194]:
import json
# Write the aggregated statistics to the JSON file under ./app.
with open("./app/sample.json", mode="w") as outfile:
    json.dump(obj=full_year_dataI, fp=outfile)