In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio
import datetime as dt
import warnings
import seaborn as sns
from sklearn import linear_model
# NOTE(review): this silences every warning category for the whole session,
# including deprecation and chained-assignment notices from pandas/seaborn.
# Consider narrowing it to specific categories once the notebook is stable.
warnings.filterwarnings("ignore")

In [None]:
# Read the ride records from the CSV in the working directory and preview
# the first ten rows.
uber_data = pd.read_csv("cab_rides.csv")
uber_data.head(n=10)

In [None]:
# List the column labels of the freshly loaded frame.
uber_data.columns

In [None]:
# Keep only the listed columns, in this order.
uber_data = uber_data[
    [
        'id',
        'product_id',
        'name',
        'source',
        'time_stamp',
        'cab_type',
        'distance',
        'destination',
        'price',
        'surge_multiplier',
    ]
]

In [None]:
# Work on an independent copy: a plain `df = uber_data` only aliases the frame,
# so later in-place edits of `df` would also rewrite `uber_data` and make the
# following cells fail when re-run.
df = uber_data.copy()
df.head()

In [None]:
# Drop the row identifier without mutating the frame in place, so any other
# name still bound to the same object is left untouched.
# errors="ignore" keeps the cell re-runnable after `id` has already been removed.
df = df.drop(columns="id", errors="ignore")
df.describe()

In [None]:
# Print the per-column dtype and non-null count summary.
df.info()

In [None]:
# Count the missing values in each column.
df.isna().sum()

In [None]:
# Summary statistics computed only over rows that have no missing value.
# The result is displayed; `df` itself is not modified.
df.dropna(axis=0, how="any").describe()

In [None]:
# Number of rides recorded for each value of `name`.
df["name"].value_counts()

In [None]:
# Number of rides recorded for each value of `source`.
df["source"].value_counts()


In [None]:
# Number of rides recorded for each value of `cab_type`.
df["cab_type"].value_counts()


In [None]:
# Parse the raw timestamps (interpreted as epoch milliseconds) once, then derive
# the calendar features with the vectorised datetime accessor instead of a
# row-by-row strftime call.
ride_time = pd.to_datetime(df['time_stamp'], unit='ms')

# assign() builds a new frame, so no in-place column writes or drops are made
# on an object that other names may still reference.
df = df.assign(
    date=ride_time.dt.date,
    hour=ride_time.dt.hour,
    weekday=ride_time.dt.day_name(),  # full English day name, e.g. "Monday"
)

# Reorder the columns for readability; `time_stamp` is omitted because the
# derived date/weekday/hour columns replace it.
df = df[['name', 'source', 'date','weekday','hour', 'cab_type', 'distance','destination', 'price', 'surge_multiplier','product_id']]
df.head(5)

In [None]:
# Mean of the `price` column across all rides.
df["price"].mean()

In [None]:
# Group rides by cab type and service name.
group = df.groupby(["cab_type", "name"])

# The frame also holds string and date columns; numeric_only=True restricts the
# mean to numeric columns so the call does not raise on pandas >= 2.0.
group.mean(numeric_only=True)

In [None]:
# Mean price over the rows whose cab_type is "Uber".
df.loc[df["cab_type"] == "Uber", "price"].mean()

In [None]:
# Mean price over the rows whose cab_type is "Lyft".
df.loc[df["cab_type"] == "Lyft", "price"].mean()

In [None]:
# Bar chart of the mean price for each group in `group`.
group[["price"]].mean().plot.bar()

In [None]:
# Group rides by cab type and destination.
# Only `group1` is bound here: the previous chained assignment also rebound
# `group`, silently replacing the cab_type/name grouping built earlier.
group1 = df.groupby(["cab_type", "destination"])

In [None]:
# Per-group means of the numeric columns only; without numeric_only=True the
# string/date columns make this call raise on pandas >= 2.0.
group1.mean(numeric_only=True)

In [None]:
# Bar chart of the mean distance for each group in `group1`.
group1["distance"].mean().plot.bar()

In [None]:
# Side-by-side histograms of ride hour, one panel per cab type.
# The 1x2 grid assumes at most two distinct cab types.
plt.figure(figsize=(14, 4))
for i, cab_type in enumerate(df['cab_type'].unique()):
    plt.subplot(1, 2, i + 1)
    # Hours of the rides that belong to the current cab type only.
    hours_for_type = df[df['cab_type'] == cab_type]['hour']
    sns.histplot(data=df, x=hours_for_type, bins=24)
    plt.xlabel(f'Hours in month {cab_type}')
    plt.ylabel('Total Rides')

In [None]:
# One weekday histogram per service name, laid out on a grid whose row count
# is derived from the number of names (previously a fixed 13x3 grid that left
# most of a 25x60 inch figure blank).
ride_names = df['name'].unique()
n_cols = 3
n_rows = max(1, -(-len(ride_names) // n_cols))  # ceiling division, at least one row

plt.figure(figsize=(25, 5 * n_rows))
for i, name in enumerate(ride_names):
    plt.subplot(n_rows, n_cols, i + 1)
    # weekday is categorical, so no bin count is passed.
    sns.histplot(data=df[df['name'] == name], x='weekday')
    # The x-axis shows weekdays, so the label says so (it used to read "Hours").
    plt.xlabel('Weekday for ride type {}'.format(name))
    plt.ylabel('Total Rides')

In [None]:
# Split the rides by provider: the Uber subset is taken from a deep copy of
# df, the Lyft subset from a shallow copy.
deepCopy = df.copy(deep=True)
shallowCopy = df.copy(deep=False)
df1 = deepCopy[deepCopy["cab_type"] == "Uber"]
df2 = shallowCopy[shallowCopy["cab_type"] == "Lyft"]

In [None]:
# Density heatmap of price against weekday for the df1 (Uber) subset,
# with the y-axis limited to 0-40.
fig = px.density_heatmap(df1, x="weekday", y="price")
fig.update_layout(
    title="Uber Price range on weekdays",
    xaxis_title="WEEKDAYS",
    yaxis_title="Price Range",
    yaxis_range=[0, 40],
    font={"family": "Open Sans, monospace", "size": 16, "color": "#135e96"},
)
fig.show()

In [None]:
# Density heatmap of price against weekday for the df2 (Lyft) subset,
# with the y-axis limited to 0-40.
fig1 = px.density_heatmap(df2, x="weekday", y="price")
fig1.update_layout(
    title="Lyft Price range on weekdays",
    xaxis_title="WEEKDAYS",
    yaxis_title="Price Range",
    yaxis_range=[0, 40],
    font={"family": "Open Sans, monospace", "size": 16, "color": "#135e96"},
)
fig1.show()

In [None]:
# Scatter of price against distance for the df1 (Uber) subset, coloured by
# service name, with the y-axis limited to 0-100.
fig2 = px.scatter(df1, x="distance", y="price", color="name")
fig2.update_layout(
    title="Uber Price range Based on the Distance",
    xaxis_title="Distance to the Destination",
    yaxis_title="Price Range",
    yaxis_range=[0, 100],
    font={"family": "Open Sans, monospace", "size": 16, "color": "#135e96"},
)
fig2.show()

In [None]:
# Scatter of price against distance for the df2 (Lyft) subset, coloured by
# service name, with the y-axis limited to 0-100.
fig3 = px.scatter(df2, x="distance", y="price", color="name")
fig3.update_layout(
    title="Lyft Price range Based on the Distance",
    xaxis_title="Distance to the Destination",
    yaxis_title="Price Range",
    yaxis_range=[0, 100],
    font={"family": "Open Sans, monospace", "size": 16, "color": "#135e96"},
)
fig3.show()

In [None]:
# Scatter of destination against price for the df2 (Lyft) subset, coloured by
# surge multiplier on the reversed Cividis scale, with the x-axis limited to 0-100.
fig4 = px.scatter(
    df2,
    x="price",
    y="destination",
    color="surge_multiplier",
    color_continuous_scale=px.colors.sequential.Cividis_r,
)
fig4.update_layout(
    title="When Surge Pricing Applied?",
    xaxis_title="Price",
    yaxis_title="Destination",
    xaxis_range=[0, 100],
    font={"family": "Open Sans, monospace", "size": 14, "color": "#135e96"},
)
fig4.show()

In [None]:
# Density heatmap of price against destination for the df1 (Uber) subset,
# 20 bins requested per axis, with the y-axis limited to 0-50.
fig5 = px.density_heatmap(
    df1,
    x="destination",
    y="price",
    nbinsx=20,
    nbinsy=20,
    color_continuous_scale="Viridis",
)
fig5.update_layout(
    title="Heatmap Density of Uber Price Range by Destination",
    xaxis_title="Destination Type",
    yaxis_title="Price Range",
    yaxis_range=[0, 50],
    font={"family": "Open Sans, monospace", "size": 16, "color": "#135e96"},
)
fig5.show()

In [None]:
# Density heatmap of price against destination for the df2 (Lyft) subset,
# 20 bins requested per axis, with the y-axis limited to 0-50.
fig6 = px.density_heatmap(
    df2,
    x="destination",
    y="price",
    nbinsx=20,
    nbinsy=20,
    color_continuous_scale="Viridis",
)
fig6.update_layout(
    title="Heatmap Density of Lyft Price Range by Destination",
    xaxis_title="Destination Type",
    yaxis_title="Price Range",
    yaxis_range=[0, 50],
    font={"family": "Open Sans, monospace", "size": 16, "color": "#135e96"},
)
fig6.show()

In [None]:
# Side-by-side correlation heatmaps for the df1 (Uber) and df2 (Lyft) subsets.
corrmatrix, ax = plt.subplots(1, 2, sharex=True, figsize=(18, 5))

# Both frames carry string and date columns; correlating only the numeric
# columns keeps DataFrame.corr() from raising on pandas >= 2.0.
uber_corr = df1.select_dtypes(include="number").corr()
lyft_corr = df2.select_dtypes(include="number").corr()

sns.heatmap(uber_corr, annot=True, ax=ax[0]).set_title("Uber Rides Correlation Matrix")
sns.heatmap(lyft_corr, annot=True, ax=ax[1]).set_title("Lyft Rides Correlation Matrix")

# plt.show() renders through the active backend; Figure.show() is meant for
# GUI backends and only emits a warning under the inline notebook backend.
plt.show()