In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
from sklearn.preprocessing import StandardScaler

In [None]:
# Load the raw trips, discard the identifier/timestamp columns, and drop any
# row with a missing value, all in one chain so `data` is bound exactly once.
data = (
    pd.read_csv('uber.csv')
    .drop(columns=['Unnamed: 0', 'key', 'pickup_datetime'])
    .dropna()
)
data.head()

In [None]:
# Columns inspected for outliers; the IQR filter further down iterates over
# this same list.
columns = ['fare_amount', 'pickup_longitude', 'pickup_latitude', 'passenger_count']

# One panel per column. The panel count follows len(columns), so editing the
# list above cannot desynchronise the grid; squeeze=False keeps `axes` 2-D even
# when only a single column is listed.
fig, axes = plt.subplots(1, len(columns), figsize=(12, 6), squeeze=False)
for ax, col in zip(axes[0], columns):
    # Pass the Series by keyword: a bare positional argument has been
    # interpreted differently across seaborn releases.
    sns.boxplot(y=data[col], ax=ax)
    ax.set_title(f'Boxplot for {col}')

fig.tight_layout()
plt.show()

def remove_iqr_outliers(frame, cols, whisker=1.5):
    """Drop rows lying outside the IQR fences of each listed column.

    Columns are handled in the order given, and each column's quartiles are
    computed on the rows that survived the filters of the columns before it.

    Args:
        frame: DataFrame to filter; it is copied first and never modified.
        cols: Iterable of column names to apply the fences to.
        whisker: Multiplier applied to the IQR to place the fences
            (``Q1 - whisker * IQR`` and ``Q3 + whisker * IQR``).

    Returns:
        A new DataFrame holding only the rows whose value in every listed
        column lies within that column's fences (bounds inclusive).
    """
    kept = frame.copy()
    for name in cols:
        q1 = kept[name].quantile(0.25)
        q3 = kept[name].quantile(0.75)
        spread = q3 - q1
        # `between` is inclusive on both ends, i.e. lower <= value <= upper.
        kept = kept[kept[name].between(q1 - whisker * spread, q3 + whisker * spread)]
    return kept


df = remove_iqr_outliers(data, columns)

print(f"\nOriginal Data Shape: {data.shape}")
print(f"Data Shape after Removing Outliers: {df.shape}")

In [None]:
# Correlate on the outlier-filtered frame `df` rather than the raw `data`, so
# extreme rows do not dominate the coefficients. The printed Series ranks every
# column by its correlation with fare_amount, strongest positive first.
correlation_matrix = df.corr()
print(correlation_matrix['fare_amount'].sort_values(ascending=False))

In [None]:
# Build features/target from the outlier-filtered frame `df`; taking them from
# raw `data` would leave the outlier removal above without any effect on the
# models.
X = df.drop(columns='fare_amount')
y = df['fare_amount']

# 80/20 hold-out split with a fixed seed so the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# The scaler is fitted on the training rows only and then applied to the test
# rows, so no test-set statistics enter the fitted transform.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [None]:
# Fit the three linear models on the same scaled training split.
# `fit` returns the estimator itself, so construction and fitting can be chained.
linear_reg = LinearRegression().fit(X_train, y_train)
ridge_reg = Ridge(alpha=1.0).fit(X_train, y_train)
lasso_reg = Lasso(alpha=1.0).fit(X_train, y_train)

# Predict on the held-out split, one prediction array per fitted model.
y_pred_linear, y_pred_ridge, y_pred_lasso = (
    fitted.predict(X_test) for fitted in (linear_reg, ridge_reg, lasso_reg)
)

In [None]:
def evaluate_model(y_true, y_pred):
    """Score predictions against observed values with four regression metrics.

    Args:
        y_true: Observed target values.
        y_pred: Predicted target values, aligned element-wise with ``y_true``.

    Returns:
        Tuple ``(r2, rmse, mae, mse)``.
    """
    # Compute the MSE once and derive the RMSE from it instead of calling
    # mean_squared_error a second time.
    mse = mean_squared_error(y_true, y_pred)
    rmse = np.sqrt(mse)
    r2 = r2_score(y_true, y_pred)
    mae = mean_absolute_error(y_true, y_pred)
    return r2, rmse, mae, mse

# Score each model's held-out predictions; every result is an
# (r2, rmse, mae, mse) tuple as returned by evaluate_model.
linear_scores = evaluate_model(y_test, y_pred_linear)
ridge_scores = evaluate_model(y_test, y_pred_ridge)
lasso_scores = evaluate_model(y_test, y_pred_lasso)

# Keep the individual metric names bound as well as the tuples.
r2_linear, rmse_linear, mae_linear, mse_linear = linear_scores
r2_ridge, rmse_ridge, mae_ridge, mse_ridge = ridge_scores
r2_lasso, rmse_lasso, mae_lasso, mse_lasso = lasso_scores

# One summary line per model, each metric rounded to two decimals.
print("Linear Regression: R2 = {:.2f}, RMSE = {:.2f}, MAE = {:.2f}, MSE = {:.2f}".format(*linear_scores))
print("Ridge Regression: R2 = {:.2f}, RMSE = {:.2f}, MAE = {:.2f}, MSE = {:.2f}".format(*ridge_scores))
print("Lasso Regression: R2 = {:.2f}, RMSE = {:.2f}, MAE = {:.2f}, MSE = {:.2f}".format(*lasso_scores))