In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader,TensorDataset
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer

In [3]:
!curl -L -o taxi-price-prediction.zip https://www.kaggle.com/api/v1/datasets/download/denkuznetz/taxi-price-prediction

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0
100 20027  100 20027    0     0  69809      0 --:--:-- --:--:-- --:--:-- 69809


In [4]:
from zipfile import ZipFile

# The download cell writes the archive with a relative path (i.e. into the
# current working directory), so open it the same way instead of assuming the
# Colab-specific "/content" directory.
with ZipFile("taxi-price-prediction.zip","r") as archive:
  # extractall() with no arguments unpacks into the current working directory.
  archive.extractall()

In [5]:
# The archive is extracted into the current working directory, so read the CSV
# with a relative path rather than a Colab-specific absolute one.
df = pd.read_csv("taxi_trip_pricing.csv")

In [6]:
# Keep only the rows that have a Trip_Price value (the column used as the
# target further down).
df = df[df['Trip_Price'].notna()]

In [7]:
# Preview the first five rows of the frame.
df.head(n=5)

Unnamed: 0,Trip_Distance_km,Time_of_Day,Day_of_Week,Passenger_Count,Traffic_Conditions,Weather,Base_Fare,Per_Km_Rate,Per_Minute_Rate,Trip_Duration_Minutes,Trip_Price
0,19.35,Morning,Weekday,3.0,Low,Clear,3.56,0.8,0.32,53.82,36.2624
2,36.87,Evening,Weekend,1.0,High,Clear,2.7,1.21,0.15,37.27,52.9032
3,30.33,Evening,Weekday,4.0,Low,,3.48,0.51,0.15,116.81,36.4698
4,,Evening,Weekday,3.0,High,Clear,2.93,0.63,0.32,22.64,15.618
5,8.64,Afternoon,Weekend,2.0,Medium,Clear,2.55,1.71,0.48,89.33,60.2028


In [8]:
# Number of missing values in each column.
df.isnull().sum()

Unnamed: 0,0
Trip_Distance_km,50
Time_of_Day,49
Day_of_Week,46
Passenger_Count,48
Traffic_Conditions,50
Weather,46
Base_Fare,44
Per_Km_Rate,44
Per_Minute_Rate,49
Trip_Duration_Minutes,46


In [9]:
# Total of the Trip_Price column.
df.loc[:, 'Trip_Price'].sum()

np.float64(54087.909432750595)

In [10]:
# Predictors: every column except the target.
X = df.drop(columns=['Trip_Price'])
# Target: log1p-transformed Trip_Price, shaped as a single column (n_rows, 1).
y = np.log1p(df['Trip_Price'].to_numpy().reshape(-1,1))

In [11]:
# Column names by dtype: numeric columns vs. everything else.
num_features = X.select_dtypes(include='number').columns
cat_features = X.select_dtypes(exclude='number').columns

In [12]:
# Display the numeric column names selected from X.
num_features

Index(['Trip_Distance_km', 'Passenger_Count', 'Base_Fare', 'Per_Km_Rate',
       'Per_Minute_Rate', 'Trip_Duration_Minutes'],
      dtype='object')

In [13]:
# Numeric columns: fill missing values with the column mean, then standardise.
num_transformer = Pipeline(
    steps=[
        ('imputer', SimpleImputer(strategy='mean')),
        ('scaler', StandardScaler()),
    ]
)

# Non-numeric columns: fill missing values with the most frequent category,
# then one-hot encode into a dense array. Categories not seen during fit are
# ignored at transform time instead of raising.
cat_transformer = Pipeline(
    steps=[
        ('imputer', SimpleImputer(strategy='most_frequent')),
        ('encoder', OneHotEncoder(handle_unknown='ignore', sparse_output=False)),
    ]
)

In [14]:
# Route each group of columns through its own pipeline and concatenate the
# results (numeric block first, then the one-hot block).
preprocessor = ColumnTransformer(
    transformers=[
        ("num", num_transformer, num_features),
        ("cat", cat_transformer, cat_features),
    ]
)

In [15]:
# Fit the preprocessing pipeline on X and transform X in one step.
# NOTE(review): X is the full feature frame at this point (the train/test split
# happens afterwards), so the imputation and scaling statistics are computed
# from rows that later end up in the test set.
X_processed = preprocessor.fit_transform(X)

In [16]:
# Split the raw features first and fit the preprocessor on the training rows
# only, so the imputation/scaling statistics never see the test rows.
# random_state pins the shuffle so the split is reproducible across runs.
X_train_raw,X_test_raw,y_train,y_test = train_test_split(X,y,test_size=0.2,random_state=42)
X_train = preprocessor.fit_transform(X_train_raw)
X_test = preprocessor.transform(X_test_raw)

# Torch Hissesi


In [17]:
# Convert the NumPy splits into float32 tensors.
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.float32)

# Pair features with targets so they can be batched together.
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

# Mini-batches of 32; only the training loader reshuffles every epoch.
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

In [18]:
class MLP(nn.Module):
  """Fully connected network that maps a feature vector to a single output.

  The network is a stack of Linear + ReLU blocks followed by a final Linear
  layer with one output unit.

  Args:
    input_features: Number of input features per sample.
    hidden_sizes: Widths of the hidden layers, in order. The default
      ``(64, 32)`` gives Linear(in, 64) -> ReLU -> Linear(64, 32) -> ReLU ->
      Linear(32, 1). An empty sequence yields a single Linear layer.
  """
  def __init__(self,input_features,hidden_sizes=(64,32)):
    super().__init__()
    layers = []
    in_width = input_features
    # Layers are created in input-to-output order, so submodule indices in
    # `self.net` run 0, 1, 2, ... from the first Linear to the output layer.
    for width in hidden_sizes:
      layers.append(nn.Linear(in_width,width))
      layers.append(nn.ReLU())
      in_width = width
    layers.append(nn.Linear(in_width,1))
    self.net = nn.Sequential(*layers)

  def forward(self,x):
    """Run `x` through the layer stack and return the result."""
    return self.net(x)

In [19]:
# Seed PyTorch's global RNG so the random weight initialisation below (and
# therefore the training run) is reproducible on a fresh kernel.
torch.manual_seed(42)

# One input unit per column of the processed training matrix.
input_dim = X_train.shape[1]
model = MLP(input_dim)
print(model)

MLP(
  (net): Sequential(
    (0): Linear(in_features=18, out_features=64, bias=True)
    (1): ReLU()
    (2): Linear(in_features=64, out_features=32, bias=True)
    (3): ReLU()
    (4): Linear(in_features=32, out_features=1, bias=True)
  )
)


In [20]:
def select_device():
  """Return the preferred torch device name: 'cuda', then 'mps', then 'cpu'.

  The checks are mutually exclusive (early returns), so a positive CUDA check
  can no longer be overwritten by the fallback branch of the MPS check.
  """
  if torch.cuda.is_available():
    return 'cuda'
  # Older torch builds have no `torch.backends.mps`, hence the hasattr guard.
  if hasattr(torch.backends,'mps') and torch.backends.mps.is_available():
    return 'mps'
  return 'cpu'

# NOTE: tensors passed to the model must live on this same device.
device = select_device()

In [21]:
# Move the model's parameters to the selected device; the returned module is
# the cell's last expression, so its repr is displayed.
model.to(device)

MLP(
  (net): Sequential(
    (0): Linear(in_features=18, out_features=64, bias=True)
    (1): ReLU()
    (2): Linear(in_features=64, out_features=32, bias=True)
    (3): ReLU()
    (4): Linear(in_features=32, out_features=1, bias=True)
  )
)

In [28]:
# Training hyper-parameters.
n_epochs = 100
learning_rate = 0.001

# Mean-squared-error loss, optimised with Adam over all model parameters.
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Switch the model to training mode.
model.train()

MLP(
  (net): Sequential(
    (0): Linear(in_features=18, out_features=64, bias=True)
    (1): ReLU()
    (2): Linear(in_features=64, out_features=32, bias=True)
    (3): ReLU()
    (4): Linear(in_features=32, out_features=1, bias=True)
  )
)

In [29]:
def train_bgd(model,optimizer,criterion,X_train,y_train,n_epochs):
  """Train `model` with full-batch gradient descent.

  Each epoch runs one forward/backward pass over the whole of
  `X_train`/`y_train`, takes a single optimizer step and prints the loss.

  Args:
    model: The `nn.Module` to train (switched to training mode here).
    optimizer: Optimizer built over `model`'s parameters.
    criterion: Loss callable invoked as `criterion(predictions, targets)`.
    X_train: Tensor with all training inputs.
    y_train: Tensor with all training targets.
    n_epochs: Number of full-batch updates to perform.

  Returns:
    None. Progress is reported through `print`.
  """
  # The model may have been moved to an accelerator while the data tensors were
  # created on the CPU; put the data on the parameters' device so the forward
  # pass does not fail with a device mismatch.
  first_param = next(model.parameters(), None)
  if first_param is not None:
    X_train = X_train.to(first_param.device)
    y_train = y_train.to(first_param.device)

  model.train()
  # Discard gradients left over from any earlier backward pass so that they
  # cannot leak into the first update.
  optimizer.zero_grad()
  for epoch in range(n_epochs):
    y_pred = model(X_train)
    loss = criterion(y_pred,y_train)
    loss.backward()
    optimizer.step()
    # Clear the gradients for the next epoch (and leave them clear on exit).
    optimizer.zero_grad()
    print(f"Epoch {epoch+1}/{n_epochs}, Loss: {loss.item()}")

In [30]:
# Run full-batch training on the training tensors for n_epochs epochs; the
# per-epoch loss is printed by train_bgd.
train_bgd(model,optimizer,criterion,X_train_tensor,y_train_tensor,n_epochs)

Epoch 1/100, Loss: 4.0492048263549805
Epoch 2/100, Loss: 3.803466558456421
Epoch 3/100, Loss: 3.5615475177764893
Epoch 4/100, Loss: 3.323820114135742
Epoch 5/100, Loss: 3.0903513431549072
Epoch 6/100, Loss: 2.861604690551758
Epoch 7/100, Loss: 2.6379146575927734
Epoch 8/100, Loss: 2.4197161197662354
Epoch 9/100, Loss: 2.2076120376586914
Epoch 10/100, Loss: 2.0021204948425293
Epoch 11/100, Loss: 1.803917646408081
Epoch 12/100, Loss: 1.6138098239898682
Epoch 13/100, Loss: 1.4326399564743042
Epoch 14/100, Loss: 1.261170506477356
Epoch 15/100, Loss: 1.1003081798553467
Epoch 16/100, Loss: 0.9510215520858765
Epoch 17/100, Loss: 0.8142329454421997
Epoch 18/100, Loss: 0.6908192038536072
Epoch 19/100, Loss: 0.5815852284431458
Epoch 20/100, Loss: 0.4871482849121094
Epoch 21/100, Loss: 0.4077796936035156
Epoch 22/100, Loss: 0.34353604912757874
Epoch 23/100, Loss: 0.294158011674881
Epoch 24/100, Loss: 0.25904104113578796
Epoch 25/100, Loss: 0.23704569041728973
Epoch 26/100, Loss: 0.226584538817405