In [2]:
import numpy as np
import pandas as pd

**Creating a Sample Dataset**

In [3]:
# Simulate a small delivery dataset whose cost follows a known linear formula.
# The global NumPy seed is fixed so every draw below is reproducible.
np.random.seed(42)

N_SAMPLES = 100  # number of simulated deliveries

# Features: distance (km), order size (number of items), delivery time (minutes).
# All draws share the seeded global state, so their order must not be changed.
distance = np.random.uniform(1, 10, N_SAMPLES)  # km, drawn from [1, 10)
# randint's upper bound is exclusive, so order sizes range from 1 to 9 items
order_size = np.random.randint(1, 10, N_SAMPLES)
delivery_time = np.random.uniform(10, 60, N_SAMPLES)  # minutes, drawn from [10, 60)

# Weights for the relationship between features and cost (just for simulation)
cost_per_km = 2  # cost per km
cost_per_item = 1.5  # cost per item
cost_per_minute = 0.3  # cost per minute

# Simulated delivery cost (y):
# y = cost_per_km * distance + cost_per_item * order_size + cost_per_minute * delivery_time + noise
noise = np.random.normal(0, 1, N_SAMPLES)  # standard-normal noise to make it more realistic
delivery_cost = (
    cost_per_km * distance
    + cost_per_item * order_size
    + cost_per_minute * delivery_time
    + noise
)

# Assemble the features and the target into a single DataFrame
data = pd.DataFrame({
    'distance': distance,
    'order_size': order_size,
    'delivery_time': delivery_time,
    'delivery_cost': delivery_cost
})

print(data.head())

   distance  order_size  delivery_time  delivery_cost
0  4.370861           7      36.482529      30.872741
1  9.556429           1      22.092615      25.627926
2  7.587945           4      14.655138      25.100501
3  6.387926           4      54.860788      36.323040
4  2.404168           5      55.020903      28.878886


In [4]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# Separate the predictors from the target column
feature_columns = ['distance', 'order_size', 'delivery_time']
X = data[feature_columns]  # Independent variables
y = data['delivery_cost']  # Dependent variable (target)

# Hold out 20% of the rows for testing; the fixed random_state keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Build an ordinary least-squares model and fit it on the training rows
# (fit() returns the estimator itself, so construction and fitting can be chained)
model = LinearRegression().fit(X_train, y_train)

# Predict the cost for the held-out rows
y_pred = model.predict(X_test)

# Score the predictions against the true held-out costs
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f"Mean Squared Error: {mse}")
print(f"R-squared: {r2}")

# Report the learned parameters; coefficients follow the column order of X
print(f"Intercept: {model.intercept_}")
print(f"Coefficients: {model.coef_}")

Mean Squared Error: 1.1287957877295272
R-squared: 0.977958053035444
Intercept: -0.06374004883715045
Coefficients: [2.0304861  1.50747205 0.29552585]
