In [6]:

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error, r2_score


# Load the dataset
try:
  data = pd.read_csv('/content/drive/MyDrive/machine learning/week 3/Infrared.csv')
except FileNotFoundError:
  print("Error: '/content/drive/MyDrive/machine learning/week 3/Infrared.csv' not found. Please upload the file or provide the correct path.")
  exit()

for col in data.columns:
  if data[col].isnull().any():
    if pd.api.types.is_numeric_dtype(data[col]):
      data[col].fillna(data[col].mean(), inplace=True)
    else:
        data[col].fillna(data[col].mode()[0], inplace=True)


X = data.drop('aveOralM', axis=1)
y = data['aveOralM']

# Encode non-numeric features using one-hot encoding if needed
X = pd.get_dummies(X, columns=X.select_dtypes(include=['object']).columns)


# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Feature scaling
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# K-Nearest Neighbors Regression
knn = KNeighborsRegressor(n_neighbors=5)
knn.fit(X_train, y_train)
knn_pred = knn.predict(X_test)
print("K-NN R-squared:", r2_score(y_test, knn_pred))
print("K-NN MSE:", mean_squared_error(y_test, knn_pred))

# Decision Tree Regression
dt = DecisionTreeRegressor()
dt.fit(X_train, y_train)
dt_pred = dt.predict(X_test)
print("\nDecision Tree R-squared:", r2_score(y_test, dt_pred))
print("Decision Tree MSE:", mean_squared_error(y_test, dt_pred))

K-NN R-squared: 0.6479456178529002
K-NN MSE: 0.0741328431372549

Decision Tree R-squared: 0.4461285128555075
Decision Tree MSE: 0.11662990196078434


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  data[col].fillna(data[col].mean(), inplace=True)


In [5]:

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
import numpy as np

# Load the dataset
try:
  data = pd.read_csv('/content/drive/MyDrive/machine learning/week 3/Infrared.csv')
except FileNotFoundError:
  print("Error: '/content/drive/MyDrive/machine learning/week 3/Infrared.csv' not found. Please upload the file or provide the correct path.")
  exit()

# Assuming 'aveOralM' is the target variable
# Handle missing values (if any)
for col in data.columns:
  if data[col].isnull().any():
    if pd.api.types.is_numeric_dtype(data[col]):
      data[col].fillna(data[col].mean(), inplace=True)
    else:
        data[col].fillna(data[col].mode()[0], inplace=True)

X = data.drop('aveOralM', axis=1)
y = data['aveOralM']

# Encode non-numeric features
X = pd.get_dummies(X, columns=X.select_dtypes(include=['object']).columns)

# Split data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Feature scaling
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

def evaluate_model(y_true, y_pred):
    mse = mean_squared_error(y_true, y_pred)
    rmse = np.sqrt(mse)
    r2 = r2_score(y_true, y_pred)
    mae = mean_absolute_error(y_true, y_pred)
    return mse, rmse, r2, mae

# K-Nearest Neighbors Regression
knn = KNeighborsRegressor(n_neighbors=5)
knn.fit(X_train, y_train)
knn_pred = knn.predict(X_test)
mse_knn, rmse_knn, r2_knn, mae_knn = evaluate_model(y_test, knn_pred)

print("K-NN Regression:")
print(f"  MSE: {mse_knn:.4f}")
print(f"  RMSE: {rmse_knn:.4f}")
print(f"  R-squared: {r2_knn:.4f}")
print(f"  MAE: {mae_knn:.4f}")

# Decision Tree Regression
dt = DecisionTreeRegressor()
dt.fit(X_train, y_train)
dt_pred = dt.predict(X_test)
mse_dt, rmse_dt, r2_dt, mae_dt = evaluate_model(y_test, dt_pred)


print("\nDecision Tree Regression:")
print(f"  MSE: {mse_dt:.4f}")
print(f"  RMSE: {rmse_dt:.4f}")
print(f"  R-squared: {r2_dt:.4f}")
print(f"  MAE: {mae_dt:.4f}")


K-NN Regression:
  MSE: 0.0741
  RMSE: 0.2723
  R-squared: 0.6479
  MAE: 0.2156

Decision Tree Regression:
  MSE: 0.1166
  RMSE: 0.3415
  R-squared: 0.4462
  MAE: 0.2502


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  data[col].fillna(data[col].mean(), inplace=True)
