**Importing Libraries**

In [None]:
#importing useful libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

**Solution 1:**

In [None]:
# Load the dataset (provided as a '.csv' file).
# The first column of the file has no header; when it is read as ordinary data
# it surfaces as a spurious 'Unnamed: 0' feature in the later correlation
# analysis. Reading it as the row index keeps it out of the feature set.
df = pd.read_csv('/content/automobile.csv', index_col=0)

# Replace the '?' placeholder with NaN so pandas treats those entries as
# missing values. Reassigning (rather than inplace=True) keeps the cell
# idempotent and chain-friendly.
df = df.replace('?', np.nan)



**Solution 2:**

In [None]:
# Report the dtype of every column that contains at least one missing value.
has_missing = df.isna().any()
columns_missing_value = df.columns[has_missing]
missing_columns_datatypes = df.loc[:, columns_missing_value].dtypes
print(
    "Data types of columns with missing values:",
    missing_columns_datatypes,
    sep="\n",
)


Data types of columns with missing values:
normalized-losses    object
num-of-doors         object
bore                 object
stroke               object
horsepower           object
peak-rpm             object
price                object
dtype: object


No data type conversion is done at this point, since all the columns with missing values share the same data type (`object`).

In [None]:
# Handling the missing values
#
# Step 1: every column that contains missing values is typed `object`, even
# though most of them hold numbers stored as text. Convert such columns to a
# numeric dtype first; otherwise they would never reach the median branch and
# would be filled with the mode instead.
for column in df.select_dtypes(exclude=np.number).columns:
    as_number = pd.to_numeric(df[column], errors='coerce')
    # Convert only when no real value is lost, i.e. every non-missing entry
    # parsed as a number. Genuinely categorical columns stay untouched.
    if as_number.notna().sum() == df[column].notna().sum():
        df[column] = as_number

# Step 2: numeric columns -> fill with the column median.
# Assign the result back instead of calling fillna(inplace=True) on df[x]:
# the chained in-place form acts on a temporary selection, is deprecated in
# pandas 2.x and does not update `df` under copy-on-write.
numeric_columns = df.select_dtypes(include=np.number).columns
for x in numeric_columns:
    medians = df[x].median()
    df[x] = df[x].fillna(medians)

# Step 3: remaining (categorical) columns -> fill with the most frequent value.
category_columns = df.select_dtypes(exclude=np.number).columns
for y in category_columns:
    modes = df[y].mode()
    # mode() is empty for a column with no observed values; nothing to fill with.
    if not modes.empty:
        df[y] = df[y].fillna(modes.iloc[0])


In [None]:
# Sanity check: count the NaN entries left per column, then total them.
missing_per_column = df.isna().sum()
missed_values = missing_per_column.sum()
print("Number of missing values after handling: ", missed_values)

Number of missing values after handling:  0


After resolving the missing values, save the cleaned dataset as a new **CSV** file.

In [None]:
# Persist the cleaned frame to a new CSV file; index=False leaves the row
# index out of the written file.
df.to_csv(path_or_buf='new_automobile.csv', index=False)

**Solution 3:**

In [None]:
# Load the cleaned dataset for the prediction tasks.
# Use the same relative path the cleaned file was written to, so the notebook
# does not depend on the working directory being '/content'.
new_data = pd.read_csv('new_automobile.csv')

# Calculate the correlation matrix.
# numeric_only=True restricts the computation to numeric columns; without it
# pandas >= 2.0 raises on the text columns (older versions only warned).
correlation_matrix = new_data.corr(numeric_only=True)

  correlation_matrix = new_data.corr()


In [None]:
# Rank every feature by its correlation with 'price', strongest positive first.
price_correlations = correlation_matrix['price']
correlation_feature_cf = price_correlations.sort_values(ascending=False)

# Show the ranking under a heading.
print("Correlated features to 'Price':", correlation_feature_cf, sep="\n")

Correlated features to 'Price':
price                1.000000
engine-size          0.861769
curb-weight          0.820807
horsepower           0.759731
width                0.729009
length               0.682528
wheel-base           0.582915
bore                 0.525878
normalized-losses    0.317097
height               0.133761
stroke               0.081537
compression-ratio    0.070789
symboling           -0.082396
peak-rpm            -0.103493
Unnamed: 0          -0.117108
city-mpg            -0.667215
highway-mpg         -0.690173
Name: price, dtype: float64


**Solution 4:**

Use `engine-size` as the independent variable.

In [None]:
# Name the single predictor column and the response column to be modelled.
independent_variable, target_variable = 'engine-size', 'price'

In [None]:
# Hold out 30% of the rows for testing; the fixed random_state makes the
# split reproducible.
feature_series = new_data[independent_variable]
target_series = new_data[target_variable]
A_train, A_test, B_train, B_test = train_test_split(
    feature_series,
    target_series,
    test_size=0.3,
    random_state=40,
)

# The estimator expects a 2-D feature matrix, so turn each 1-D feature
# Series into a single-column array.
A_train = A_train.to_numpy().reshape(-1, 1)
A_test = A_test.to_numpy().reshape(-1, 1)

In [None]:
# Create the linear regression estimator and fit it on the training split
# (fit() returns the fitted estimator itself).
model_1 = LinearRegression().fit(A_train, B_train)

# Predict the target for the held-out test features.
y_pred = model_1.predict(A_test)

In [None]:
# Evaluate the model's performance on the test split.
mse_1 = mean_squared_error(B_test, y_pred)
# RMSE is the square root of the MSE. Computing it directly avoids the
# `squared=False` argument, which was deprecated in scikit-learn 1.4 and
# removed in 1.6.
rmse_1 = np.sqrt(mse_1)
r2_1 = r2_score(B_test, y_pred)

Results of Solution 4 after building the model.

In [None]:
# Report each evaluation metric for the engine-size model on its own line.
for label, value in (
    ("Mean Squared Error (MSE):", mse_1),
    ("Root Mean Squared Error (RMSE):", rmse_1),
    ("R-squared (R2):", r2_1),
):
    print(label, value)

Mean Squared Error (MSE): 9683687.279391937
Root Mean Squared Error (RMSE): 3111.862349043083
R-squared (R2): 0.615672224689068


**Solution 5:**

Use `horsepower` as the independent variable.

In [None]:
# Name the single predictor column and the response column to be modelled.
independent_variable, target_variable = 'horsepower', 'price'

In [None]:
# Split the data into training and testing sets (30% held out; the fixed
# random_state makes the split reproducible).
A1_train, A1_test, B1_train, B1_test = train_test_split(new_data[independent_variable], new_data[target_variable], test_size=0.3, random_state=40)

# Reshape the 1-D feature Series into single-column 2-D arrays, the input
# shape the estimator expects.
A1_train = A1_train.values.reshape(-1, 1)
A1_test = A1_test.values.reshape(-1, 1)

# Initialize and train the linear regression model
model_2 = LinearRegression()
model_2.fit(A1_train, B1_train)

# Make predictions on the testing set
y_pred = model_2.predict(A1_test)

# Evaluate the model's performance.
mse = mean_squared_error(B1_test, y_pred)
# RMSE is the square root of the MSE. Computing it directly avoids the
# `squared=False` argument, which was deprecated in scikit-learn 1.4 and
# removed in 1.6.
rmse = np.sqrt(mse)
r2 = r2_score(B1_test, y_pred)


Results of solution 5

In [None]:
# Report each evaluation metric for the horsepower model on its own line.
for label, value in (
    ("Mean Squared Error (MSE):", mse),
    ("Root Mean Squared Error (RMSE):", rmse),
    ("R-squared (R2):", r2),
):
    print(label, value)

Mean Squared Error (MSE): 29897790.25119074
Root Mean Squared Error (RMSE): 5467.887183473224
R-squared (R2): -0.18658842261524633
