### SPLITTING DATA FOR TRAINING AND TESTING

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split

# Load the preprocessed dataset
df = pd.read_csv('cleaned_car_data_experimental.csv')

# Unique models and manufacturers, in order of first appearance in the file.
models = df['Model'].unique()
manufacturers = df['Manufacturer'].unique()

# Split every (Model, Manufacturer) group 80/20 so each pair is represented in
# both sets. A single groupby pass replaces scanning the whole frame once per
# (model, manufacturer) combination, most of which do not exist in the data.
model_rank = {name: position for position, name in enumerate(models)}
manufacturer_rank = {name: position for position, name in enumerate(manufacturers)}

pair_groups = list(df.groupby(['Model', 'Manufacturer'], sort=False))
# Visit the groups model-first, then manufacturer, both in first-appearance
# order, so the rows end up in the same order as a nested loop over
# `models` and `manufacturers` would produce.
pair_groups.sort(key=lambda item: (model_rank[item[0][0]], manufacturer_rank[item[0][1]]))

train_parts = []
test_parts = []
for _, model_manufacturer_pairs in pair_groups:
    if len(model_manufacturer_pairs) < 2:
        # train_test_split raises ValueError when the split would leave the
        # train set empty (one row with test_size=0.2); keep such rows for training.
        train_parts.append(model_manufacturer_pairs)
        continue

    train_pairs, test_pairs = train_test_split(model_manufacturer_pairs, test_size=0.2, random_state=12)
    train_parts.append(train_pairs)
    test_parts.append(test_pairs)

# Concatenate once at the end instead of growing the frames inside the loop.
# pd.concat rejects an empty list, so fall back to an empty frame.
df_train = pd.concat(train_parts) if train_parts else pd.DataFrame()
df_test = pd.concat(test_parts) if test_parts else pd.DataFrame()

print(df_train.shape, df_test.shape)

In [None]:
# Remove the columns that are not fed to the models from both splits.
dropped_columns = ["Manufacturer", "Model", "Fuel Type"]
df_train = df_train.drop(columns=dropped_columns)
df_test = df_test.drop(columns=dropped_columns)

# Print both shapes explicitly: a bare expression is only echoed by the
# notebook when it is the last statement of the cell.
print(df_train.shape, df_test.shape)

In [None]:
# Discard every row that still has at least one missing value, in both splits.
df_train = df_train.dropna(axis="index", how="any")
df_test = df_test.dropna(axis="index", how="any")

### DATA SCALING

In [None]:
from sklearn.preprocessing import MinMaxScaler

# Only int64/float64 columns are rescaled; columns of any other dtype are left as they are.
# NOTE(review): if the target column is int64/float64 it is rescaled here too — confirm that is intended.
non_bool_columns = df_train.select_dtypes(include=['int64', 'float64']).columns

# Learn the per-column min/max from the training rows only, then apply the
# same fitted scaler to the test rows.
scaler = MinMaxScaler()
df_train[non_bool_columns] = scaler.fit_transform(df_train[non_bool_columns])
df_test[non_bool_columns] = scaler.transform(df_test[non_bool_columns])

# Preview the first rows of each transformed split.
for heading, frame in (("Transformed df_train:", df_train), ("\nTransformed df_test:", df_test)):
    print(heading)
    print(frame.head())

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import Adam


# Separate the regression target from the feature columns for both splits.
target_column = 'Price'
X_train, y_train = df_train.drop(columns=[target_column]), df_train[target_column]
X_test, y_test = df_test.drop(columns=[target_column]), df_test[target_column]

print(X_train.shape, X_test.shape)

# Fully connected regressor: two ReLU hidden layers (64 and 32 units) followed
# by a single output unit with no activation.
n_features = X_train.shape[1]
model = Sequential()
model.add(Dense(64, input_shape=(n_features,), activation='relu'))
model.add(Dense(32, activation='relu'))
model.add(Dense(1))  # Output layer for regression

# Train with Adam, minimising mean absolute error.
model.compile(optimizer=Adam(learning_rate=0.001), loss='mae')

In [None]:
# Keras carves the validation_split fraction off the *end* of the arrays,
# before any shuffling. df_train is concatenated group by group, so without
# an explicit shuffle the validation rows would come only from the groups
# appended last. Shuffle once with a fixed seed so the held-out 20% is a
# random sample of the training rows.
shuffled_order = np.random.default_rng(12).permutation(len(X_train))
history = model.fit(
    X_train.iloc[shuffled_order],
    y_train.iloc[shuffled_order],
    validation_split=0.2,
    epochs=10,
    batch_size=32,
    verbose=1,
)

# Predictions of the trained network on the test features.
y_pred = model.predict(X_test)

In [None]:
# Draw the per-epoch training and validation loss recorded by fit().
fig = plt.figure(figsize=(12, 6))
loss_axes = fig.add_subplot(1, 2, 1)  # left cell of a 1x2 grid

for history_key, curve_label in (('loss', 'Training Loss'), ('val_loss', 'Validation Loss')):
    loss_axes.plot(history.history[history_key], label=curve_label)

loss_axes.set_title('Training and Validation Loss')
loss_axes.set_xlabel('Epochs')
loss_axes.set_ylabel('Loss')
loss_axes.legend()

fig.tight_layout()
plt.show()

In [None]:
# Score the model on the held-out test split. The model was compiled with
# loss='mae' and no extra metrics, so evaluate() yields that single loss value.
# NOTE(review): if 'Price' is an int64/float64 column it was min-max scaled
# earlier, so this number would be in scaled units, not price units — confirm.
test_loss = model.evaluate(X_test, y_test, verbose=1)
print(f"Test Loss: {test_loss:.4f}")

In [None]:
# Display the target column of the test split as it currently stands
# (i.e. after any scaling applied to df_test earlier in the notebook).
print(df_test["Price"])

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error

# Linear-regression baseline fitted on X_train / y_train.
# NOTE: this rebinds the name `model`, replacing whatever it referred to before.
model = LinearRegression().fit(X_train, y_train)

# Predict on the test features and report the mean absolute error.
y_pred = model.predict(X_test)
mae = mean_absolute_error(y_true=y_test, y_pred=y_pred)
print(f"Mean Absolute Error: {mae}")