In [1]:
import pandas as pd
from nltk.sentiment.vader import SentimentIntensityAnalyzer
import nltk
from datetime import datetime
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestRegressor
#from xgboost import XGBRegressor
from sklearn.metrics import mean_squared_error
from sklearn.linear_model import LinearRegression, Lasso, Ridge
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import GridSearchCV
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
nltk.download('vader_lexicon')
sid = SentimentIntensityAnalyzer()
current_year = datetime.now().year

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     /Users/TimmyRen/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


In [2]:
df = pd.read_csv('final_cleaned.csv', index_col = False)
car_data_encoded = pd.get_dummies(df, columns=['Model Full Name', 'Drivetrain', 'Vehicle Class'], drop_first=True)
selected_topics = ['Topic_0', 'Topic_2', 'Topic_3', 'Topic_4', 'Topic_5', 'Topic_7', 'Topic_8', 'Topic_9']

for topic in selected_topics:
    car_data_encoded[f'{topic}_Weighted'] = car_data_encoded[topic] * car_data_encoded['Sentiment Score']

weighted_topic_features = [f'{topic}_Weighted' for topic in selected_topics]

In [3]:
results_list = []

# Functional

In [4]:
functional_features = [
    'Horsepower (hp)', 'Curb Weight (lbs)', 'Combined MPG', 'Fuel Capacity (gallons)', 'Age'
] + [col for col in car_data_encoded.columns if col.startswith('Drivetrain_')]

vehicle_class_features = [col for col in car_data_encoded.columns if col.startswith('Vehicle Class_')]

functional_features += vehicle_class_features

features = functional_features

### Functional Unaggregated

In [5]:
#unaggregated

y_price = car_data_encoded['Average KBB Fair Price ($)']
X = car_data_encoded[features]

X_train, X_test, y_train, y_test = train_test_split(X, y_price, test_size=0.2, random_state=15)


price_model = LinearRegression()
price_model.fit(X_train, y_train)
y_pred = price_model.predict(X_test)

r2 = r2_score(y_test, y_pred)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))


price_coefficients = dict(zip(X.columns, price_model.coef_))

# Print R^2 and RMSE
print(f"R^2: {r2:.4f}")
print(f"\nRMSE: {rmse:.4f}")

# Print Model Coefficients
print(f"\nModel Coefficients:")
for feature, coef in price_coefficients.items():
    print(f"  {feature}: {coef:.4f}")

# Print Intercept
print(f"\nIntercept: {price_model.intercept_:.4f}")

division_title = "Functional Unaggregated"
model_type = "Linear Regression"

# Append the results to the list as a dictionary
results_list.append({
    "Division Title": division_title,
    "Model Type": model_type,
    "Best Alpha": None,
    "R^2": r2,
    "RMSE": rmse
})

R^2: 0.7800

RMSE: 4123.7972

Model Coefficients:
  Horsepower (hp): 47.0750
  Curb Weight (lbs): -2.1557
  Combined MPG: -7.0564
  Fuel Capacity (gallons): 185.2962
  Age: -1750.4193
  Drivetrain_4WD: 4185.8522
  Drivetrain_AWD: -854.1422
  Drivetrain_FWD: -2477.9903
  Drivetrain_RWD: 578.9442
  Vehicle Class_compact-suv: -935.3245
  Vehicle Class_electric-car: 3351.7157
  Vehicle Class_electric-suv: 4715.5914
  Vehicle Class_full-size: -1152.8683
  Vehicle Class_full-size-truck: -1012.8441
  Vehicle Class_hybrid-car: 559.6436
  Vehicle Class_hybrid-suv: -3184.6762
  Vehicle Class_luxury-hybrid-suv: 3100.9584
  Vehicle Class_mid-size: 1234.1979
  Vehicle Class_mid-size-suv: 967.6763
  Vehicle Class_mid-size-truck: 2313.9656
  Vehicle Class_minivan: 1583.3936

Intercept: 25117.5797


#### Functional Unaggregated Lasso

In [6]:
alpha_values = [0.1, 1.0, 10.0, 100, 200, 300, 500, 1000]
best_rmse = float('inf')  # Initialize with a very high RMSE
best_alpha = None
best_coefficients = None
best_intercept = None

# Loop through each alpha value
for alpha in alpha_values:
    # Initialize and train the Lasso model with the current alpha
    lasso_model = Lasso(alpha=alpha)
    lasso_model.fit(X_train, y_train)
    y_pred = lasso_model.predict(X_test)
    
    # Calculate evaluation metrics
    r2 = r2_score(y_test, y_pred)
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    
    # Print RMSE and R^2 for this alpha
    print(f"\nAlpha: {alpha}")
    print(f"  R^2: {r2:.4f}")
    print(f"  RMSE: {rmse:.4f}")
    
    # Check if this model has the best RMSE so far
    if rmse < best_rmse:
        best_rmse = rmse
        best_alpha = alpha
        best_r2 = r2
        best_coefficients = dict(zip(X.columns, lasso_model.coef_))
        best_intercept = lasso_model.intercept_

# Print the best alpha and its coefficients
print(f"\nBest Alpha based on RMSE: {best_alpha}")
print(f"Best R^2: {best_r2:.4f}")
print(f"Best RMSE: {best_rmse:.4f}")

# Print coefficients for the best alpha
print("\nModel Coefficients for the Best Alpha:")
for feature, coef in best_coefficients.items():
    print(f"  {feature}: {coef:.4f}")

# Print Intercept for the best alpha
print(f"\nIntercept: {best_intercept:.4f}")

division_title = "Functional Unaggregated"
model_type = "Lasso"

results_list.append({
    "Division Title": division_title,
    "Model Type": model_type,
    "Best Alpha": best_alpha,
    "R^2": best_r2,
    "RMSE": best_rmse
})



Alpha: 0.1
  R^2: 0.7800
  RMSE: 4123.7876

Alpha: 1.0
  R^2: 0.7800
  RMSE: 4123.8508

Alpha: 10.0
  R^2: 0.7789
  RMSE: 4133.9503

Alpha: 100
  R^2: 0.7631
  RMSE: 4279.5725

Alpha: 200
  R^2: 0.7520
  RMSE: 4378.6120

Alpha: 300
  R^2: 0.7434
  RMSE: 4454.1856

Alpha: 500
  R^2: 0.7307
  RMSE: 4562.9037

Alpha: 1000
  R^2: 0.7281
  RMSE: 4584.6565

Best Alpha based on RMSE: 0.1
Best R^2: 0.7800
Best RMSE: 4123.7876

Model Coefficients for the Best Alpha:
  Horsepower (hp): 47.0864
  Curb Weight (lbs): -2.1578
  Combined MPG: -6.7547
  Fuel Capacity (gallons): 185.1604
  Age: -1750.1532
  Drivetrain_4WD: 4188.3909
  Drivetrain_AWD: -850.3292
  Drivetrain_FWD: -2474.4769
  Drivetrain_RWD: 581.1720
  Vehicle Class_compact-suv: -946.0962
  Vehicle Class_electric-car: 3316.0092
  Vehicle Class_electric-suv: 4678.1378
  Vehicle Class_full-size: -1160.8719
  Vehicle Class_full-size-truck: -1013.9349
  Vehicle Class_hybrid-car: 540.3572
  Vehicle Class_hybrid-suv: -3189.2031
  Vehicle Clas

#### Functional Unaggregated Ridge

In [7]:
best_rmse = float('inf')  # Initialize with a very high RMSE
best_alpha = None
best_coefficients = None
best_intercept = None

# Loop through each alpha value
for alpha in alpha_values:
    # Initialize and train the Lasso model with the current alpha
    Ridge_model = Ridge(alpha=alpha)
    Ridge_model.fit(X_train, y_train)
    y_pred = Ridge_model.predict(X_test)
    
    # Calculate evaluation metrics
    r2 = r2_score(y_test, y_pred)
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    
    # Print RMSE and R^2 for this alpha
    print(f"\nAlpha: {alpha}")
    print(f"  R^2: {r2:.4f}")
    print(f"  RMSE: {rmse:.4f}")
    
    # Check if this model has the best RMSE so far
    if rmse < best_rmse:
        best_rmse = rmse
        best_alpha = alpha
        best_r2 = r2
        best_coefficients = dict(zip(X.columns, lasso_model.coef_))
        best_intercept = lasso_model.intercept_

# Print the best alpha and its coefficients
print(f"\nBest Alpha based on RMSE: {best_alpha}")
print(f"Best R^2: {best_r2:.4f}")
print(f"Best RMSE: {best_rmse:.4f}")

# Print coefficients for the best alpha
print("\nModel Coefficients for the Best Alpha:")
for feature, coef in best_coefficients.items():
    print(f"  {feature}: {coef:.4f}")

# Print Intercept for the best alpha
print(f"\nIntercept: {best_intercept:.4f}")

division_title = "Functional Unaggregated"
model_type = "Ridge"

results_list.append({
    "Division Title": division_title,
    "Model Type": model_type,
    "Best Alpha": best_alpha,
    "R^2": best_r2,
    "RMSE": best_rmse
})


Alpha: 0.1
  R^2: 0.7800
  RMSE: 4123.7975

Alpha: 1.0
  R^2: 0.7800
  RMSE: 4123.8085

Alpha: 10.0
  R^2: 0.7799
  RMSE: 4124.4610

Alpha: 100
  R^2: 0.7786
  RMSE: 4137.2946

Alpha: 200
  R^2: 0.7770
  RMSE: 4152.0822

Alpha: 300
  R^2: 0.7755
  RMSE: 4166.3254

Alpha: 500
  R^2: 0.7726
  RMSE: 4192.3920

Alpha: 1000
  R^2: 0.7669
  RMSE: 4244.7912

Best Alpha based on RMSE: 0.1
Best R^2: 0.7800
Best RMSE: 4123.7975

Model Coefficients for the Best Alpha:
  Horsepower (hp): 54.9404
  Curb Weight (lbs): -0.8939
  Combined MPG: 16.6959
  Fuel Capacity (gallons): 50.7800
  Age: -1623.9290
  Drivetrain_4WD: 0.0000
  Drivetrain_AWD: -0.0000
  Drivetrain_FWD: -0.0000
  Drivetrain_RWD: 0.0000
  Vehicle Class_compact-suv: -0.0000
  Vehicle Class_electric-car: 0.0000
  Vehicle Class_electric-suv: 0.0000
  Vehicle Class_full-size: -0.0000
  Vehicle Class_full-size-truck: -0.0000
  Vehicle Class_hybrid-car: -0.0000
  Vehicle Class_hybrid-suv: -0.0000
  Vehicle Class_luxury-hybrid-suv: 0.0000
 

### Functional Aggregated

In [8]:
#aggregated
aggregated_data = df.groupby(['Model Full Name', 'Year']).agg({
    'Horsepower (hp)': 'mean',
    'Curb Weight (lbs)': 'mean',
    'Combined MPG': 'mean',
    'Fuel Capacity (gallons)': 'mean',
    'Age': 'mean',
    'Average KBB Fair Price ($)': 'mean',
    'Vehicle Class': 'first',
    'Car Brand': 'first',
    'Car Model': 'first',
    'Drivetrain': 'first'
}).reset_index()

car_data_encoded_agg = pd.get_dummies(aggregated_data, columns=['Car Brand', 'Car Model', 'Drivetrain', 'Vehicle Class'], drop_first=True)

y_price = car_data_encoded_agg['Average KBB Fair Price ($)']
X = car_data_encoded_agg[functional_features]

X_train, X_test, y_train, y_test = train_test_split(X, y_price, test_size=0.2, random_state=15)


price_model = LinearRegression()
price_model.fit(X_train, y_train)
y_pred = price_model.predict(X_test)

r2 = r2_score(y_test, y_pred)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))


price_coefficients = dict(zip(X.columns, price_model.coef_))

# Print R^2 and RMSE
print(f"R^2: {r2:.4f}")
print(f"\nRMSE: {rmse:.4f}")

# Print Model Coefficients
print("Model Coefficients:")
for feature, coef in price_coefficients.items():
    print(f"  {feature}: {coef:.4f}")

# Print Intercept
print(f"\nIntercept: {price_model.intercept_:.4f}")

division_title = "Functional Aggregated"
model_type = "Linear Regression"

results_list.append({
    "Division Title": division_title,
    "Model Type": model_type,
    "Best Alpha": None,
    "R^2": r2,
    "RMSE": rmse
})


R^2: 0.6385

RMSE: 5149.0639
Model Coefficients:
  Horsepower (hp): 45.7558
  Curb Weight (lbs): -0.8214
  Combined MPG: 11.3679
  Fuel Capacity (gallons): 190.6051
  Age: -2057.9485
  Drivetrain_4WD: 3148.6491
  Drivetrain_AWD: -2380.2253
  Drivetrain_FWD: -3474.6153
  Drivetrain_RWD: -453.2229
  Vehicle Class_compact-suv: -229.2713
  Vehicle Class_electric-car: 6718.1756
  Vehicle Class_electric-suv: 4723.7591
  Vehicle Class_full-size: -673.6605
  Vehicle Class_full-size-truck: -3125.2154
  Vehicle Class_hybrid-car: 1138.7061
  Vehicle Class_hybrid-suv: -2870.1388
  Vehicle Class_luxury-hybrid-suv: 5894.6217
  Vehicle Class_mid-size: 2429.0707
  Vehicle Class_mid-size-suv: 1234.6862
  Vehicle Class_mid-size-truck: 1678.9081
  Vehicle Class_minivan: 2773.7374

Intercept: 23573.2972


#### Functional Aggregated Lasso

In [9]:
best_rmse = float('inf')  # Initialize with a very high RMSE
best_alpha = None
best_coefficients = None
best_intercept = None

# Loop through each alpha value
for alpha in alpha_values:
    # Initialize and train the Lasso model with the current alpha
    lasso_model = Lasso(alpha=alpha)
    lasso_model.fit(X_train, y_train)
    y_pred = lasso_model.predict(X_test)
    
    # Calculate evaluation metrics
    r2 = r2_score(y_test, y_pred)
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    
    # Print RMSE and R^2 for this alpha
    print(f"\nAlpha: {alpha}")
    print(f"  R^2: {r2:.4f}")
    print(f"  RMSE: {rmse:.4f}")
    
    # Check if this model has the best RMSE so far
    if rmse < best_rmse:
        best_rmse = rmse
        best_alpha = alpha
        best_r2 = r2
        best_coefficients = dict(zip(X.columns, lasso_model.coef_))
        best_intercept = lasso_model.intercept_

# Print the best alpha and its coefficients
print(f"\nBest Alpha based on RMSE: {best_alpha}")
print(f"Best R^2: {best_r2:.4f}")
print(f"Best RMSE: {best_rmse:.4f}")

# Print coefficients for the best alpha
print("\nModel Coefficients for the Best Alpha:")
for feature, coef in best_coefficients.items():
    print(f"  {feature}: {coef:.4f}")

# Print Intercept for the best alpha
print(f"\nIntercept: {best_intercept:.4f}")

division_title = "Functional Aggregated"
model_type = "Lasso"

results_list.append({
    "Division Title": division_title,
    "Model Type": model_type,
    "Best Alpha": best_alpha,
    "R^2": best_r2,
    "RMSE": best_rmse
})



Alpha: 0.1
  R^2: 0.6385
  RMSE: 5148.5454

Alpha: 1.0
  R^2: 0.6392
  RMSE: 5143.9452

Alpha: 10.0
  R^2: 0.6456
  RMSE: 5097.6044

Alpha: 100
  R^2: 0.6615
  RMSE: 4981.9476

Alpha: 200
  R^2: 0.6496
  RMSE: 5068.7710

Alpha: 300
  R^2: 0.6413
  RMSE: 5128.5129

Alpha: 500
  R^2: 0.6410
  RMSE: 5130.6323

Alpha: 1000
  R^2: 0.6505
  RMSE: 5062.9034

Best Alpha based on RMSE: 100
Best R^2: 0.6615
Best RMSE: 4981.9476

Model Coefficients for the Best Alpha:
  Horsepower (hp): 49.8100
  Curb Weight (lbs): -0.5408
  Combined MPG: 60.2715
  Fuel Capacity (gallons): 121.7582
  Age: -1981.5986
  Drivetrain_4WD: 2389.6341
  Drivetrain_AWD: -721.1558
  Drivetrain_FWD: -1735.8183
  Drivetrain_RWD: 0.0000
  Vehicle Class_compact-suv: -455.5424
  Vehicle Class_electric-car: 0.0000
  Vehicle Class_electric-suv: 0.0000
  Vehicle Class_full-size: -436.3474
  Vehicle Class_full-size-truck: -0.0000
  Vehicle Class_hybrid-car: -0.0000
  Vehicle Class_hybrid-suv: -0.0000
  Vehicle Class_luxury-hybrid-

#### Functional Aggregated Ridge

In [10]:
best_rmse = float('inf')  # Initialize with a very high RMSE
best_alpha = None
best_coefficients = None
best_intercept = None

# Loop through each alpha value
for alpha in alpha_values:
    # Initialize and train the Lasso model with the current alpha
    ridge_model = Ridge(alpha=alpha)
    ridge_model.fit(X_train, y_train)
    y_pred = ridge_model.predict(X_test)
    
    # Calculate evaluation metrics
    r2 = r2_score(y_test, y_pred)
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    
    # Print RMSE and R^2 for this alpha
    print(f"\nAlpha: {alpha}")
    print(f"  R^2: {r2:.4f}")
    print(f"  RMSE: {rmse:.4f}")
    
    # Check if this model has the best RMSE so far
    if rmse < best_rmse:
        best_rmse = rmse
        best_alpha = alpha
        best_r2 = r2
        best_coefficients = dict(zip(X.columns, lasso_model.coef_))
        best_intercept = lasso_model.intercept_

# Print the best alpha and its coefficients
print(f"\nBest Alpha based on RMSE: {best_alpha}")
print(f"Best R^2: {best_r2:.4f}")
print(f"Best RMSE: {best_rmse:.4f}")

# Print coefficients for the best alpha
print("\nModel Coefficients for the Best Alpha:")
for feature, coef in best_coefficients.items():
    print(f"  {feature}: {coef:.4f}")

# Print Intercept for the best alpha
print(f"\nIntercept: {best_intercept:.4f}")

division_title = "Functional Aggregated"
model_type = "Ridge"

results_list.append({
    "Division Title": division_title,
    "Model Type": model_type,
    "Best Alpha": best_alpha,
    "R^2": best_r2,
    "RMSE": best_rmse
})


Alpha: 0.1
  R^2: 0.6397
  RMSE: 5140.4791

Alpha: 1.0
  R^2: 0.6469
  RMSE: 5088.8217

Alpha: 10.0
  R^2: 0.6616
  RMSE: 4981.1846

Alpha: 100
  R^2: 0.6538
  RMSE: 5038.9718

Alpha: 200
  R^2: 0.6539
  RMSE: 5038.0079

Alpha: 300
  R^2: 0.6562
  RMSE: 5020.7598

Alpha: 500
  R^2: 0.6611
  RMSE: 4985.4243

Alpha: 1000
  R^2: 0.6654
  RMSE: 4953.3072

Best Alpha based on RMSE: 1000
Best R^2: 0.6654
Best RMSE: 4953.3072

Model Coefficients for the Best Alpha:
  Horsepower (hp): 55.3304
  Curb Weight (lbs): 0.1221
  Combined MPG: 54.3327
  Fuel Capacity (gallons): 25.2373
  Age: -1903.6774
  Drivetrain_4WD: 0.0000
  Drivetrain_AWD: -0.0000
  Drivetrain_FWD: -0.0000
  Drivetrain_RWD: 0.0000
  Vehicle Class_compact-suv: -0.0000
  Vehicle Class_electric-car: 0.0000
  Vehicle Class_electric-suv: 0.0000
  Vehicle Class_full-size: -0.0000
  Vehicle Class_full-size-truck: -0.0000
  Vehicle Class_hybrid-car: -0.0000
  Vehicle Class_hybrid-suv: -0.0000
  Vehicle Class_luxury-hybrid-suv: 0.0000
 

# Functional and Experiential

### Functional and Experiental Unaggregated

In [25]:
features = functional_features + weighted_topic_features + [col for col in car_data_encoded.columns if col.startswith('Model Full Name_')]

X = car_data_encoded[features]
y_price = car_data_encoded['Average KBB Fair Price ($)']

X_train, X_test, y_train, y_test = train_test_split(X, y_price, test_size=0.2, random_state=15)


price_model = LinearRegression()
price_model.fit(X_train, y_train)
y_pred = price_model.predict(X_test)

r2 = r2_score(y_test, y_pred)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))


price_coefficients = dict(zip(X.columns, price_model.coef_))

print(f"R^2: {r2:.4f}")
print(f"\nRMSE: {rmse:.4f}")

# Print Model Coefficients
print(f"\nModel Coefficients:")
for feature, coef in price_coefficients.items():
    print(f"  {feature}: {coef:.4f}")

# Print Intercept
print(f"\nIntercept: {price_model.intercept_:.4f}")

division_title = "Functional & Experiential Unaggregated"
model_type = "Linear Regression"

results_list.append({
    "Division Title": division_title,
    "Model Type": model_type,
    "Best Alpha": None,
    "R^2": r2,
    "RMSE": rmse
})

R^2: 0.9119

RMSE: 2610.2975

Model Coefficients:
  Horsepower (hp): 5.6206
  Curb Weight (lbs): -4.1024
  Combined MPG: -39.8883
  Fuel Capacity (gallons): -70.7154
  Age: -1818.9156
  Drivetrain_4WD: 408.0897
  Drivetrain_AWD: -1565.4837
  Drivetrain_FWD: -4207.0758
  Drivetrain_RWD: -2462.8222
  Vehicle Class_compact-suv: -375379290838.9056
  Vehicle Class_electric-car: -989468512746.6051
  Vehicle Class_electric-suv: -767464473718.1483
  Vehicle Class_full-size: -1190367864526.7673
  Vehicle Class_full-size-truck: -2743636871254.4028
  Vehicle Class_hybrid-car: 1411733363057.9629
  Vehicle Class_hybrid-suv: -1411697931.9679
  Vehicle Class_luxury-hybrid-suv: 501548094270.0939
  Vehicle Class_mid-size: -679967966240.3243
  Vehicle Class_mid-size-suv: -580398962017.1594
  Vehicle Class_mid-size-truck: 30130800828.7567
  Vehicle Class_minivan: -1462356663564.4062
  Topic_0_Weighted: -7599.1862
  Topic_2_Weighted: -1092.6232
  Topic_3_Weighted: 5602.0678
  Topic_4_Weighted: -1684.7956


#### Functional and Experiental Unaggregated Lasso

In [26]:
best_rmse = float('inf')  # Initialize with a very high RMSE
best_alpha = None
best_coefficients = None
best_intercept = None

# Loop through each alpha value
for alpha in alpha_values:
    # Initialize and train the Lasso model with the current alpha
    lasso_model = Lasso(alpha=alpha)
    lasso_model.fit(X_train, y_train)
    y_pred = lasso_model.predict(X_test)
    
    # Calculate evaluation metrics
    r2 = r2_score(y_test, y_pred)
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    
    # Print RMSE and R^2 for this alpha
    print(f"\nAlpha: {alpha}")
    print(f"  R^2: {r2:.4f}")
    print(f"  RMSE: {rmse:.4f}")
    
    # Check if this model has the best RMSE so far
    if rmse < best_rmse:
        best_rmse = rmse
        best_alpha = alpha
        best_r2 = r2
        best_coefficients = dict(zip(X.columns, lasso_model.coef_))
        best_intercept = lasso_model.intercept_

# Print the best alpha and its coefficients
print(f"\nBest Alpha based on RMSE: {best_alpha}")
print(f"Best R^2: {best_r2:.4f}")
print(f"Best RMSE: {best_rmse:.4f}")

# Print coefficients for the best alpha
print("\nModel Coefficients for the Best Alpha:")
for feature, coef in best_coefficients.items():
    print(f"  {feature}: {coef:.4f}")

# Print Intercept for the best alpha
print(f"\nIntercept: {best_intercept:.4f}")

division_title = "Functional & Experiential Unaggregated"
model_type = "Lasso"

results_list.append({
    "Division Title": division_title,
    "Model Type": model_type,
    "Best Alpha": best_alpha,
    "R^2": best_r2,
    "RMSE": best_rmse
})

  model = cd_fast.enet_coordinate_descent(



Alpha: 0.1
  R^2: 0.9118
  RMSE: 2611.2940

Alpha: 1.0
  R^2: 0.9091
  RMSE: 2650.4459

Alpha: 10.0
  R^2: 0.8666
  RMSE: 3211.5834

Alpha: 100
  R^2: 0.7631
  RMSE: 4279.5725

Alpha: 200
  R^2: 0.7520
  RMSE: 4378.6120

Alpha: 300
  R^2: 0.7434
  RMSE: 4454.1856

Alpha: 500
  R^2: 0.7307
  RMSE: 4562.9037

Alpha: 1000
  R^2: 0.7281
  RMSE: 4584.6565

Best Alpha based on RMSE: 0.1
Best R^2: 0.9118
Best RMSE: 2611.2940

Model Coefficients for the Best Alpha:
  Horsepower (hp): 6.7497
  Curb Weight (lbs): -4.0500
  Combined MPG: -39.2794
  Fuel Capacity (gallons): -54.4724
  Age: -1818.6749
  Drivetrain_4WD: 448.9855
  Drivetrain_AWD: -1574.2018
  Drivetrain_FWD: -4224.6725
  Drivetrain_RWD: -2386.4345
  Vehicle Class_compact-suv: -5657.8783
  Vehicle Class_electric-car: 1566.9592
  Vehicle Class_electric-suv: 6584.0864
  Vehicle Class_full-size: -311.2188
  Vehicle Class_full-size-truck: 795.0186
  Vehicle Class_hybrid-car: -6235.9042
  Vehicle Class_hybrid-suv: -2913.0119
  Vehicle Cl

#### Functional and Experiental Unaggregated Ridge

In [13]:
best_rmse = float('inf')  # Initialize with a very high RMSE
best_alpha = None
best_coefficients = None
best_intercept = None

# Loop through each alpha value
for alpha in alpha_values:
    # Initialize and train the Lasso model with the current alpha
    ridge_model = Ridge(alpha=alpha)
    ridge_model.fit(X_train, y_train)
    y_pred = ridge_model.predict(X_test)
    
    # Calculate evaluation metrics
    r2 = r2_score(y_test, y_pred)
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    
    # Print RMSE and R^2 for this alpha
    print(f"\nAlpha: {alpha}")
    print(f"  R^2: {r2:.4f}")
    print(f"  RMSE: {rmse:.4f}")
    
    # Check if this model has the best RMSE so far
    if rmse < best_rmse:
        best_rmse = rmse
        best_alpha = alpha
        best_r2 = r2
        best_coefficients = dict(zip(X.columns, lasso_model.coef_))
        best_intercept = lasso_model.intercept_

# Print the best alpha and its coefficients
print(f"\nBest Alpha based on RMSE: {best_alpha}")
print(f"Best R^2: {best_r2:.4f}")
print(f"Best RMSE: {best_rmse:.4f}")

# Print coefficients for the best alpha
print("\nModel Coefficients for the Best Alpha:")
for feature, coef in best_coefficients.items():
    print(f"  {feature}: {coef:.4f}")

# Print Intercept for the best alpha
print(f"\nIntercept: {best_intercept:.4f}")

division_title = "Functional & Experiential Unaggregated"
model_type = "Ridge"

results_list.append({
    "Division Title": division_title,
    "Model Type": model_type,
    "Best Alpha": best_alpha,
    "R^2": best_r2,
    "RMSE": best_rmse
})


Alpha: 0.1
  R^2: 0.9118
  RMSE: 2611.1601

Alpha: 1.0
  R^2: 0.9104
  RMSE: 2631.7941

Alpha: 10.0
  R^2: 0.8974
  RMSE: 2815.9590

Alpha: 100
  R^2: 0.8510
  RMSE: 3393.9230

Alpha: 200
  R^2: 0.8286
  RMSE: 3640.3930

Alpha: 300
  R^2: 0.8157
  RMSE: 3774.7758

Alpha: 500
  R^2: 0.8008
  RMSE: 3924.4103

Alpha: 1000
  R^2: 0.7833
  RMSE: 4092.7527

Best Alpha based on RMSE: 0.1
Best R^2: 0.9118
Best RMSE: 2611.1601

Model Coefficients for the Best Alpha:
  Horsepower (hp): 54.9404
  Curb Weight (lbs): -0.8939
  Combined MPG: 16.6959
  Fuel Capacity (gallons): 50.7800
  Age: -1623.9290
  Drivetrain_4WD: 0.0000
  Drivetrain_AWD: -0.0000
  Drivetrain_FWD: -0.0000
  Drivetrain_RWD: 0.0000
  Vehicle Class_compact-suv: -0.0000
  Vehicle Class_electric-car: 0.0000
  Vehicle Class_electric-suv: 0.0000
  Vehicle Class_full-size: -0.0000
  Vehicle Class_full-size-truck: -0.0000
  Vehicle Class_hybrid-car: -0.0000
  Vehicle Class_hybrid-suv: -0.0000
  Vehicle Class_luxury-hybrid-suv: 0.0000
 

### Functional and Experiental Aggregated

In [14]:
df.columns

Index(['Unnamed: 0', 'id', 'Car Brand', 'Car Model', 'Year', 'Review',
       'Rating', 'Value Rating', 'Performance Rating', 'Quality Rating',
       'Comfort Rating', 'Reliability Rating', 'Styling Rating',
       'Vehicle Class', 'Model Full Name', 'Horsepower (hp)',
       'Curb Weight (lbs)', 'Combined MPG', 'Fuel Capacity (gallons)',
       'Drivetrain', 'Recommended Fuel', 'Average MSRP Price ($)',
       'Average KBB Fair Price ($)', 'Review_Topics', 'Topic_0', 'Topic_1',
       'Topic_2', 'Topic_3', 'Topic_4', 'Topic_5', 'Topic_6', 'Topic_7',
       'Topic_8', 'Topic_9', 'Sentiment Score', 'Depreciated Value',
       'Sentiment Bin', 'Weighted Sentiment Score', 'Topic_0_Weighted',
       'Topic_2_Weighted', 'Topic_3_Weighted', 'Topic_4_Weighted',
       'Topic_5_Weighted', 'Topic_7_Weighted', 'Topic_8_Weighted',
       'Topic_9_Weighted', 'Age'],
      dtype='object')

In [15]:
car_data_encoded_agg

Unnamed: 0,Model Full Name,Year,Horsepower (hp),Curb Weight (lbs),Combined MPG,Fuel Capacity (gallons),Age,Average KBB Fair Price ($),Car Brand_alfa-romeo,Car Brand_audi,...,Vehicle Class_electric-suv,Vehicle Class_full-size,Vehicle Class_full-size-truck,Vehicle Class_hybrid-car,Vehicle Class_hybrid-suv,Vehicle Class_luxury-hybrid-suv,Vehicle Class_mid-size,Vehicle Class_mid-size-suv,Vehicle Class_mid-size-truck,Vehicle Class_minivan
0,Acura Rdx,2010,240.0,3931.0,19.0,18.0,14.0,7210.0,False,False,...,False,False,False,False,False,False,False,True,False,False
1,Acura Rdx,2013,273.0,3838.0,22.0,16.0,11.0,10589.0,False,False,...,False,False,False,False,False,False,False,True,False,False
2,Acura Rdx,2019,272.0,3783.0,24.0,17.1,5.0,23046.5,False,False,...,False,False,False,False,False,False,False,True,False,False
3,Acura Tlx,2015,206.0,3483.0,28.0,17.2,9.0,11905.5,False,False,...,False,False,False,False,False,False,True,False,False,False
4,Acura Tlx,2021,272.0,3709.0,25.0,15.9,3.0,29701.0,False,False,...,False,False,False,False,False,False,True,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
321,Volvo Xc40,2019,187.0,3574.0,27.0,14.2,5.0,18941.0,False,False,...,False,False,False,False,False,False,False,False,False,False
322,Volvo Xc60,2010,235.0,4012.0,21.0,18.5,14.0,7055.0,False,False,...,False,False,False,False,False,False,False,True,False,False
323,Volvo Xc60,2018,316.0,4074.0,23.0,18.8,6.0,23469.0,False,False,...,False,False,False,False,False,False,False,True,False,False
324,Volvo Xc90,2010,235.0,4751.0,17.0,21.1,14.0,6471.0,False,False,...,False,False,False,False,False,False,False,False,False,False


In [16]:
car_data_encoded_agg.columns

Index(['Model Full Name', 'Year', 'Horsepower (hp)', 'Curb Weight (lbs)',
       'Combined MPG', 'Fuel Capacity (gallons)', 'Age',
       'Average KBB Fair Price ($)', 'Car Brand_alfa-romeo', 'Car Brand_audi',
       ...
       'Vehicle Class_electric-suv', 'Vehicle Class_full-size',
       'Vehicle Class_full-size-truck', 'Vehicle Class_hybrid-car',
       'Vehicle Class_hybrid-suv', 'Vehicle Class_luxury-hybrid-suv',
       'Vehicle Class_mid-size', 'Vehicle Class_mid-size-suv',
       'Vehicle Class_mid-size-truck', 'Vehicle Class_minivan'],
      dtype='object', length=221)

In [17]:
#aggregated
aggregated_data = df.groupby(['Model Full Name', 'Year']).agg({
    'Horsepower (hp)': 'mean',
    'Curb Weight (lbs)': 'mean',
    'Combined MPG': 'mean',
    'Fuel Capacity (gallons)': 'mean',
    'Age': 'mean',
    'Average KBB Fair Price ($)': 'mean',
    'Vehicle Class': 'first',
    'Car Brand': 'first',
    #'Car Model': 'first',
    'Topic_0_Weighted': 'mean',
    'Topic_2_Weighted': 'mean', 
    'Topic_3_Weighted': 'mean', 
    'Topic_4_Weighted': 'mean',
    'Topic_5_Weighted': 'mean', 
    'Topic_7_Weighted': 'mean', 
    'Topic_8_Weighted': 'mean',
    'Topic_9_Weighted': 'mean',
    'Drivetrain': 'first'
}).reset_index()

car_data_encoded_agg = pd.get_dummies(aggregated_data, columns=['Car Brand', 'Drivetrain', 'Vehicle Class'], drop_first=True)


X = car_data_encoded_agg.drop(columns=['Average KBB Fair Price ($)','Model Full Name'])
y_price = car_data_encoded_agg['Average KBB Fair Price ($)']

X_train, X_test, y_train, y_test = train_test_split(X, y_price, test_size=0.2, random_state=15)


price_model = LinearRegression()
price_model.fit(X_train, y_train)
y_pred = price_model.predict(X_test)

r2 = r2_score(y_test, y_pred)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))


price_coefficients = dict(zip(X.columns, price_model.coef_))

# Print R^2 and RMSE
print(f"R^2: {r2:.4f}")
print(f"\nRMSE: {rmse:.4f}")

# Print Model Coefficients
print(f"\nModel Coefficients:")
for feature, coef in price_coefficients.items():
    print(f"  {feature}: {coef:.4f}")

# Print Intercept
print(f"\nIntercept: {price_model.intercept_:.4f}")

division_title = "Functional & Experiential Aggregated"
model_type = "Linear Regression"

results_list.append({
    "Division Title": division_title,
    "Model Type": model_type,
    "Best Alpha": None,
    "R^2": r2,
    "RMSE": rmse
})


R^2: 0.6418

RMSE: 5125.4296

Model Coefficients:
  Year: 1054.7543
  Horsepower (hp): 41.3409
  Curb Weight (lbs): -1.1459
  Combined MPG: -39.4635
  Fuel Capacity (gallons): 286.1528
  Age: -1054.7543
  Topic_0_Weighted: -10652.3721
  Topic_2_Weighted: -18371.5924
  Topic_3_Weighted: 3694.2316
  Topic_4_Weighted: 17846.6242
  Topic_5_Weighted: -5344.4536
  Topic_7_Weighted: -5219.5138
  Topic_8_Weighted: 1886.8826
  Topic_9_Weighted: 3623.3385
  Car Brand_alfa-romeo: -4781.3345
  Car Brand_audi: -1394.1298
  Car Brand_bmw: -503.4102
  Car Brand_buick: -403.8515
  Car Brand_cadillac: -1767.0756
  Car Brand_chevrolet: 2673.1271
  Car Brand_chrysler: -1338.8401
  Car Brand_dodge: 2374.7856
  Car Brand_fiat: -3997.7462
  Car Brand_ford: -396.2822
  Car Brand_genesis: 936.0639
  Car Brand_gmc: 1598.7437
  Car Brand_honda: 417.2032
  Car Brand_hyundai: -2226.9507
  Car Brand_infiniti: -5729.8487
  Car Brand_jaguar: -5189.8643
  Car Brand_jeep: 939.6090
  Car Brand_kia: -2037.6115
  Car Bra

#### Functional and Experiental Aggregated lasso

In [18]:
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)  # X is your input features

# Split the data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y_price, test_size=0.2, random_state=15)

alpha_values = [0.1, 1.0, 10.0, 100, 200, 300, 500, 1000]
best_rmse = float('inf')  # Initialize with a very high RMSE
best_alpha = None
best_coefficients = None
best_intercept = None

# Loop through each alpha value
for alpha in alpha_values:
    # Initialize and train the Lasso model with the current alpha
    lasso_model = Lasso(alpha=alpha)
    lasso_model.fit(X_train, y_train)
    y_pred = lasso_model.predict(X_test)
    
    # Calculate evaluation metrics
    r2 = r2_score(y_test, y_pred)
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    
    # Print RMSE and R^2 for this alpha
    print(f"\nAlpha: {alpha}")
    print(f"  R^2: {r2:.4f}")
    print(f"  RMSE: {rmse:.4f}")
    
    # Check if this model has the best RMSE so far
    if rmse < best_rmse:
        best_rmse = rmse
        best_alpha = alpha
        best_r2 = r2
        best_coefficients = dict(zip(X.columns, lasso_model.coef_))
        best_intercept = lasso_model.intercept_

# Print the best alpha and its coefficients
print(f"\nBest Alpha based on RMSE: {best_alpha}")
print(f"Best R^2: {best_r2:.4f}")
print(f"Best RMSE: {best_rmse:.4f}")

# Print coefficients for the best alpha
print("\nModel Coefficients for the Best Alpha:")
for feature, coef in best_coefficients.items():
    print(f"  {feature}: {coef:.4f}")

# Print Intercept for the best alpha
print(f"\nIntercept: {best_intercept:.4f}")

division_title = "Functional & Experiential Aggregated"
model_type = "Lasso"

results_list.append({
    "Division Title": division_title,
    "Model Type": model_type,
    "Best Alpha": best_alpha,
    "R^2": best_r2,
    "RMSE": best_rmse
})



Alpha: 0.1
  R^2: 0.6419
  RMSE: 5124.5463

Alpha: 1.0
  R^2: 0.6430
  RMSE: 5116.3409

Alpha: 10.0
  R^2: 0.6538
  RMSE: 5038.6480

Alpha: 100
  R^2: 0.7050
  RMSE: 4650.9941

Alpha: 200
  R^2: 0.7189
  RMSE: 4539.8598

Alpha: 300
  R^2: 0.7179
  RMSE: 4548.0935

Alpha: 500
  R^2: 0.7157
  RMSE: 4566.2031

Alpha: 1000
  R^2: 0.6996
  RMSE: 4693.6665

Best Alpha based on RMSE: 200
Best R^2: 0.7189
Best RMSE: 4539.8598

Model Coefficients for the Best Alpha:
  Year: 8730.2168
  Horsepower (hp): 3471.9758
  Curb Weight (lbs): -0.0000
  Combined MPG: 0.0000
  Fuel Capacity (gallons): 16.8687
  Age: -135.8829
  Topic_0_Weighted: -0.0000
  Topic_2_Weighted: -73.3415
  Topic_3_Weighted: 0.0000
  Topic_4_Weighted: 60.6928
  Topic_5_Weighted: -0.0000
  Topic_7_Weighted: -0.0000
  Topic_8_Weighted: 0.0000
  Topic_9_Weighted: 0.0000
  Car Brand_alfa-romeo: -169.6145
  Car Brand_audi: -0.0000
  Car Brand_bmw: -0.0000
  Car Brand_buick: -71.4522
  Car Brand_cadillac: -0.0000
  Car Brand_chevrolet

  model = cd_fast.enet_coordinate_descent(


#### Functional and Experiental Aggregated Ridge

In [19]:
# Split the data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y_price, test_size=0.2, random_state=15)

alpha_values = [0.1, 1.0, 10.0, 100, 200, 300, 500, 1000]
best_rmse = float('inf')  # Initialize with a very high RMSE
best_alpha = None
best_coefficients = None
best_intercept = None

# Loop through each alpha value
for alpha in alpha_values:
    # Initialize and train the Lasso model with the current alpha
    ridge_model = Ridge(alpha=alpha)
    ridge_model.fit(X_train, y_train)
    y_pred = ridge_model.predict(X_test)
    
    # Calculate evaluation metrics
    r2 = r2_score(y_test, y_pred)
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    
    # Print RMSE and R^2 for this alpha
    print(f"\nAlpha: {alpha}")
    print(f"  R^2: {r2:.4f}")
    print(f"  RMSE: {rmse:.4f}")
    
    # Check if this model has the best RMSE so far
    if rmse < best_rmse:
        best_rmse = rmse
        best_alpha = alpha
        best_r2 = r2
        best_coefficients = dict(zip(X.columns, lasso_model.coef_))
        best_intercept = lasso_model.intercept_

# Print the best alpha and its coefficients
print(f"\nBest Alpha based on RMSE: {best_alpha}")
print(f"Best R^2: {best_r2:.4f}")
print(f"Best RMSE: {best_rmse:.4f}")

# Print coefficients for the best alpha
print("\nModel Coefficients for the Best Alpha:")
for feature, coef in best_coefficients.items():
    print(f"  {feature}: {coef:.4f}")

# Print Intercept for the best alpha
print(f"\nIntercept: {best_intercept:.4f}")

division_title = "Functional & Experiential Aggregated"
model_type = "Ridge"

results_list.append({
    "Division Title": division_title,
    "Model Type": model_type,
    "Best Alpha": best_alpha,
    "R^2": best_r2,
    "RMSE": best_rmse
})


Alpha: 0.1
  R^2: 0.6423
  RMSE: 5121.7153

Alpha: 1.0
  R^2: 0.6467
  RMSE: 5089.9815

Alpha: 10.0
  R^2: 0.6768
  RMSE: 4868.0279

Alpha: 100
  R^2: 0.7497
  RMSE: 4284.0240

Alpha: 200
  R^2: 0.7496
  RMSE: 4285.2552

Alpha: 300
  R^2: 0.7290
  RMSE: 4457.7894

Alpha: 500
  R^2: 0.6702
  RMSE: 4917.4849

Alpha: 1000
  R^2: 0.5272
  RMSE: 5888.2697

Best Alpha based on RMSE: 100
Best R^2: 0.7497
Best RMSE: 4284.0240

Model Coefficients for the Best Alpha:
  Year: 8066.8338
  Horsepower (hp): 3543.5767
  Curb Weight (lbs): 0.0000
  Combined MPG: 25.4894
  Fuel Capacity (gallons): 0.0000
  Age: -18.1410
  Topic_0_Weighted: -0.0000
  Topic_2_Weighted: -0.0000
  Topic_3_Weighted: 0.0000
  Topic_4_Weighted: 0.0000
  Topic_5_Weighted: -0.0000
  Topic_7_Weighted: -0.0000
  Topic_8_Weighted: 0.0000
  Topic_9_Weighted: 0.0000
  Car Brand_alfa-romeo: -0.0000
  Car Brand_audi: -0.0000
  Car Brand_bmw: -0.0000
  Car Brand_buick: -0.0000
  Car Brand_cadillac: -0.0000
  Car Brand_chevrolet: 0.000

# All Feature Model

In [20]:
df.columns.values

array(['Unnamed: 0', 'id', 'Car Brand', 'Car Model', 'Year', 'Review',
       'Rating', 'Value Rating', 'Performance Rating', 'Quality Rating',
       'Comfort Rating', 'Reliability Rating', 'Styling Rating',
       'Vehicle Class', 'Model Full Name', 'Horsepower (hp)',
       'Curb Weight (lbs)', 'Combined MPG', 'Fuel Capacity (gallons)',
       'Drivetrain', 'Recommended Fuel', 'Average MSRP Price ($)',
       'Average KBB Fair Price ($)', 'Review_Topics', 'Topic_0',
       'Topic_1', 'Topic_2', 'Topic_3', 'Topic_4', 'Topic_5', 'Topic_6',
       'Topic_7', 'Topic_8', 'Topic_9', 'Sentiment Score',
       'Depreciated Value', 'Sentiment Bin', 'Weighted Sentiment Score',
       'Topic_0_Weighted', 'Topic_2_Weighted', 'Topic_3_Weighted',
       'Topic_4_Weighted', 'Topic_5_Weighted', 'Topic_7_Weighted',
       'Topic_8_Weighted', 'Topic_9_Weighted', 'Age'], dtype=object)

## Results

In [21]:
results_df = pd.DataFrame(results_list).round({"R^2": 2, "RMSE": 2})
print("\nResults Summary:")
results_df


Results Summary:


Unnamed: 0,Division Title,Model Type,Best Alpha,R^2,RMSE
0,Functional Unaggregated,Linear Regression,,0.78,4123.8
1,Functional Unaggregated,Lasso,0.1,0.78,4123.79
2,Functional Unaggregated,Ridge,0.1,0.78,4123.8
3,Functional Aggregated,Linear Regression,,0.64,5149.06
4,Functional Aggregated,Lasso,100.0,0.66,4981.95
5,Functional Aggregated,Ridge,1000.0,0.67,4953.31
6,Functional & Experiential Unaggregated,Linear Regression,,0.91,2610.3
7,Functional & Experiential Unaggregated,Lasso,0.1,0.91,2611.29
8,Functional & Experiential Unaggregated,Ridge,0.1,0.91,2611.16
9,Functional & Experiential Aggregated,Linear Regression,,0.64,5125.43


In [22]:
car_data_encoded

Unnamed: 0.1,Unnamed: 0,id,Car Brand,Car Model,Year,Review,Rating,Value Rating,Performance Rating,Quality Rating,...,Vehicle Class_electric-suv,Vehicle Class_full-size,Vehicle Class_full-size-truck,Vehicle Class_hybrid-car,Vehicle Class_hybrid-suv,Vehicle Class_luxury-hybrid-suv,Vehicle Class_mid-size,Vehicle Class_mid-size-suv,Vehicle Class_mid-size-truck,Vehicle Class_minivan
0,0,0,honda,accord,2010,I've driven my LX over 13 years and the only t...,5.0,5.0,5.0,5.0,...,False,False,False,False,False,False,True,False,False,False
1,1,1,honda,accord,2010,I have this car for 10 years have given me no ...,5.0,5.0,5.0,5.0,...,False,False,False,False,False,False,True,False,False,False
2,2,2,honda,accord,2010,"My 2012 Honda Accord is great, It handles grea...",5.0,4.0,5.0,5.0,...,False,False,False,False,False,False,True,False,False,False
3,3,3,honda,accord,2010,My sibling has this car. I may be a computer p...,5.0,5.0,5.0,5.0,...,False,False,False,False,False,False,True,False,False,False
4,4,4,honda,accord,2010,"I've had this car for 7 years, it's dependable...",5.0,5.0,5.0,5.0,...,False,False,False,False,False,False,True,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
15255,15255,15261,mercedes-benz,a-class,2019,I just traded in my 2016 Mercedes GLA 250 for ...,5.0,5.0,5.0,5.0,...,False,False,False,False,False,False,True,False,False,False
15256,15256,15262,mercedes-benz,a-class,2019,"Great car, fun to drive and very quick. I had ...",5.0,4.5,4.0,5.0,...,False,False,False,False,False,False,True,False,False,False
15257,15257,15263,mercedes-benz,a-class,2019,Awesome car. Really smooth drive satisfying lo...,5.0,5.0,3.0,5.0,...,False,False,False,False,False,False,True,False,False,False
15258,15258,15264,mercedes-benz,a-class,2019,I got this car just because I have always boug...,1.0,2.0,1.0,2.0,...,False,False,False,False,False,False,True,False,False,False
