In [1]:
import pandas as pd

In [2]:
# Read the vehicle listings CSV into a DataFrame.
cars_data = pd.read_csv(filepath_or_buffer="vehicles_cleaned.csv")

In [5]:
# Show the column index and the first record; one print call, items joined by
# a newline, produces the same text as two consecutive prints.
print(
    f"Columns in DataFrame :\n{cars_data.columns}",
    f"\nFirst row of data in DataFrame :\n{cars_data.iloc[0]}",
    sep="\n",
)

Columns in DataFrame :
Index(['price', 'year', 'condition', 'cylinders', 'fuel', 'odometer',
       'transmission', 'size', 'type'],
      dtype='object')

First row of data in DataFrame :
price                  7000
year                   2011
condition              good
cylinders       4 cylinders
fuel                    gas
odometer              76202
transmission      automatic
size                compact
type                  sedan
Name: 0, dtype: object


![Image](dummies_for_categories.png)

In [8]:
# One-hot encode the categorical columns. drop_first=True keeps k-1 indicator
# columns for a k-level category (the first level becomes the baseline).
# The chained assign() appends a constant column of ones so the fitted linear
# model gets an intercept term alongside the weighted sum of the other columns.
cars_data_dummies = pd.get_dummies(
    cars_data,
    columns=["condition", "cylinders", "fuel", "transmission", "size", "type"],
    drop_first=True,
).assign(intercept=1)

# Show the expanded column index and the first encoded record.
print(
    f"Columns in DataFrame with dummies :\n{cars_data_dummies.columns}",
    f"\nFirst row of data in DataFrame with dummies :\n{cars_data_dummies.iloc[0]}",
    sep="\n",
)

Columns in DataFrame with dummies :
Index(['price', 'year', 'odometer', 'condition_fair', 'condition_good',
       'condition_like new', 'condition_new', 'condition_salvage',
       'cylinders_6 cylinders', 'fuel_gas', 'transmission_manual',
       'size_full-size', 'size_mid-size', 'size_sub-compact', 'type_hatchback',
       'type_sedan', 'type_wagon', 'intercept'],
      dtype='object')

First row of data in DataFrame with dummies :
price                     7000
year                      2011
odometer                 76202
condition_fair               0
condition_good               1
condition_like new           0
condition_new                0
condition_salvage            0
cylinders_6 cylinders        0
fuel_gas                     1
transmission_manual          0
size_full-size               0
size_mid-size                0
size_sub-compact             0
type_hatchback               0
type_sedan                   1
type_wagon                   0
intercept                    1
Na

In [11]:
# Build the model as the linear system A @ x ~= b.
# A: every column except price, which is the value we want to predict.
# Request float explicitly: pandas >= 2.0 emits boolean dummy columns, and a
# frame mixing bool and integer columns converts to an object-dtype array,
# which the scipy.linalg solvers refuse. On older pandas the values are the
# same, since the solvers work in floating point anyway.
A = cars_data_dummies.drop(columns=["price"]).to_numpy(dtype=float)

# b: NumPy array with only price, the independent terms of the system.
b = cars_data_dummies.loc[:, "price"].to_numpy()



In [15]:
# Get x (the model coefficients) using least squares
from scipy import linalg

# lstsq returns (solution, residues, rank, singular values); only the
# solution vector is kept.
p1, *_ = linalg.lstsq(A, b)
print(f"Model - Least squares solution for co-efficients :\n{p1}")


# Same problem solved through the pseudo-inverse of A: x = pinv(A) @ b.
p2 = linalg.pinv(A) @ b
# Report p2 here (the original printed p1 twice, so the second method's result
# was never actually shown or compared).
print(f"\nModel - Inverse of co-efficient matrix solution for co-efficients :\n{p2}")


Model - Least squares solution for co-efficients :
[ 8.47362988e+02 -3.53913729e-02 -3.47144752e+03 -1.66981155e+03
 -1.80240398e+02 -7.15885691e+03 -6.36540791e+03  3.76583261e+03
 -1.84837210e+03  1.31935783e+03  6.60484388e+02  6.38913933e+02
  1.54163679e+02 -1.76423109e+03 -1.99439766e+03  6.97365788e+02
 -1.68998811e+06]

Model - Inverse of co-efficient matrix solution for co-efficients :
[ 8.47362988e+02 -3.53913729e-02 -3.47144752e+03 -1.66981155e+03
 -1.80240398e+02 -7.15885691e+03 -6.36540791e+03  3.76583261e+03
 -1.84837210e+03  1.31935783e+03  6.60484388e+02  6.38913933e+02
  1.54163679e+02 -1.76423109e+03 -1.99439766e+03  6.97365788e+02
 -1.68998811e+06]
