# Home Prices and Car Prices Prediction

## Home Prices

In [69]:
import pandas as pd 
import polars as pl 
from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer

In [32]:
# Load the home-price data into a polars DataFrame.
# The displayed schema is: town (str), area (i64), price (i64).
df = pl.read_csv('homeprices_dummy.csv')
df 

town,area,price
str,i64,i64
"""monroe township""",2600,550000
"""monroe township""",3000,565000
"""monroe township""",3200,610000
"""monroe township""",3600,680000
"""monroe township""",4000,725000
…,…,…
"""west windsor""",3600,710000
"""robinsville""",2600,575000
"""robinsville""",2900,600000
"""robinsville""",3100,620000


In [33]:
# One-hot encode the town column: one u8 indicator column per distinct town,
# named "town_<value>" (see the displayed output).
# pandas equivalent:
#   dummies = pd.get_dummies(df['town'])

dummies = df["town"].to_dummies()
dummies

town_monroe township,town_robinsville,town_west windsor
u8,u8,u8
1,0,0
1,0,0
1,0,0
1,0,0
1,0,0
…,…,…
0,0,1
0,1,0
0,1,0
0,1,0


In [34]:
# pandas equivalent of the polars concat in the next cell:
# merged = pd.concat([df, dummies], axis='columns')

In [35]:
# polars: place the indicator columns side by side with the original columns
# (how="horizontal" concatenates column-wise; rows are matched by position).
merged = pl.concat([df, dummies], how="horizontal")
merged

town,area,price,town_monroe township,town_robinsville,town_west windsor
str,i64,i64,u8,u8,u8
"""monroe township""",2600,550000,1,0,0
"""monroe township""",3000,565000,1,0,0
"""monroe township""",3200,610000,1,0,0
"""monroe township""",3600,680000,1,0,0
"""monroe township""",4000,725000,1,0,0
…,…,…,…,…,…
"""west windsor""",3600,710000,0,0,1
"""robinsville""",2600,575000,0,1,0
"""robinsville""",2900,600000,0,1,0
"""robinsville""",3100,620000,0,1,0


In [36]:
# Drop the raw string column and one of the three indicator columns.
# "west windsor" becomes the baseline town: it is represented by zeros in both
# remaining indicators. Keeping all three indicators would make them sum to 1 in
# every row (the "dummy variable trap").
# NOTE: this rebinds `merged`; re-running only this cell will likely fail because
# the columns are already gone -- re-run the concat cell first.
merged = merged.drop(['town', 'town_west windsor'])
merged 

area,price,town_monroe township,town_robinsville
i64,i64,u8,u8
2600,550000,1,0
3000,565000,1,0
3200,610000,1,0
3600,680000,1,0
4000,725000,1,0
…,…,…,…
3600,710000,0,0
2600,575000,0,1
2900,600000,0,1
3100,620000,0,1


In [38]:
# Linear regression estimator for the home-price model (default settings).
model = LinearRegression()

In [39]:
# Features: every column except the target, i.e. area plus the two town indicators.
# The column order shown here is the order predict() inputs must follow.
X = merged.drop('price')
X 

area,town_monroe township,town_robinsville
i64,u8,u8
2600,1,0
3000,1,0
3200,1,0
3600,1,0
4000,1,0
…,…,…
3600,0,0
2600,0,1
2900,0,1
3100,0,1


In [40]:
# Target: the price column, as a polars Series.
y = merged['price']
y 

price
i64
550000
565000
610000
680000
725000
…
710000
575000
600000
620000


In [41]:
# Fit on every row of X/y; no train/test split is made in the cells shown.
model.fit(X, y)

In [43]:
# Inputs are positional and must follow X's column order:
#   [area, town_monroe township, town_robinsville]
# Here: area 2800 in robinsville.
model.predict([[2800, 0, 1]])



array([590775.63964739])

In [44]:
# Inputs are positional and must follow X's column order:
#   [area, town_monroe township, town_robinsville]
# Here: area 3400 in west windsor, the baseline town whose indicator column was
# dropped, so both remaining indicators are 0.
model.predict([[3400, 0, 0]])




array([681241.66845839])

In [45]:
# R^2 of the fit, evaluated on the same rows the model was trained on
# (so it says nothing about performance on unseen data).
model.score(X, y)

0.9573929037221873

## Using sklearn LabelEncoder()

In [6]:
# Alternative encoding: LabelEncoder maps each distinct town to an integer code.
le = LabelEncoder()

In [9]:
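# pandas equivalent (note: `dfle = df` only aliases df, so the assignment below
# would also overwrite df['town']):
# dfle = df
# dfle['town'] = le.fit_transform(dfle['town'])
# dfle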

In [46]:
# polars: with_columns() returns a new frame, so `df` keeps its string town column
# while `dfle` carries the integer-encoded one. In the displayed output the codes
# are 0 = monroe township, 1 = robinsville, 2 = west windsor.
dfle = df.with_columns(
    pl.Series(name='town', values=le.fit_transform(df['town'].to_list()))
)
dfle

town,area,price
i64,i64,i64
0,2600,550000
0,3000,565000
0,3200,610000
0,3600,680000
0,4000,725000
…,…,…
2,3600,710000
1,2600,575000
1,2900,600000
1,3100,620000


In [77]:
# Feature matrix for the label-encoded variant: integer town code plus area.
# select() returns a new polars DataFrame with exactly these two columns, in this
# order; X stays a polars frame (no pandas conversion is needed or performed).
X = dfle.select(['town', 'area'])
X

town,area
i64,i64
0,2600
0,3000
0,3200
0,3600
0,4000
…,…
2,3600
1,2600
1,2900
1,3100


In [64]:
# Target for the label-encoded variant: the price column, unchanged by the encoding.
y = dfle['price']
y

price
i64
550000
565000
610000
680000
725000
…
710000
575000
600000
620000


In [78]:
# One-hot encode the column at position 0 and pass every other column through
# unchanged. Only constructed here; the fit_transform call in the following cell
# is commented out.
# NOTE(review): OneHotEncoder() is created with default arguments, so no category
# is dropped here -- confirm whether a baseline should be removed as in the
# dummy-column approach above.
ct = ColumnTransformer([("town", OneHotEncoder(), [0])], remainder = 'passthrough')

In [80]:
# Not run: would one-hot encode column 0 of X and rebind X to the transformed result.
# X = ct.fit_transform(X)
# X


## Car Prices

In [81]:
# Exercise: fit a linear model on carprices.csv, then
# 1. predict the price of a Mercedes, 4 yrs old, 45k mileage
# 2. predict the price of a BMW X5, 7 yrs old, 86k mileage
# 3. report the model's score()


In [83]:
import pandas as pd 
import polars as pl 
from sklearn.linear_model import LinearRegression

In [84]:
# Load the car-price data. This rebinds `df`, which previously held the
# home-price frame.
# Displayed schema: Car Model (str), Mileage (i64), Sell Price($) (i64), Age(yrs) (i64).
df = pl.read_csv('carprices.csv')
df 

Car Model,Mileage,Sell Price($),Age(yrs)
str,i64,i64,i64
"""BMW X5""",69000,18000,6
"""BMW X5""",35000,34000,3
"""BMW X5""",57000,26100,5
"""BMW X5""",22500,40000,2
"""BMW X5""",46000,31500,4
…,…,…,…
"""Audi A5""",91000,12000,8
"""Mercedez Benz C class""",67000,22000,6
"""Mercedez Benz C class""",83000,20000,7
"""Mercedez Benz C class""",79000,21000,7


In [85]:
# List the distinct car models; the output shows three.
df['Car Model'].unique()

Car Model
str
"""BMW X5"""
"""Mercedez Benz C class"""
"""Audi A5"""


In [87]:
# One-hot encode the car model: one u8 indicator column per model, named
# "Car Model_<value>" (see the displayed output). Rebinds `dummies`.
dummies = df['Car Model'].to_dummies()
dummies

Car Model_Audi A5,Car Model_BMW X5,Car Model_Mercedez Benz C class
u8,u8,u8
0,1,0
0,1,0
0,1,0
0,1,0
0,1,0
…,…,…
1,0,0
0,0,1
0,0,1
0,0,1


In [88]:
# Append the indicator columns to the right of the original columns.
# NOTE: this rebinds `df` to the widened frame, so re-running only this cell would
# try to add the same indicator columns a second time -- reload the CSV first.
df = pl.concat([df, dummies], how='horizontal')
df 

Car Model,Mileage,Sell Price($),Age(yrs),Car Model_Audi A5,Car Model_BMW X5,Car Model_Mercedez Benz C class
str,i64,i64,i64,u8,u8,u8
"""BMW X5""",69000,18000,6,0,1,0
"""BMW X5""",35000,34000,3,0,1,0
"""BMW X5""",57000,26100,5,0,1,0
"""BMW X5""",22500,40000,2,0,1,0
"""BMW X5""",46000,31500,4,0,1,0
…,…,…,…,…,…,…
"""Audi A5""",91000,12000,8,1,0,0
"""Mercedez Benz C class""",67000,22000,6,0,0,1
"""Mercedez Benz C class""",83000,20000,7,0,0,1
"""Mercedez Benz C class""",79000,21000,7,0,0,1


In [89]:
# Drop the raw string column and one indicator column. "Mercedez Benz C class"
# becomes the baseline model: it is represented by zeros in both remaining
# indicators.
df = df.drop(['Car Model', 'Car Model_Mercedez Benz C class'])
df 

Mileage,Sell Price($),Age(yrs),Car Model_Audi A5,Car Model_BMW X5
i64,i64,i64,u8,u8
69000,18000,6,0,1
35000,34000,3,0,1
57000,26100,5,0,1
22500,40000,2,0,1
46000,31500,4,0,1
…,…,…,…,…
91000,12000,8,1,0
67000,22000,6,0,0
83000,20000,7,0,0
79000,21000,7,0,0


In [90]:
# Features: every column except the target. The column order shown here
# (Mileage, Age(yrs), Car Model_Audi A5, Car Model_BMW X5) is the order
# predict() inputs must follow.
X = df.drop('Sell Price($)')
X 

Mileage,Age(yrs),Car Model_Audi A5,Car Model_BMW X5
i64,i64,u8,u8
69000,6,0,1
35000,3,0,1
57000,5,0,1
22500,2,0,1
46000,4,0,1
…,…,…,…
91000,8,1,0
67000,6,0,0
83000,7,0,0
79000,7,0,0


In [91]:
# Target: the sell-price column, as a polars Series.
y = df['Sell Price($)']
y

Sell Price($)
i64
18000
34000
26100
40000
31500
…
12000
22000
20000
21000


In [92]:
# Fresh estimator for the car-price model; rebinds `model`, replacing the
# home-price model fitted earlier.
model = LinearRegression()

In [93]:
# Fit on every row of X/y; no train/test split is made in the cells shown.
model.fit(X, y)

In [94]:
# 3. Model score: R^2 evaluated on the same rows the model was trained on.
model.score(X, y)

0.9417050937281082

In [95]:
# 1. Mercedes, 4 yrs old, 45k mileage
# Inputs are positional and must follow X's column order:
#   [Mileage, Age(yrs), Car Model_Audi A5, Car Model_BMW X5]
# Mercedez Benz C class is the baseline (its indicator column was dropped), so
# both indicators are 0.

model.predict([[45000, 4, 0, 0]])



array([36991.31721061])

In [96]:
# 2. BMW X5, 7 yrs old, 86k mileage
# Inputs are positional and must follow X's column order:
#   [Mileage, Age(yrs), Car Model_Audi A5, Car Model_BMW X5]

model.predict([[86000, 7, 0, 1]])



array([11080.74313219])