### 1. Import required libraries

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score
import warnings
warnings.filterwarnings('ignore')


### 2. Load the dataset

In [3]:
df= pd.read_csv(r'C:\Users\udhay\Downloads\car+data.csv')
df.head()

Unnamed: 0,Car_Name,Year,Selling_Price,Present_Price,Kms_Driven,Fuel_Type,Seller_Type,Transmission,Owner
0,ritz,2014,3.35,5.59,27000,Petrol,Dealer,Manual,0
1,sx4,2013,4.75,9.54,43000,Diesel,Dealer,Manual,0
2,ciaz,2017,7.25,9.85,6900,Petrol,Dealer,Manual,0
3,wagon r,2011,2.85,4.15,5200,Petrol,Dealer,Manual,0
4,swift,2014,4.6,6.87,42450,Diesel,Dealer,Manual,0


### 3. Check the shape and basic information of the dataset.

In [5]:
df.shape

(301, 9)

In [6]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 301 entries, 0 to 300
Data columns (total 9 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Car_Name       301 non-null    object 
 1   Year           301 non-null    int64  
 2   Selling_Price  301 non-null    float64
 3   Present_Price  301 non-null    float64
 4   Kms_Driven     301 non-null    int64  
 5   Fuel_Type      301 non-null    object 
 6   Seller_Type    301 non-null    object 
 7   Transmission   301 non-null    object 
 8   Owner          301 non-null    int64  
dtypes: float64(2), int64(3), object(4)
memory usage: 21.3+ KB


### 4. Check for the presence of the duplicate records in the dataset? If present drop them

In [7]:
df.duplicated().sum()

2

In [8]:
df.drop_duplicates(inplace=True)

In [9]:
df.duplicated().sum()

0

### 5. Drop the columns which you think redundant for the analysis.

In [10]:
df.drop('Car_Name',axis=1,inplace=True)

### 6. Extract a new feature called 'age_of_the_car' from the feature 'year' and drop the feature year

In [11]:
df['age_of_the_car']=2024-df['Year']

In [12]:
df.drop('Year',axis=1,inplace=True)

In [13]:
df.head()

Unnamed: 0,Selling_Price,Present_Price,Kms_Driven,Fuel_Type,Seller_Type,Transmission,Owner,age_of_the_car
0,3.35,5.59,27000,Petrol,Dealer,Manual,0,10
1,4.75,9.54,43000,Diesel,Dealer,Manual,0,11
2,7.25,9.85,6900,Petrol,Dealer,Manual,0,7
3,2.85,4.15,5200,Petrol,Dealer,Manual,0,13
4,4.6,6.87,42450,Diesel,Dealer,Manual,0,10


### 7. Encode the categorical columns

In [16]:
df['Fuel_Type'].unique()

array(['Petrol', 'Diesel', 'CNG'], dtype=object)

In [19]:
df['Fuel_Type']=df['Fuel_Type'].replace({'Petrol':0, 'Diesel':1, 'CNG':2})
df['Seller_Type']=df['Seller_Type'].replace({'Dealer':0, 'Individual':1})
df['Transmission']=df['Transmission'].replace({'Manual':0, 'Automatic':1})

In [20]:
df.head()

Unnamed: 0,Selling_Price,Present_Price,Kms_Driven,Fuel_Type,Seller_Type,Transmission,Owner,age_of_the_car,SEller_Type
0,3.35,5.59,27000,0,0,0,0,10,0
1,4.75,9.54,43000,1,0,0,0,11,0
2,7.25,9.85,6900,0,0,0,0,7,0
3,2.85,4.15,5200,0,0,0,0,13,0
4,4.6,6.87,42450,1,0,0,0,10,0


### 8. Separate the target and independent features.

In [21]:
x = df.drop('Selling_Price',axis=1)
y=df['Selling_Price']

### 9. Split the data into train and test.

In [22]:
x_train,x_test,y_train,y_test = train_test_split(x,y,test_size=0.3,random_state=0)
print(x_train.shape,x_test.shape)
print(y_train.shape,y_test.shape)

(209, 8) (90, 8)
(209,) (90,)


### 10. Build a Random forest Regressor model and check the r2-score for train and test.

In [23]:
rf = RandomForestRegressor()
rf.fit(x_train,y_train)

In [24]:
y_train_pred = rf.predict(x_train)
y_test_pred = rf.predict(x_test)

r2_train = r2_score(y_train,y_train_pred)
r2_test = r2_score(y_test,y_test_pred)

print(r2_train)
print(r2_test)


0.9769530424745336
0.8909829533030025


### 12. Create new folder/new project in visual studio/pycharm that should contain the "model.pkl" file *make sure you are using a virutal environment and install required packages.*

### a) Create a basic HTML form for the frontend

In [None]:
### shared the file

### b) Create app.py file and write the predict function

In [25]:
from flask import Flask, render_template,request,jsonify
import pickle
import numpy as np
import sklearn

app=Flask(__name__)
model= pickle.load(open('model.pkl','rb'))
@app.route('/',methods=['GET'])

def Home():
    return render_template('index.html')

@app.route('/predict',methods=['POST'])

def predict():
    if request.method =='POST':
        Present_price = float(request.form['Present_Price'])
        Kms_Driven = int(request.form['Kms_Driven'])
        Owner = int(request.form['Owner'])
        Fuel_Type =request.form['Fuel_Type']
        Age_of_the_car =request.form['age_of_the_car']
        Seller_Type=request.form['Seller_Type']
        Transmission = request.form['Transmission']
        
        prediction = model.predict([Present_Price,Kms_Driven,Owner,Fuel_Type,Age_of_the_car,Seller_Type,Trnsmission])
        output=round(prediction[0],2)
        return render_template('index.html',prediction_text="you can sell your car at {} lakhs".format(output))
    
if __name__ =='__main__':
    app.run(debug=True)
    
        



FileNotFoundError: [Errno 2] No such file or directory: 'model.pkl'