In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.linear_model import LinearRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier


### Read data from CSV

In [2]:
# Load the training and test sets (training file first, then test file) and
# preview the first rows of the training frame.
data_train, data_test = map(
    pd.read_csv,
    ('train_test_data/train_data.csv', 'train_test_data/test_data.csv'),
)
data_train.head()


Unnamed: 0,Screen size,Screen type,Chip,RAM,ROM,Batery,OS,Screen resolution,Mobile network,Camera count,Camera max MP,Price
0,-0.46742,1,2,0.855526,-0.040526,-0.418828,0,2592000.0,0.600621,4,1.278035,-0.520814
1,1.781956,1,3,2.368879,1.952557,0.81551,0,4254336.0,0.600621,5,2.980678,1.274604
2,-0.46742,0,2,-0.657828,-0.897009,0.81551,0,2592000.0,-1.664943,5,0.624966,-0.761271
3,-2.372521,1,0,-0.657828,-0.897009,-1.860525,1,2527200.0,0.600621,2,-1.147649,0.152468
4,1.444264,1,0,0.855526,1.672439,0.81551,1,2275550.0,0.600621,0,0.043276,-0.128066


### Data information

#### Training data information

In [3]:
# Summarise the training frame: index range, per-column non-null counts, dtypes and memory usage.
data_train.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1241 entries, 0 to 1240
Data columns (total 12 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   Screen size        1241 non-null   float64
 1   Screen type        1241 non-null   int64  
 2   Chip               1241 non-null   int64  
 3   RAM                1241 non-null   float64
 4   ROM                1241 non-null   float64
 5   Batery             1241 non-null   float64
 6   OS                 1241 non-null   int64  
 7   Screen resolution  1241 non-null   float64
 8   Mobile network     1241 non-null   float64
 9   Camera count       1241 non-null   int64  
 10  Camera max MP      1241 non-null   float64
 11  Price              1241 non-null   float64
dtypes: float64(8), int64(4)
memory usage: 116.5 KB


- The training data has 1241 records
- No fields contain null values

#### Test data information

In [4]:
# Summarise the test frame: index range, per-column non-null counts, dtypes and memory usage.
data_test.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 533 entries, 0 to 532
Data columns (total 12 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   Screen size        533 non-null    float64
 1   Screen type        533 non-null    int64  
 2   Chip               533 non-null    int64  
 3   RAM                533 non-null    float64
 4   ROM                533 non-null    float64
 5   Batery             533 non-null    float64
 6   OS                 533 non-null    int64  
 7   Screen resolution  533 non-null    float64
 8   Mobile network     533 non-null    float64
 9   Camera count       533 non-null    int64  
 10  Camera max MP      533 non-null    float64
 11  Price              533 non-null    float64
dtypes: float64(8), int64(4)
memory usage: 50.1 KB


- The test data has 533 records
- No fields contain null values

### Modeling the training data

#### Get x_train and y_train from data_train


In [5]:
# Training feature matrix: every column of data_train except the target 'Price'.
x_train = data_train.drop(columns='Price')
x_train

Unnamed: 0,Screen size,Screen type,Chip,RAM,ROM,Batery,OS,Screen resolution,Mobile network,Camera count,Camera max MP
0,-0.467420,1,2,0.855526,-0.040526,-0.418828,0,2.592000e+06,0.600621,4,1.278035
1,1.781956,1,3,2.368879,1.952557,0.815510,0,4.254336e+06,0.600621,5,2.980678
2,-0.467420,0,2,-0.657828,-0.897009,0.815510,0,2.592000e+06,-1.664943,5,0.624966
3,-2.372521,1,0,-0.657828,-0.897009,-1.860525,1,2.527200e+06,0.600621,2,-1.147649
4,1.444264,1,0,0.855526,1.672439,0.815510,1,2.275550e+06,0.600621,0,0.043276
...,...,...,...,...,...,...,...,...,...,...,...
1236,0.431185,1,3,0.855526,-0.040526,-0.078938,0,2.592000e+06,-1.664943,5,1.278035
1237,-0.467420,1,0,0.098849,1.952557,-0.254904,1,3.566952e+06,0.600621,2,-1.147649
1238,-0.467420,0,2,-0.657828,-0.897009,0.815510,0,1.152000e+06,-1.664943,3,-1.101002
1239,1.444264,1,1,0.855526,1.672439,0.457731,0,2.592000e+06,0.600621,0,0.043276


In [6]:
# Training target: the 'Price' column of data_train, selected by label.
y_train = data_train.loc[:, 'Price']
y_train

0      -0.520814
1       1.274604
2      -0.761271
3       0.152468
4      -0.128066
          ...   
1236   -0.568905
1237    1.915825
1238   -0.769287
1239    0.794490
1240    0.745597
Name: Price, Length: 1241, dtype: float64

#### Get x_test and y_test from data_test

In [7]:
# Test feature matrix: every column of data_test except the target 'Price'.
x_test = data_test.drop(columns='Price')
x_test

Unnamed: 0,Screen size,Screen type,Chip,RAM,ROM,Batery,OS,Screen resolution,Mobile network,Camera count,Camera max MP
0,-0.467420,0,2,0.855526,-0.040526,0.815510,0,2.275550e+06,0.600621,0,0.043276
1,1.444264,1,1,0.855526,-0.040526,-0.078938,0,2.275550e+06,0.600621,0,0.043276
2,-0.581893,1,0,-0.657828,-0.040526,-3.093229,1,2.962440e+06,0.600621,2,-1.147649
3,1.781956,1,3,2.368879,1.672439,0.815510,0,4.446720e+06,0.600621,0,0.043276
4,1.781956,1,0,2.368879,-0.040526,0.815510,1,4.608000e+06,0.600621,5,3.330536
...,...,...,...,...,...,...,...,...,...,...,...
528,-0.467420,1,3,0.098849,-0.040526,-0.526162,0,2.592000e+06,0.600621,4,1.278035
529,-0.467420,0,0,0.002103,0.242817,-0.254904,1,2.275550e+06,0.600621,0,0.043276
530,1.545571,1,0,2.368879,1.672439,0.099952,1,4.608000e+06,0.600621,4,0.624966
531,-0.467420,1,0,-0.657828,-0.040526,-0.254904,1,2.527200e+06,0.600621,2,-1.147649


In [8]:
# Test target: the 'Price' column of data_test, selected by label.
y_test = data_test.loc[:, 'Price']
y_test

0     -0.448676
1      0.873841
2      0.360865
3      1.515062
4      0.521170
         ...   
528   -0.448676
529   -0.991309
530    0.713536
531    0.445025
532    0.521170
Name: Price, Length: 533, dtype: float64

#### Creating and fitting a Linear Regression model

In [9]:
# Linear regression estimator created with scikit-learn's default settings.
lrg = LinearRegression()

In [10]:
# Fit the regression on the training features (x_train) against the training target (y_train).
lrg.fit(x_train, y_train)

In [11]:
# For a scikit-learn regressor, score() returns R^2 (coefficient of determination); 1.0 is a perfect fit.
# The recorded run scored about 0.08 on the test set, i.e. the linear fit explains
# little of the variance in test prices.
lrg.score(x_test, y_test)

0.08154057322087538

#### Creating and fitting a KNN model