In [1]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import (
    accuracy_score,
    confusion_matrix,
    precision_score,
    recall_score,
)
from sklearn.model_selection import train_test_split


In [2]:
# Load the mtcars dataset from a CSV file in the working directory
# and preview its first five rows.
cars = pd.read_csv("mtcars.csv")
cars.head()

Unnamed: 0,car_model,mpg,cyl,disp,hp,drat,wt,qsec,vs,am,gear,carb
0,Mazda RX4,21.0,6,160.0,110,3.9,2.62,16.46,0,1,4,4
1,Mazda RX4 Wag,21.0,6,160.0,110,3.9,2.875,17.02,0,1,4,4
2,Datsun 710,22.8,4,108.0,93,3.85,2.32,18.61,1,1,4,1
3,Hornet 4 Drive,21.4,6,258.0,110,3.08,3.215,19.44,1,0,3,1
4,Datsun 710,22.8,4,108.0,93,3.85,2.32,18.61,1,1,4,1


In [27]:
# Basic checks: (rows, columns) of the loaded frame.
cars.shape

(32, 12)

In [5]:
# Count missing values in each column.
cars.isnull().sum()

car_model    0
mpg          0
cyl          0
disp         0
hp           0
drat         0
wt           0
qsec         0
vs           0
am           0
gear         0
carb         0
dtype: int64

In [6]:
# Summarise column dtypes, non-null counts and memory usage.
cars.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 32 entries, 0 to 31
Data columns (total 12 columns):
car_model    32 non-null object
mpg          32 non-null float64
cyl          32 non-null int64
disp         32 non-null float64
hp           32 non-null int64
drat         32 non-null float64
wt           32 non-null float64
qsec         32 non-null float64
vs           32 non-null int64
am           32 non-null int64
gear         32 non-null int64
carb         32 non-null int64
dtypes: float64(5), int64(6), object(1)
memory usage: 3.1+ KB


In [7]:
# Feature matrix (capital X by convention) and target vector (lowercase y).
# Target: `am`, the transmission type (automatic vs. manual).
# Features: mpg, hp and wt.
X = cars.loc[:, ['mpg', 'hp', 'wt']]
y = cars['am']

In [9]:
# Peek at the first two feature rows to confirm the column selection.
X.head(2)

Unnamed: 0,mpg,hp,wt
0,21.0,110,2.62
1,21.0,110,2.875


In [10]:
# Hold out 30% of the rows as a test set; the fixed random_state
# makes the split repeatable from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=13,
)

In [29]:
# Inspect the training features (row labels are the original indices from `cars`).
X_train

Unnamed: 0,mpg,hp,wt
19,33.9,65,1.835
23,13.3,245,3.84
0,21.0,110,2.62
21,15.5,150,3.52
4,22.8,93,2.32
11,16.4,180,4.07
9,19.2,123,3.44
29,19.7,175,2.77
13,15.2,180,3.78
5,18.1,105,3.46


In [12]:
# Full dataset shape, for comparison with the split sizes printed in the next cell.
cars.shape

(32, 12)

In [14]:
# Report the dimensions of the four pieces produced by the split.
print("X_train shape: {}".format(X_train.shape))
print("X_test shape: {}".format(X_test.shape))
print("y_train shape: {}".format(y_train.shape))
print("y_test shape: {}".format(y_test.shape))

X_train shape: (22, 3)
X_test shape: (10, 3)
y_train shape: (22,)
y_test shape: (10,)


In [15]:
# Create an (unfitted) logistic-regression classifier with library defaults.
# NOTE(review): the default solver and regularisation settings may differ between
# scikit-learn versions — consider pinning them explicitly for reproducibility.
model = LogisticRegression()

In [30]:
# Inspect the held-out test features.
X_test

Unnamed: 0,mpg,hp,wt
22,15.2,150,3.435
7,18.1,105,3.46
8,18.1,105,3.46
24,19.2,175,3.845
31,21.4,109,2.78
1,21.0,110,2.875
28,15.8,264,3.17
17,32.4,66,2.2
27,30.4,113,1.513
15,10.4,215,5.424


In [17]:
# Fit on the training split, then predict labels for the held-out rows.
# (`fit` returns the estimator itself, so the two calls can be chained.)
y_predict = model.fit(X_train, y_train).predict(X_test)



In [31]:
# Predicted class labels (values of `am`) for the test rows.
y_predict

array([0, 0, 0, 0, 1, 0, 1, 1, 1, 0], dtype=int64)

In [18]:
# Fraction of test rows whose predicted label matches the true label.
accuracy_score(y_test,y_predict)

0.9

In [20]:
# True labels for the test rows, for comparison with y_predict.
y_test

22    0
7     0
8     0
24    0
31    1
1     1
28    1
17    1
27    1
15    0
Name: am, dtype: int64

In [21]:
# Predictions shown again, directly below the true labels (same array as displayed earlier).
y_predict

array([0, 0, 0, 0, 1, 0, 1, 1, 1, 0], dtype=int64)

In [22]:
# Confusion matrix: rows are true classes, columns are predicted classes.
confusion_matrix(y_test,y_predict)

array([[5, 0],
       [1, 4]], dtype=int64)

In [23]:
# The same counts as a labelled cross-tabulation: rows = true `am`, columns = predicted label.
pd.crosstab(y_test,y_predict)

col_0,0,1
am,Unnamed: 1_level_1,Unnamed: 2_level_1
0,5,0
1,1,4


In [25]:
# Precision for the positive class (am == 1): TP / (TP + FP).
# `precision_score` is imported with the other metrics in the first cell,
# so every dependency of the notebook is visible in one place.
precision_score(y_test,y_predict)

1.0

In [26]:
# Recall for the positive class (am == 1): TP / (TP + FN).
# `recall_score` is imported with the other metrics in the first cell,
# so every dependency of the notebook is visible in one place.
recall_score(y_test,y_predict)

0.8