## Student Placement Prediction using Logistic Regression

### Import Library

In [1]:
import pandas as pd

In [2]:
import numpy as np

### Import DataFrame

In [3]:
df=pd.read_csv(r"https://github.com/YBI-Foundation/Dataset/raw/main/Placement.csv")

### Get First Five Rows of Dataframe

In [4]:
df.head()

Unnamed: 0,Student_ID,CGPA,IQ,Placement
0,1,6.8,123,1
1,2,5.9,106,0
2,3,5.3,121,0
3,4,7.4,132,1
4,5,5.8,142,0


### Get Information of Dataframe

In [5]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100 entries, 0 to 99
Data columns (total 4 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   Student_ID  100 non-null    int64  
 1   CGPA        100 non-null    float64
 2   IQ          100 non-null    int64  
 3   Placement   100 non-null    int64  
dtypes: float64(1), int64(3)
memory usage: 3.2 KB


### Get Summary Statistics

In [6]:
df.describe()

Unnamed: 0,Student_ID,CGPA,IQ,Placement
count,100.0,100.0,100.0,100.0
mean,50.5,5.991,123.58,0.5
std,29.011492,1.143634,39.944198,0.502519
min,1.0,3.3,37.0,0.0
25%,25.75,5.075,101.5,0.0
50%,50.5,6.0,127.5,0.5
75%,75.25,6.9,149.0,1.0
max,100.0,8.5,233.0,1.0


### Get Column Names

In [7]:
df.columns

Index(['Student_ID', 'CGPA', 'IQ', 'Placement'], dtype='object')

### Get Shape of DataFrame

In [8]:
df.shape

(100, 4)

### Get Unique Values in y variable

In [9]:
df['Placement'].value_counts()

1    50
0    50
Name: Placement, dtype: int64

In [10]:
df.groupby('Placement').mean()

Unnamed: 0_level_0,Student_ID,CGPA,IQ
Placement,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,46.32,5.056,126.88
1,54.68,6.926,120.28


### Define y and X

In [11]:
y=df['Placement']

In [12]:
y.shape

(100,)

In [13]:
y

0     1
1     0
2     0
3     1
4     0
     ..
95    0
96    0
97    1
98    1
99    1
Name: Placement, Length: 100, dtype: int64

In [14]:
X=df[['CGPA','IQ']]

In [15]:
X.shape

(100, 2)

In [16]:
X

Unnamed: 0,CGPA,IQ
0,6.8,123
1,5.9,106
2,5.3,121
3,7.4,132
4,5.8,142
...,...,...
95,4.3,200
96,4.4,42
97,6.7,182
98,6.3,103


### Get X Standardized

In [17]:
from sklearn.preprocessing import StandardScaler

In [18]:
ss=StandardScaler()

In [19]:
X=ss.fit_transform(X)

In [20]:
X

array([[ 0.71095807, -0.01459341],
       [-0.0799718 , -0.44233119],
       [-0.60725838, -0.0649155 ],
       [ 1.23824465,  0.21185601],
       [-0.16785289,  0.46346647],
       [ 0.97460136, -1.90167186],
       [-0.25573399,  0.48862751],
       [-0.87090167, -1.52425617],
       [ 0.0957904 ,  0.81572111],
       [-0.78302057, -1.44877303],
       [ 0.0079093 , -1.977155  ],
       [ 0.79883917,  0.36282228],
       [-0.51937728,  0.38798333],
       [ 0.35943369, -0.19072073],
       [ 0.0957904 , -0.51781433],
       [-0.78302057,  1.31894203],
       [-0.69513948,  2.52667224],
       [-2.36488031,  1.49506935],
       [-1.74971264, -0.59329747],
       [-0.69513948,  0.21185601],
       [ 0.53519588, -0.09007654],
       [ 0.97460136,  0.68991588],
       [-0.95878277, -0.09007654],
       [-1.13454496, -0.92039106],
       [-1.13454496, -0.0649155 ],
       [-0.87090167, -0.81974688],
       [ 0.88672027,  1.89764609],
       [ 0.0079093 ,  0.01056764],
       [-0.69513948,

In [21]:
X.shape

(100, 2)

### Get Train Test Split

In [22]:
from sklearn.model_selection import train_test_split

In [23]:
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.3,stratify=y,random_state=92529)

In [24]:
X_train.shape,X_test.shape,y_train.shape,y_test.shape

((70, 2), (30, 2), (70,), (30,))

### Get Model Train

In [25]:
from sklearn.linear_model import LogisticRegression

In [26]:
lr=LogisticRegression()

In [27]:
lr.fit(X_train,y_train)

### Get Model Prediction

In [28]:
y_pred=lr.predict(X_test)

In [29]:
y_pred.shape

(30,)

In [30]:
y_pred

array([0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1,
       0, 0, 1, 0, 1, 1, 1, 1], dtype=int64)

### Get Probability of each predicted class

In [31]:
lr.predict_proba(X_test)

array([[0.64455539, 0.35544461],
       [0.93916617, 0.06083383],
       [0.94865306, 0.05134694],
       [0.02834725, 0.97165275],
       [0.85595238, 0.14404762],
       [0.95624347, 0.04375653],
       [0.4437668 , 0.5562332 ],
       [0.05990068, 0.94009932],
       [0.96782808, 0.03217192],
       [0.01099541, 0.98900459],
       [0.38659021, 0.61340979],
       [0.63258193, 0.36741807],
       [0.58114718, 0.41885282],
       [0.00415097, 0.99584903],
       [0.01636962, 0.98363038],
       [0.01187471, 0.98812529],
       [0.99825465, 0.00174535],
       [0.01352788, 0.98647212],
       [0.98352581, 0.01647419],
       [0.46702249, 0.53297751],
       [0.45611275, 0.54388725],
       [0.0547289 , 0.9452711 ],
       [0.94107565, 0.05892435],
       [0.9960009 , 0.0039991 ],
       [0.04857768, 0.95142232],
       [0.97223795, 0.02776205],
       [0.0939974 , 0.9060026 ],
       [0.01856389, 0.98143611],
       [0.08568139, 0.91431861],
       [0.00662216, 0.99337784]])

### Get Model Evaluation

In [32]:
from sklearn.metrics import classification_report,confusion_matrix

In [33]:
print(confusion_matrix(y_test,y_pred))

[[13  2]
 [ 0 15]]


In [34]:
print(classification_report(y_test,y_pred))

              precision    recall  f1-score   support

           0       1.00      0.87      0.93        15
           1       0.88      1.00      0.94        15

    accuracy                           0.93        30
   macro avg       0.94      0.93      0.93        30
weighted avg       0.94      0.93      0.93        30



### Get Future Predictions

In [35]:
X_new=df.sample(1)

In [36]:
X_new

Unnamed: 0,Student_ID,CGPA,IQ,Placement
74,75,6.7,154,1


In [37]:
X_new.shape

(1, 4)

In [38]:
X_new=X_new.drop(['Student_ID','Placement'],axis=1)

In [39]:
X_new

Unnamed: 0,CGPA,IQ
74,6.7,154


In [40]:
X_new.shape

(1, 2)

In [41]:
X_new=ss.fit_transform(X_new)

In [42]:
y_pred_new=lr.predict(X_new)

In [43]:
y_pred_new

array([1], dtype=int64)

In [44]:
lr.predict_proba(X_new)

array([[0.46234875, 0.53765125]])