In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
import time
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import chi2
from sklearn.feature_selection import RFE
from sklearn.linear_model import LogisticRegression
import pickle
import matplotlib.pyplot as plt


def selectkbest(indep_x,dep_y,n):
    """Reduce ``indep_x`` to its ``n`` best features by chi-squared score.

    A ``SelectKBest`` selector using ``chi2`` is fitted on ``indep_x`` against
    ``dep_y``; the transformed (column-reduced) feature matrix is returned.
    """
    selector = SelectKBest(score_func=chi2, k=n)
    return selector.fit(indep_x, dep_y).transform(indep_x)

def split_scalar(indep_x,dep_y):
    """Split the data into train/test parts and standardise the features.

    A quarter of the rows is held out for testing (``random_state=0``). The
    ``StandardScaler`` is fitted on the training features only and then
    applied to the test features; the targets are returned unscaled.

    Returns ``(x_train, x_test, y_train, y_test)``.
    """
    features_train, features_test, target_train, target_test = train_test_split(
        indep_x, dep_y, test_size=0.25, random_state=0
    )

    scaler = StandardScaler()
    scaled_train = scaler.fit_transform(features_train)
    scaled_test = scaler.transform(features_test)

    return scaled_train, scaled_test, target_train, target_test

def cm_prediction(classifier,x_test,y_test=None):
    """Predict on ``x_test`` and score the predictions against the true labels.

    Parameters
    ----------
    classifier
        A fitted estimator exposing ``predict``.
    x_test
        Features to predict on.
    y_test
        True labels for ``x_test``. Optional for backward compatibility:
        when omitted, the module-level ``y_test`` variable is used, which is
        what this function always relied on before. Passing it explicitly
        avoids depending on notebook execution order.

    Returns
    -------
    tuple
        ``(classifier, accuracy, report, x_test, y_test, cm)`` where ``cm`` is
        the confusion matrix and ``report`` the classification report.

    Raises
    ------
    NameError
        If ``y_test`` is not passed and no module-level ``y_test`` exists.
    """
    from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

    if y_test is None:
        # Legacy path: existing callers pass only (classifier, x_test) and
        # expect the labels to come from the notebook's global namespace.
        try:
            y_test = globals()['y_test']
        except KeyError:
            raise NameError(
                "cm_prediction needs y_test: pass it explicitly or define a "
                "module-level 'y_test' before calling"
            ) from None

    y_pred = classifier.predict(x_test)

    cm = confusion_matrix(y_test, y_pred)
    accuracy = accuracy_score(y_test, y_pred)
    report = classification_report(y_test, y_pred)

    return classifier, accuracy, report, x_test, y_test, cm

def logistic(x_train,y_train,x_test):
    """Fit a logistic-regression classifier and evaluate it on ``x_test``.

    The model is created with ``random_state=0`` and trained on
    ``(x_train, y_train)``. Returns the tuple produced by ``cm_prediction``:
    ``(classifier, accuracy, report, x_test, y_test, cm)``.
    """
    from sklearn.linear_model import LogisticRegression

    model = LogisticRegression(random_state=0)
    model.fit(x_train, y_train)

    return cm_prediction(model, x_test)

def svm_linear(x_train,y_train,x_test):
    """Fit a linear-kernel support vector classifier and evaluate it on ``x_test``.

    The model is ``SVC(kernel='linear', random_state=0)`` trained on
    ``(x_train, y_train)``. Returns the tuple produced by ``cm_prediction``:
    ``(classifier, accuracy, report, x_test, y_test, cm)``.
    """
    from sklearn.svm import SVC

    model = SVC(kernel='linear', random_state=0)
    model.fit(x_train, y_train)

    return cm_prediction(model, x_test)

def svm_NL(x_train,y_train,x_test):
    """Fit an RBF-kernel (non-linear) support vector classifier and evaluate it.

    The model is ``SVC(kernel='rbf', random_state=0)`` trained on
    ``(x_train, y_train)``. Returns the tuple produced by ``cm_prediction``:
    ``(classifier, accuracy, report, x_test, y_test, cm)``.
    """
    from sklearn.svm import SVC

    model = SVC(kernel='rbf', random_state=0)
    model.fit(x_train, y_train)

    return cm_prediction(model, x_test)

def Naive(x_train,y_train,x_test):
    """Fit a Gaussian naive Bayes classifier and evaluate it on ``x_test``.

    The model is a default ``GaussianNB`` trained on ``(x_train, y_train)``.
    Returns the tuple produced by ``cm_prediction``:
    ``(classifier, accuracy, report, x_test, y_test, cm)``.
    """
    from sklearn.naive_bayes import GaussianNB

    model = GaussianNB()
    model.fit(x_train, y_train)

    return cm_prediction(model, x_test)

def knn(x_train,y_train,x_test):
    """Fit a k-nearest-neighbours classifier and evaluate it on ``x_test``.

    The model uses 5 neighbours with the Minkowski metric at ``p=2`` and is
    trained on ``(x_train, y_train)``. Returns the tuple produced by
    ``cm_prediction``: ``(classifier, accuracy, report, x_test, y_test, cm)``.
    """
    from sklearn.neighbors import KNeighborsClassifier

    model = KNeighborsClassifier(n_neighbors=5, metric='minkowski', p=2)
    model.fit(x_train, y_train)

    return cm_prediction(model, x_test)

def Decision(x_train,y_train,x_test):
    """Fit an entropy-criterion decision tree and evaluate it on ``x_test``.

    The model is ``DecisionTreeClassifier(criterion='entropy', random_state=0)``
    trained on ``(x_train, y_train)``. Returns the tuple produced by
    ``cm_prediction``: ``(classifier, accuracy, report, x_test, y_test, cm)``.
    """
    from sklearn.tree import DecisionTreeClassifier

    model = DecisionTreeClassifier(criterion='entropy', random_state=0)
    model.fit(x_train, y_train)

    return cm_prediction(model, x_test)


def random(x_train,y_train,x_test):
    """Fit a 10-tree random forest and evaluate it on ``x_test``.

    The model is ``RandomForestClassifier(n_estimators=10, criterion='entropy',
    random_state=0)`` trained on ``(x_train, y_train)``. Returns the tuple
    produced by ``cm_prediction``:
    ``(classifier, accuracy, report, x_test, y_test, cm)``.
    """
    from sklearn.ensemble import RandomForestClassifier

    model = RandomForestClassifier(n_estimators=10, criterion='entropy', random_state=0)
    model.fit(x_train, y_train)

    return cm_prediction(model, x_test)

def selectk_classification(acclog,accsvm,accsvmnl,accknn,accnav,accdes,accrf):
    """Collect per-model accuracies into a one-row comparison table.

    Each argument is a sequence of accuracies for one classifier. The result
    is a DataFrame with the single row label ``'ChiSquare'`` and one column
    per classifier; the row is filled from position 0 of every sequence.
    """
    scores_by_column = {
        'Logistic': acclog,
        'SVMl': accsvm,
        'SVMnl': accsvmnl,
        'KNN': accknn,
        'Naive': accnav,
        'Decision': accdes,
        'Random': accrf,
    }

    table = pd.DataFrame(index=['ChiSquare'], columns=list(scores_by_column))
    for position, row_label in enumerate(table.index):
        for column, scores in scores_by_column.items():
            table.loc[row_label, column] = scores[position]

    return table


Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  method='lar', copy_X=True, eps=np.finfo(np.float).eps,
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  method='lar', copy_X=True, eps=np.finfo(np.float).eps,
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  eps=np.finfo(np.float).eps, copy_Gram=True, verbose=0,
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  eps=np.finfo(np.float).eps, copy_X=True, fit_path=True,
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  eps=np.finfo(np.float).eps, copy_X=True, fit_path=True,
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes

In [2]:
# Load the raw data and dummy-encode it (first level of each encoded column dropped).
dataset = pd.read_csv('PrePlacement_data.csv', index_col=None)
df2 = pd.get_dummies(dataset, drop_first=True)

# Target is the 'status_Placed' dummy column; every remaining column is a feature.
dep_y = df2['status_Placed']
indep_x = df2.drop(columns='status_Placed')


In [18]:
# Keep the 8 highest-scoring features according to selectkbest.
kbest = selectkbest(indep_x, dep_y, 8)

# One fresh accuracy list per classifier; the evaluation cell appends to these.
acclog, accsvml, accsvmnl, accknn, accdes, accnav, accrf = ([] for _ in range(7))

In [19]:
# Split and scale the selected features. The module-level y_test assigned here
# is also what cm_prediction scores against, so this line must run before any
# of the model helpers below.
X_train,X_test,y_train,y_test=split_scalar(kbest,dep_y)

# Train and score every classifier with one loop instead of seven copy-pasted
# stanzas. Each helper is paired with the list that collects its accuracy.
for train_and_score, accuracies in (
    (logistic, acclog),
    (svm_linear, accsvml),
    (svm_NL, accsvmnl),
    (knn, accknn),
    (Naive, accnav),
    (Decision, accdes),
    (random, accrf),
):
    classifier,Accuracy,report,X_test,y_test,cm=train_and_score(X_train,y_train,X_test)
    accuracies.append(Accuracy)

# NOTE(review): the recorded outputs show ~1.0 accuracy for every model, which
# is unusual; worth checking the selected features for target leakage
# (cannot be confirmed from this cell alone).
result=selectk_classification(acclog,accsvml,accsvmnl,accknn,accnav,accdes,accrf)

result

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  indices = (scores > 0).astype(np.int)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  self._y = np.empty(y.shape, dtype=np.int)
  old_joblib = LooseVersion(joblib_version) < LooseVersion('0.12')
  old_joblib = LooseVersion(joblib_version) < LooseVersion('0.12')
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  y_encoded = np.zeros(y.shape, dtype=np.int)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  y_store_unique_indices = np.zeros(y.shape, dtype=np.int)
  if _joblib.__version__ >= LooseVersion('0.12'):
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  y_encoded = np.

Unnamed: 0,Logistic,SVMl,SVMnl,KNN,Naive,Decision,Random
ChiSquare,1.0,1.0,1.0,1.0,1.0,1.0,1.0


In [5]:
# Unused draft of selectkbest that also prints the per-feature chi2 scores.
# Note: as written it has no return statement, so it would return None if re-enabled.
# def selectkbest(indep_x,dep_y,n):
#     from sklearn.feature_selection import SelectKBest
#     from sklearn.feature_selection import chi2
#     test=SelectKBest(score_func=chi2,k=n)
#     fit1=test.fit(indep_x,dep_y)
#     df=pd.DataFrame({'Features':indep_x.columns,'Score':test.scores_}).sort_values(by='Score',ascending=False)
#     print(df)
#     selectk_features=fit1.transform(indep_x)

In [10]:
result  # 5 — presumably the k passed to selectkbest for this saved run; TODO confirm

Unnamed: 0,Logistic,SVMl,SVMnl,KNN,Naive,Decision,Random
ChiSquare,1.0,1.0,1.0,1.0,1.0,1.0,0.981481


In [17]:
result  # 10 — presumably the k passed to selectkbest for this saved run; TODO confirm

Unnamed: 0,Logistic,SVMl,SVMnl,KNN,Naive,Decision,Random
ChiSquare,1.0,1.0,1.0,0.981481,1.0,1.0,1.0


In [20]:
result  # 8 — presumably the k passed to selectkbest for this saved run; TODO confirm

Unnamed: 0,Logistic,SVMl,SVMnl,KNN,Naive,Decision,Random
ChiSquare,1.0,1.0,1.0,1.0,1.0,1.0,1.0
