In [44]:
import pandas as pd
dataset = pd.read_csv("PreAnxiety.csv")

In [45]:
dataset.head(3)

Unnamed: 0,1. Age,2. Gender,3. University,4. Department,5. Academic Year,6. Current CGPA,7. Did you receive a waiver or scholarship at your university?,Anxiety Label
0,18-22,Female,"Independent University, Bangladesh (IUB)",Engineering - CS / CSE / CSC / Similar to CS,Second Year or Equivalent,2.50 - 2.99,No,More Anxious
1,18-22,Male,"Independent University, Bangladesh (IUB)",Engineering - CS / CSE / CSC / Similar to CS,Third Year or Equivalent,3.00 - 3.39,No,More Anxious
2,18-22,Male,American International University Bangladesh (...,Engineering - CS / CSE / CSC / Similar to CS,Third Year or Equivalent,3.00 - 3.39,No,Less Anxious


In [46]:
x = dataset.iloc[:,0:7]
y = dataset.iloc[:,-1]

In [47]:
y.value_counts()

More Anxious    1869
Less Anxious     159
Name: Anxiety Label, dtype: int64

In [48]:
y.unique()

array(['More Anxious', 'Less Anxious'], dtype=object)

<!-- Our Dataset seems to be imbalance, Lets fix this imbalance using SMOTE -->

## Dataset is highly imbalanced, let's balance it using SMOTE

In [49]:
x_dum = pd.get_dummies(x,drop_first=True, dtype=int)
y_dum = pd.get_dummies(y,drop_first=True,dtype=int)

In [50]:
from imblearn.over_sampling import SMOTE

In [51]:
sm = SMOTE(random_state=42)

In [52]:
x_resampled, y_resampled = sm.fit_resample(x_dum,y_dum)

In [53]:
len(x_resampled.columns)

41

In [54]:
y_resampled.value_counts()

More Anxious
0               1869
1               1869
dtype: int64

# Now we got the balanced dataset

# Feature Selection Using Select K Best:

In [55]:
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import chi2

In [56]:
selectBest = SelectKBest(score_func=chi2, k=15)

In [57]:
selectkbest = selectBest.fit(x_resampled,y_resampled).transform(x_resampled)

In [58]:
selectkbest

array([[0, 0, 0, ..., 0, 1, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]])

In [59]:
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestClassifier


param_grid = {
    "n_estimators":[200,100],
    'max_features':['sqrt', 'log2', None],
    "criterion":["gini", "entropy", "log_loss"]
}

grid = GridSearchCV(RandomForestClassifier(),param_grid,scoring="accuracy",verbose=3,refit=True,n_jobs=-1)
grid.fit(selectkbest,y_resampled)

Fitting 5 folds for each of 18 candidates, totalling 90 fits


  self.best_estimator_.fit(X, y, **fit_params)


## Result of Select K for 15 features

In [60]:
pd.DataFrame(grid.cv_results_)

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_criterion,param_max_features,param_n_estimators,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.794003,0.038661,0.043756,0.008781,gini,sqrt,200,"{'criterion': 'gini', 'max_features': 'sqrt', ...",0.665775,0.716578,0.716578,0.705489,0.729585,0.706801,0.021887,4
1,0.377863,0.010333,0.028228,0.004493,gini,sqrt,100,"{'criterion': 'gini', 'max_features': 'sqrt', ...",0.663102,0.721925,0.716578,0.705489,0.729585,0.707336,0.023466,2
2,0.844556,0.085006,0.068739,0.024914,gini,log2,200,"{'criterion': 'gini', 'max_features': 'log2', ...",0.663102,0.716578,0.713904,0.70415,0.729585,0.705464,0.022684,17
3,0.389533,0.039904,0.03464,0.012858,gini,log2,100,"{'criterion': 'gini', 'max_features': 'log2', ...",0.663102,0.720588,0.712567,0.702811,0.729585,0.705731,0.023075,16
4,0.970604,0.020185,0.043887,0.011617,gini,,200,"{'criterion': 'gini', 'max_features': None, 'n...",0.665775,0.719251,0.712567,0.70415,0.729585,0.706266,0.021891,11
5,0.481179,0.042971,0.026347,0.005272,gini,,100,"{'criterion': 'gini', 'max_features': None, 'n...",0.663102,0.720588,0.713904,0.705489,0.729585,0.706533,0.023111,6
6,0.709852,0.029647,0.045479,0.004905,entropy,sqrt,200,"{'criterion': 'entropy', 'max_features': 'sqrt...",0.661765,0.721925,0.715241,0.705489,0.729585,0.706801,0.023871,4
7,0.350801,0.0101,0.029554,0.001625,entropy,sqrt,100,"{'criterion': 'entropy', 'max_features': 'sqrt...",0.664439,0.721925,0.717914,0.705489,0.729585,0.70787,0.023074,1
8,0.68252,0.026309,0.043008,0.008527,entropy,log2,200,"{'criterion': 'entropy', 'max_features': 'log2...",0.661765,0.717914,0.716578,0.70415,0.729585,0.705998,0.023538,14
9,0.349684,0.013765,0.019975,0.005583,entropy,log2,100,"{'criterion': 'entropy', 'max_features': 'log2...",0.663102,0.716578,0.716578,0.705489,0.729585,0.706266,0.022892,10


## Select K Feature Selection has given us Accuracy of 70.76% with Random Forest

# Feature Selection Using RFE:

In [61]:
from sklearn.feature_selection import RFE
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression

log_model = LogisticRegression(solver='lbfgs')
RF = RandomForestClassifier(n_estimators = 100, criterion = 'entropy', random_state = 0)
DT= DecisionTreeClassifier(criterion = 'gini', max_features='sqrt',splitter='best',random_state = 0)
svc_model = SVC(kernel = 'linear', random_state = 0)
rfemodellist=[log_model,svc_model,RF,DT] 
log_rfe_feature=[]
Selected_features = []
for i in   rfemodellist:
    print(i)
    log_rfe = RFE(estimator=i,n_features_to_select=15)
    log_fit = log_rfe.fit(x_resampled, y_resampled)
    log_rfe_feature.append(log_fit.transform(x_resampled))
    Selected_features.append(x_resampled.columns[log_fit.support_])
    

LogisticRegression()


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


SVC(kernel='linear', random_state=0)


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


RandomForestClassifier(criterion='entropy', random_state=0)


  estimator.fit(X[:, features], y, **fit_params)
  estimator.fit(X[:, features], y, **fit_params)
  estimator.fit(X[:, features], y, **fit_params)
  estimator.fit(X[:, features], y, **fit_params)
  estimator.fit(X[:, features], y, **fit_params)
  estimator.fit(X[:, features], y, **fit_params)
  estimator.fit(X[:, features], y, **fit_params)
  estimator.fit(X[:, features], y, **fit_params)
  estimator.fit(X[:, features], y, **fit_params)
  estimator.fit(X[:, features], y, **fit_params)
  estimator.fit(X[:, features], y, **fit_params)
  estimator.fit(X[:, features], y, **fit_params)
  estimator.fit(X[:, features], y, **fit_params)
  estimator.fit(X[:, features], y, **fit_params)
  estimator.fit(X[:, features], y, **fit_params)
  estimator.fit(X[:, features], y, **fit_params)
  estimator.fit(X[:, features], y, **fit_params)
  estimator.fit(X[:, features], y, **fit_params)
  estimator.fit(X[:, features], y, **fit_params)
  estimator.fit(X[:, features], y, **fit_params)
  estimator.fit(X[:,

DecisionTreeClassifier(max_features='sqrt', random_state=0)


## Using Random Forest Selected features from RFE to Train Random Forest model 

In [62]:
param_grid = {
    "n_estimators":[200,100],
    'max_features':['sqrt', 'log2', None],
    "criterion":["gini", "entropy", "log_loss"]
}

grid = GridSearchCV(RandomForestClassifier(),param_grid,scoring="accuracy",verbose=3,refit=True,n_jobs=-1)
best_model = grid.fit(log_rfe_feature[2],y_resampled)

Fitting 5 folds for each of 18 candidates, totalling 90 fits


  self.best_estimator_.fit(X, y, **fit_params)


In [63]:
best_model = grid.best_estimator_
pd.DataFrame(grid.cv_results_)

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_criterion,param_max_features,param_n_estimators,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.773841,0.006655,0.051945,0.008132,gini,sqrt,200,"{'criterion': 'gini', 'max_features': 'sqrt', ...",0.75,0.794118,0.823529,0.801874,0.8166,0.797224,0.025802,5
1,0.392553,0.003524,0.028614,0.004932,gini,sqrt,100,"{'criterion': 'gini', 'max_features': 'sqrt', ...",0.747326,0.794118,0.818182,0.796519,0.815261,0.794281,0.025382,14
2,0.803605,0.012931,0.050822,0.007685,gini,log2,200,"{'criterion': 'gini', 'max_features': 'log2', ...",0.748663,0.791444,0.826203,0.803213,0.811245,0.796154,0.026299,10
3,0.383541,0.01565,0.028282,0.006751,gini,log2,100,"{'criterion': 'gini', 'max_features': 'log2', ...",0.75,0.794118,0.824866,0.801874,0.815261,0.797224,0.025887,6
4,1.060524,0.026483,0.045485,0.006371,gini,,200,"{'criterion': 'gini', 'max_features': None, 'n...",0.745989,0.792781,0.823529,0.800535,0.815261,0.795619,0.027056,12
5,0.543157,0.017467,0.028494,0.012462,gini,,100,"{'criterion': 'gini', 'max_features': None, 'n...",0.744652,0.791444,0.826203,0.795181,0.813922,0.794281,0.027845,15
6,0.77321,0.029262,0.047947,0.00949,entropy,sqrt,200,"{'criterion': 'entropy', 'max_features': 'sqrt...",0.748663,0.795455,0.826203,0.801874,0.819277,0.798294,0.027215,1
7,0.399429,0.012625,0.031339,0.002281,entropy,sqrt,100,"{'criterion': 'entropy', 'max_features': 'sqrt...",0.751337,0.78877,0.822193,0.800535,0.8166,0.795887,0.025211,11
8,0.818896,0.03902,0.047438,0.005952,entropy,log2,200,"{'criterion': 'entropy', 'max_features': 'log2...",0.751337,0.796791,0.824866,0.801874,0.812584,0.79749,0.025008,4
9,0.402105,0.02345,0.024709,0.005836,entropy,log2,100,"{'criterion': 'entropy', 'max_features': 'log2...",0.751337,0.799465,0.819519,0.801874,0.8166,0.797759,0.024508,3


# For 15 Features, Random Forest has given us better accuracy of 79.72%

## Using SVC Selected features from RFE to Train Random Forest model 

In [64]:
param_grid = {
    "n_estimators":[200,100],
    'max_features':['sqrt', 'log2', None],
    "criterion":["gini", "entropy", "log_loss"]
}

grid = GridSearchCV(RandomForestClassifier(),param_grid,scoring="accuracy",verbose=3,refit=True,n_jobs=-1)
grid.fit(log_rfe_feature[1],y_resampled)

Fitting 5 folds for each of 18 candidates, totalling 90 fits


  self.best_estimator_.fit(X, y, **fit_params)


In [65]:
pd.DataFrame(grid.cv_results_)

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_criterion,param_max_features,param_n_estimators,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.636201,0.013092,0.043609,0.005073,gini,sqrt,200,"{'criterion': 'gini', 'max_features': 'sqrt', ...",0.663102,0.708556,0.720588,0.716198,0.713521,0.704393,0.021011,1
1,0.31817,0.016035,0.023664,0.007224,gini,sqrt,100,"{'criterion': 'gini', 'max_features': 'sqrt', ...",0.663102,0.708556,0.720588,0.716198,0.713521,0.704393,0.021011,1
2,0.652313,0.00338,0.035151,0.002824,gini,log2,200,"{'criterion': 'gini', 'max_features': 'log2', ...",0.663102,0.708556,0.720588,0.716198,0.713521,0.704393,0.021011,1
3,0.340284,0.017243,0.02052,0.004814,gini,log2,100,"{'criterion': 'gini', 'max_features': 'log2', ...",0.663102,0.708556,0.720588,0.716198,0.713521,0.704393,0.021011,1
4,0.777019,0.018956,0.03843,0.006463,gini,,200,"{'criterion': 'gini', 'max_features': None, 'n...",0.663102,0.708556,0.720588,0.716198,0.713521,0.704393,0.021011,1
5,0.394516,0.010106,0.017732,0.001653,gini,,100,"{'criterion': 'gini', 'max_features': None, 'n...",0.663102,0.708556,0.720588,0.716198,0.713521,0.704393,0.021011,1
6,0.648961,0.038133,0.035931,0.005413,entropy,sqrt,200,"{'criterion': 'entropy', 'max_features': 'sqrt...",0.663102,0.708556,0.720588,0.716198,0.713521,0.704393,0.021011,1
7,0.348031,0.026817,0.016988,0.001258,entropy,sqrt,100,"{'criterion': 'entropy', 'max_features': 'sqrt...",0.663102,0.708556,0.720588,0.716198,0.713521,0.704393,0.021011,1
8,0.656937,0.022106,0.037979,0.006849,entropy,log2,200,"{'criterion': 'entropy', 'max_features': 'log2...",0.663102,0.708556,0.720588,0.716198,0.713521,0.704393,0.021011,1
9,0.31893,0.013932,0.02849,0.003557,entropy,log2,100,"{'criterion': 'entropy', 'max_features': 'log2...",0.663102,0.708556,0.720588,0.716198,0.713521,0.704393,0.021011,1


## Using Logistic Regression Selected feature from RFE to Train Random Forest model 

In [66]:
param_grid = {
    "n_estimators":[200,100],
    'max_features':['sqrt', 'log2', None],
    "criterion":["gini", "entropy", "log_loss"]
}

grid = GridSearchCV(RandomForestClassifier(),param_grid,scoring="accuracy",verbose=3,refit=True,n_jobs=-1)
grid.fit(log_rfe_feature[0],y_resampled)

Fitting 5 folds for each of 18 candidates, totalling 90 fits


  self.best_estimator_.fit(X, y, **fit_params)


In [67]:
pd.DataFrame(grid.cv_results_)

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_criterion,param_max_features,param_n_estimators,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.66757,0.035497,0.052111,0.018125,gini,sqrt,200,"{'criterion': 'gini', 'max_features': 'sqrt', ...",0.643048,0.696524,0.68984,0.686747,0.702811,0.683794,0.021115,1
1,0.33335,0.011653,0.02377,0.005362,gini,sqrt,100,"{'criterion': 'gini', 'max_features': 'sqrt', ...",0.643048,0.696524,0.68984,0.686747,0.702811,0.683794,0.021115,1
2,0.627516,0.013148,0.040275,0.006223,gini,log2,200,"{'criterion': 'gini', 'max_features': 'log2', ...",0.643048,0.696524,0.68984,0.686747,0.702811,0.683794,0.021115,1
3,0.317244,0.007044,0.022467,0.007264,gini,log2,100,"{'criterion': 'gini', 'max_features': 'log2', ...",0.643048,0.696524,0.68984,0.686747,0.702811,0.683794,0.021115,1
4,0.814153,0.034691,0.038501,0.005474,gini,,200,"{'criterion': 'gini', 'max_features': None, 'n...",0.643048,0.696524,0.68984,0.685408,0.702811,0.683526,0.021084,15
5,0.412267,0.017735,0.020953,0.006383,gini,,100,"{'criterion': 'gini', 'max_features': None, 'n...",0.643048,0.696524,0.68984,0.685408,0.702811,0.683526,0.021084,15
6,0.644055,0.021995,0.041548,0.005743,entropy,sqrt,200,"{'criterion': 'entropy', 'max_features': 'sqrt...",0.643048,0.696524,0.68984,0.686747,0.702811,0.683794,0.021115,1
7,0.330907,0.011167,0.018017,0.002288,entropy,sqrt,100,"{'criterion': 'entropy', 'max_features': 'sqrt...",0.643048,0.696524,0.68984,0.686747,0.702811,0.683794,0.021115,1
8,0.648053,0.042483,0.038176,0.003575,entropy,log2,200,"{'criterion': 'entropy', 'max_features': 'log2...",0.643048,0.696524,0.68984,0.686747,0.702811,0.683794,0.021115,1
9,0.337143,0.013299,0.030634,0.008626,entropy,log2,100,"{'criterion': 'entropy', 'max_features': 'log2...",0.643048,0.696524,0.68984,0.686747,0.702811,0.683794,0.021115,1


In [68]:
Selected_features[3]

Index(['1. Age_23-26', '2. Gender_Male', '3. University_Dhaka University (DU)',
       '3. University_Dhaka University of Engineering and Technology (DUET)',
       '3. University_Independent University, Bangladesh (IUB)',
       '3. University_Islamic University of Technology (IUT)',
       '3. University_North South University (NSU)',
       '4. Department_Business and Entrepreneurship Studies',
       '4. Department_Engineering - CS / CSE / CSC / Similar to CS',
       '4. Department_Other', '5. Academic Year_Fourth Year or Equivalent',
       '5. Academic Year_Other', '5. Academic Year_Second Year or Equivalent',
       '5. Academic Year_Third Year or Equivalent',
       '7. Did you receive a waiver or scholarship at your university?_Yes'],
      dtype='object')

In [69]:
# university = int(input("Enter:\n1 for IUB\n2 for AIUB\n3 for NSU\n4 for IUT\n5 for PSTU\n6 for RUET\n7 for DU\n8 for BUET\n9 for DUET\n10 for UIU\n11 for EWU\n12 for BRAC\n13 for BAU\n14 for RU\n15 for Daffodil\n"))-1
entered_age = int(input("Enter your Age: "))
age=""
if(entered_age<18):
    age="Below 18"
elif entered_age<=22:
    age="18-22"
elif entered_age<=26:
    age="23-26"
elif entered_age<=30:
    age="27-30"
else:
    age="Above 30"
    
entered_gender = input("Gender (male/female/m/f): ").lower()[0:1]

if "m" in entered_gender:
    gender="Male"
elif "f" in entered_gender:
    gender="Female"
else:
    gender="Prefer not to say"
    
for i,value in enumerate(x["3. University"].unique()):
    
    print(f"Enter: {i+1} for {value}")
    
university = int(input("Enter Here:"))-1
    
selected_university = ""
for i,value in enumerate(x["3. University"].unique()):
    if(university==i):
        selected_university=value
        break;

        
for i,value in enumerate(x["4. Department"].unique()):
    
    print(f"Enter: {i+1} for {value}")
    
department = int(input("Enter Here:"))-1
    
selected_department = ""
for i,value in enumerate(x["4. Department"].unique()):
    if(department==i):
        selected_department=value
        break;
    
for i,value in enumerate(x["5. Academic Year"].unique()):
    
    print(f"Enter: {i+1} for {value}")
    
academic_year = int(input("Enter Here:"))-1
    
selected_academic_year = ""
for i,value in enumerate(x["5. Academic Year"].unique()):
    if(academic_year==i):
        selected_academic_year=value
        break;
        
for i,value in enumerate(x["6. Current CGPA"].unique()):
    
    print(f"Enter: {i+1} for {value}")
    
current_cgpa = int(input("Enter Here:"))-1
    
selected_current_cgpa = ""
for i,value in enumerate(x["6. Current CGPA"].unique()):
    if(current_cgpa==i):
        selected_current_cgpa=value
        break;
        
for i,value in enumerate(x["7. Did you receive a waiver or scholarship at your university?"].unique()):
    
    print(f"Enter: {i+1} for {value}")
    
entered_scholarship = int(input("Enter Here:"))-1
    
selected_scholarship = ""
for i,value in enumerate(x["7. Did you receive a waiver or scholarship at your university?"].unique()):
    if(entered_scholarship==i):
        selected_scholarship=value
        break;

    
print(f"You're Age:{entered_age}, You are grouped under the age group: {age}")
print(f"You're Gender:{gender}")  

print(f"You're Selected University:{selected_university}")
print(f"You're Selected Department:{selected_department}")  
print(f"You're Selected Academic Year:{selected_academic_year}")
print(f"You're CGPA:{current_cgpa}, You are grouped under the CGPA group: {selected_current_cgpa}")
print(f"You're Scholarship Status:{selected_scholarship}")

Enter your Age: 27
Gender (male/female/m/f): m
Enter: 1 for Independent University, Bangladesh (IUB)
Enter: 2 for American International University Bangladesh (AIUB)
Enter: 3 for North South University (NSU)
Enter: 4 for Islamic University of Technology (IUT)
Enter: 5 for Patuakhali Science and Technology University
Enter: 6 for Rajshahi University of Engineering and Technology (RUET)
Enter: 7 for Dhaka University (DU)
Enter: 8 for Bangladesh University of Engineering and Technology (BUET)
Enter: 9 for Dhaka University of Engineering and Technology (DUET)
Enter: 10 for United International University (UIU)
Enter: 11 for East West University (EWU)
Enter: 12 for BRAC University
Enter: 13 for Bangladesh Agricultural University (BAU)
Enter: 14 for Rajshahi University (RU)
Enter: 15 for Daffodil University
Enter Here:2
Enter: 1 for Engineering - CS / CSE / CSC / Similar to CS
Enter: 2 for Engineering - EEE/ ECE / Similar to EEE
Enter: 3 for Other
Enter: 4 for Business and Entrepreneurship S

In [70]:
user_inputs = [age,gender,selected_university,selected_department,selected_academic_year,selected_current_cgpa,selected_scholarship]

In [71]:
inputfeatures = [0 for i in Selected_features[3]]

In [72]:
for i in user_inputs:
    for index,j in enumerate(Selected_features[3]):
        if j.endswith(i):
            if j.split("_")[-1] == i:
                inputfeatures[index] = 1

In [73]:
best_model.predict([inputfeatures])

array([0])

In [74]:
# newdata = pd.read_csv("PreAnxiety.csv")

In [75]:
# for k in newdata.iloc[:,0:7].values:
#     inputfeatures = [0] * len(Selected_features[3])
#     for i in k:
#         for index,j in enumerate(Selected_features[3]):
#             if j.endswith(i):
#                 if j.split("_")[-1] == i:
#                     inputfeatures[index] = 1
#     print(best_model.predict([inputfeatures]))            

In [76]:
# newdata.iloc[:,0:6]

In [77]:
import pickle

In [78]:
with open('./../4.Final Model/AnxietyModel.sav',"wb") as file:
    pickle.dump(best_model,file)

In [122]:
with open('./../4.Final Model/SelectedFeatures.txt',"w") as file:
    for feature in Selected_features[3]:
        file.write(f"{feature}\n")
        
pickle.dump(list(Selected_features[3]),open("./../4.Final Model/SelectedFeatures.sav","wb"))

In [80]:
filename = "./../4.Final Model/encoder_columns.sav"

In [81]:
pickle.dump(list(x_dum.columns),open(filename,"wb"))

In [123]:
encoder_columns = pickle.load(open("./../4.Final Model/encoder_columns.sav","rb"))
selected_features = pickle.load(open("./../4.Final Model/SelectedFeatures.sav","rb"))

In [110]:
new_data = pd.DataFrame(columns=["1. Age","2. Gender","3. University","4. Department","5. Academic Year","6. Current CGPA","7. Did you receive a waiver or scholarship at your university?"],data=[["23-26","Male","Dhaka University (DU)","Engineering - CS / CSE / CSC / Similar to CS","Fourth Year or Equivalent","3.80 - 4.00","Yes"]])

In [111]:
new_data_encoded = pd.get_dummies(new_data)

In [112]:
new_data_encoded

Unnamed: 0,1. Age_23-26,2. Gender_Male,3. University_Dhaka University (DU),4. Department_Engineering - CS / CSE / CSC / Similar to CS,5. Academic Year_Fourth Year or Equivalent,6. Current CGPA_3.80 - 4.00,7. Did you receive a waiver or scholarship at your university?_Yes
0,1,1,1,1,1,1,1


In [113]:
for col in encoder_columns:
    if col not in new_data_encoded.columns:
        new_data_encoded[col]=0

new_data_encoded

Unnamed: 0,1. Age_23-26,2. Gender_Male,3. University_Dhaka University (DU),4. Department_Engineering - CS / CSE / CSC / Similar to CS,5. Academic Year_Fourth Year or Equivalent,6. Current CGPA_3.80 - 4.00,7. Did you receive a waiver or scholarship at your university?_Yes,1. Age_27-30,1. Age_Above 30,1. Age_Below 18,...,4. Department_Liberal Arts and Social Sciences,4. Department_Other,4. Department_Pharmacy and Public Health,5. Academic Year_Other,5. Academic Year_Second Year or Equivalent,5. Academic Year_Third Year or Equivalent,6. Current CGPA_3.00 - 3.39,6. Current CGPA_3.40 - 3.79,6. Current CGPA_Below 2.50,6. Current CGPA_Other
0,1,1,1,1,1,1,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [114]:
# Yhis line arranges the new_data_encoded in the order of encoder_columns and also removes the extra columns in new_data_encoded which doesn't match the encoder_columns 
new_data_encoded= new_data_encoded[encoder_columns] 

In [115]:
len(new_data_encoded.columns)

41

In [124]:
best_model.predict(new_data_encoded[selected_features])



array([1])