In [1]:
import pandas as pd
dataset = pd.read_csv("PreAnxiety.csv")

In [2]:
dataset.head(3)

Unnamed: 0,1. Age,2. Gender,3. University,4. Department,5. Academic Year,6. Current CGPA,7. Did you receive a waiver or scholarship at your university?,Anxiety Label
0,18-22,Female,"Independent University, Bangladesh (IUB)",Engineering - CS / CSE / CSC / Similar to CS,Second Year or Equivalent,2.50 - 2.99,No,More Anxious
1,18-22,Male,"Independent University, Bangladesh (IUB)",Engineering - CS / CSE / CSC / Similar to CS,Third Year or Equivalent,3.00 - 3.39,No,More Anxious
2,18-22,Male,American International University Bangladesh (...,Engineering - CS / CSE / CSC / Similar to CS,Third Year or Equivalent,3.00 - 3.39,No,Less Anxious


In [3]:
x = dataset.iloc[:,0:7]
y = dataset.iloc[:,-1]

In [4]:
y.value_counts()

More Anxious    1869
Less Anxious     159
Name: Anxiety Label, dtype: int64

In [5]:
y.unique()

array(['More Anxious', 'Less Anxious'], dtype=object)

<!-- Our Dataset seems to be imbalance, Lets fix this imbalance using SMOTE -->

## Dataset is highly imbalanced, let's balance it using SMOTE

In [6]:
x_dum = pd.get_dummies(x,drop_first=True, dtype=int)
y_dum = pd.get_dummies(y,drop_first=True,dtype=int)

In [7]:
from imblearn.over_sampling import SMOTE

In [8]:
sm = SMOTE(random_state=42)

In [9]:
x_resampled, y_resampled = sm.fit_resample(x_dum,y_dum)

In [10]:
len(x_resampled.columns)

41

In [11]:
y_resampled.value_counts()

More Anxious
0               1869
1               1869
dtype: int64

# Now we got the balanced dataset

# Feature Selection Using Select K Best:

In [12]:
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import chi2

In [13]:
selectBest = SelectKBest(score_func=chi2, k=15)

In [14]:
selectkbest = selectBest.fit(x_resampled,y_resampled).transform(x_resampled)

In [15]:
selectkbest

array([[0, 0, 0, ..., 0, 1, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]])

In [17]:
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestClassifier


param_grid = {
    "n_estimators":[200,100],
    'max_features':['sqrt', 'log2', None],
    "criterion":["gini", "entropy", "log_loss"]
}

grid = GridSearchCV(RandomForestClassifier(),param_grid,scoring="accuracy",verbose=3,refit=True,n_jobs=-1)
grid.fit(selectkbest,y_resampled)

Fitting 5 folds for each of 18 candidates, totalling 90 fits


  self.best_estimator_.fit(X, y, **fit_params)


## Result of Select K for 15 features

In [18]:
pd.DataFrame(grid.cv_results_)

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_criterion,param_max_features,param_n_estimators,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.908838,0.082956,0.043797,0.004714,gini,sqrt,200,"{'criterion': 'gini', 'max_features': 'sqrt', ...",0.663102,0.719251,0.713904,0.70415,0.729585,0.705998,0.02297,13
1,0.386957,0.01825,0.030525,0.00853,gini,sqrt,100,"{'criterion': 'gini', 'max_features': 'sqrt', ...",0.665775,0.717914,0.716578,0.702811,0.729585,0.706533,0.022076,9
2,0.752458,0.019155,0.04896,0.004973,gini,log2,200,"{'criterion': 'gini', 'max_features': 'log2', ...",0.661765,0.721925,0.716578,0.705489,0.729585,0.707068,0.023971,2
3,0.353058,0.008481,0.034999,0.007128,gini,log2,100,"{'criterion': 'gini', 'max_features': 'log2', ...",0.661765,0.719251,0.715241,0.70415,0.729585,0.705998,0.023569,13
4,1.209437,0.285204,0.062897,0.028419,gini,,200,"{'criterion': 'gini', 'max_features': None, 'n...",0.665775,0.717914,0.716578,0.70415,0.729585,0.7068,0.022038,4
5,1.024741,0.299991,0.071194,0.010667,gini,,100,"{'criterion': 'gini', 'max_features': None, 'n...",0.663102,0.720588,0.713904,0.701473,0.729585,0.70573,0.023203,17
6,2.019339,0.049792,0.127877,0.010188,entropy,sqrt,200,"{'criterion': 'entropy', 'max_features': 'sqrt...",0.664439,0.719251,0.713904,0.70415,0.729585,0.706266,0.022471,10
7,1.05208,0.047826,0.07429,0.00645,entropy,sqrt,100,"{'criterion': 'entropy', 'max_features': 'sqrt...",0.664439,0.723262,0.713904,0.705489,0.729585,0.707336,0.022958,1
8,2.036343,0.051481,0.129099,0.020424,entropy,log2,200,"{'criterion': 'entropy', 'max_features': 'log2...",0.661765,0.719251,0.712567,0.702811,0.729585,0.705196,0.023404,18
9,1.064856,0.023545,0.070802,0.00854,entropy,log2,100,"{'criterion': 'entropy', 'max_features': 'log2...",0.661765,0.719251,0.715241,0.70415,0.729585,0.705998,0.023569,13


## Select K Feature Selection has given us Accuracy of 70.76% with Random Forest

# Feature Selection Using RFE:

In [19]:
from sklearn.feature_selection import RFE
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression

log_model = LogisticRegression(solver='lbfgs')
RF = RandomForestClassifier(n_estimators = 100, criterion = 'entropy', random_state = 0)
DT= DecisionTreeClassifier(criterion = 'gini', max_features='sqrt',splitter='best',random_state = 0)
svc_model = SVC(kernel = 'linear', random_state = 0)
rfemodellist=[log_model,svc_model,RF,DT] 
log_rfe_feature=[]
Selected_features = []
for i in   rfemodellist:
    print(i)
    log_rfe = RFE(estimator=i,n_features_to_select=15)
    log_fit = log_rfe.fit(x_resampled, y_resampled)
    log_rfe_feature.append(log_fit.transform(x_resampled))
    Selected_features.append(x_resampled.columns[log_fit.support_])
    

LogisticRegression()


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


SVC(kernel='linear', random_state=0)


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


RandomForestClassifier(criterion='entropy', random_state=0)


  estimator.fit(X[:, features], y, **fit_params)
  estimator.fit(X[:, features], y, **fit_params)
  estimator.fit(X[:, features], y, **fit_params)
  estimator.fit(X[:, features], y, **fit_params)
  estimator.fit(X[:, features], y, **fit_params)
  estimator.fit(X[:, features], y, **fit_params)
  estimator.fit(X[:, features], y, **fit_params)
  estimator.fit(X[:, features], y, **fit_params)
  estimator.fit(X[:, features], y, **fit_params)
  estimator.fit(X[:, features], y, **fit_params)
  estimator.fit(X[:, features], y, **fit_params)
  estimator.fit(X[:, features], y, **fit_params)
  estimator.fit(X[:, features], y, **fit_params)
  estimator.fit(X[:, features], y, **fit_params)
  estimator.fit(X[:, features], y, **fit_params)
  estimator.fit(X[:, features], y, **fit_params)
  estimator.fit(X[:, features], y, **fit_params)
  estimator.fit(X[:, features], y, **fit_params)
  estimator.fit(X[:, features], y, **fit_params)
  estimator.fit(X[:, features], y, **fit_params)
  estimator.fit(X[:,

DecisionTreeClassifier(max_features='sqrt', random_state=0)


## Using Random Forest Selected features from RFE to Train Random Forest model 

In [23]:
param_grid = {
    "n_estimators":[200,100],
    'max_features':['sqrt', 'log2', None],
    "criterion":["gini", "entropy", "log_loss"]
}

grid = GridSearchCV(RandomForestClassifier(),param_grid,scoring="accuracy",verbose=3,refit=True,n_jobs=-1)
best_model = grid.fit(log_rfe_feature[2],y_resampled)

Fitting 5 folds for each of 18 candidates, totalling 90 fits


  self.best_estimator_.fit(X, y, **fit_params)


In [27]:
best_model = grid.best_estimator_
pd.DataFrame(grid.cv_results_)

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_criterion,param_max_features,param_n_estimators,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,2.438919,0.017907,0.140031,0.022664,gini,sqrt,200,"{'criterion': 'gini', 'max_features': 'sqrt', ...",0.751337,0.792781,0.824866,0.801874,0.812584,0.796688,0.025082,6
1,1.200707,0.044946,0.082712,0.011119,gini,sqrt,100,"{'criterion': 'gini', 'max_features': 'sqrt', ...",0.75,0.791444,0.82754,0.797858,0.812584,0.795885,0.026115,10
2,2.405783,0.046051,0.148409,0.013307,gini,log2,200,"{'criterion': 'gini', 'max_features': 'log2', ...",0.75,0.798128,0.823529,0.803213,0.819277,0.79883,0.0262,1
3,1.194213,0.020588,0.085907,0.009407,gini,log2,100,"{'criterion': 'gini', 'max_features': 'log2', ...",0.752674,0.795455,0.823529,0.801874,0.812584,0.797223,0.024238,4
4,3.356051,0.057795,0.144828,0.008422,gini,,200,"{'criterion': 'gini', 'max_features': None, 'n...",0.744652,0.792781,0.82754,0.795181,0.813922,0.794815,0.02813,14
5,1.671752,0.080245,0.070382,0.011659,gini,,100,"{'criterion': 'gini', 'max_features': None, 'n...",0.743316,0.794118,0.820856,0.800535,0.8166,0.795085,0.027705,12
6,2.443315,0.079394,0.126175,0.01393,entropy,sqrt,200,"{'criterion': 'entropy', 'max_features': 'sqrt...",0.748663,0.796791,0.819519,0.801874,0.812584,0.795886,0.024916,9
7,1.23885,0.044994,0.076005,0.008393,entropy,sqrt,100,"{'criterion': 'entropy', 'max_features': 'sqrt...",0.75,0.78877,0.824866,0.801874,0.8166,0.796422,0.026304,8
8,2.429241,0.065646,0.136865,0.018817,entropy,log2,200,"{'criterion': 'entropy', 'max_features': 'log2...",0.752674,0.799465,0.820856,0.801874,0.813922,0.797758,0.023863,3
9,1.190082,0.044166,0.074964,0.015141,entropy,log2,100,"{'criterion': 'entropy', 'max_features': 'log2...",0.751337,0.794118,0.824866,0.800535,0.812584,0.796688,0.024999,7


# For 15 Features, Random Forest has given us better accuracy of 79.72%

## Using SVC Selected features from RFE to Train Random Forest model 

In [28]:
param_grid = {
    "n_estimators":[200,100],
    'max_features':['sqrt', 'log2', None],
    "criterion":["gini", "entropy", "log_loss"]
}

grid = GridSearchCV(RandomForestClassifier(),param_grid,scoring="accuracy",verbose=3,refit=True,n_jobs=-1)
grid.fit(log_rfe_feature[1],y_resampled)

Fitting 5 folds for each of 18 candidates, totalling 90 fits


  self.best_estimator_.fit(X, y, **fit_params)


In [29]:
pd.DataFrame(grid.cv_results_)

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_criterion,param_max_features,param_n_estimators,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.705341,0.014709,0.043906,0.005603,gini,sqrt,200,"{'criterion': 'gini', 'max_features': 'sqrt', ...",0.663102,0.708556,0.720588,0.716198,0.713521,0.704393,0.021011,1
1,0.349619,0.012465,0.017747,0.00304,gini,sqrt,100,"{'criterion': 'gini', 'max_features': 'sqrt', ...",0.663102,0.708556,0.720588,0.716198,0.713521,0.704393,0.021011,1
2,0.710183,0.031915,0.045459,0.00384,gini,log2,200,"{'criterion': 'gini', 'max_features': 'log2', ...",0.663102,0.708556,0.720588,0.716198,0.713521,0.704393,0.021011,1
3,0.360092,0.013033,0.028017,0.004567,gini,log2,100,"{'criterion': 'gini', 'max_features': 'log2', ...",0.663102,0.708556,0.720588,0.716198,0.713521,0.704393,0.021011,1
4,0.899858,0.076273,0.059029,0.035088,gini,,200,"{'criterion': 'gini', 'max_features': None, 'n...",0.663102,0.708556,0.720588,0.716198,0.713521,0.704393,0.021011,1
5,0.627249,0.255095,0.046232,0.015989,gini,,100,"{'criterion': 'gini', 'max_features': None, 'n...",0.663102,0.708556,0.720588,0.716198,0.713521,0.704393,0.021011,1
6,2.148342,0.10413,0.114763,0.014631,entropy,sqrt,200,"{'criterion': 'entropy', 'max_features': 'sqrt...",0.663102,0.708556,0.720588,0.716198,0.713521,0.704393,0.021011,1
7,1.055094,0.045027,0.075825,0.012732,entropy,sqrt,100,"{'criterion': 'entropy', 'max_features': 'sqrt...",0.663102,0.708556,0.720588,0.716198,0.713521,0.704393,0.021011,1
8,2.114423,0.067031,0.142475,0.02747,entropy,log2,200,"{'criterion': 'entropy', 'max_features': 'log2...",0.663102,0.708556,0.720588,0.716198,0.713521,0.704393,0.021011,1
9,1.032298,0.066729,0.074924,0.008282,entropy,log2,100,"{'criterion': 'entropy', 'max_features': 'log2...",0.663102,0.708556,0.720588,0.716198,0.713521,0.704393,0.021011,1


## Using Logistic Regression Selected feature from RFE to Train Random Forest model 

In [30]:
param_grid = {
    "n_estimators":[200,100],
    'max_features':['sqrt', 'log2', None],
    "criterion":["gini", "entropy", "log_loss"]
}

grid = GridSearchCV(RandomForestClassifier(),param_grid,scoring="accuracy",verbose=3,refit=True,n_jobs=-1)
grid.fit(log_rfe_feature[0],y_resampled)

Fitting 5 folds for each of 18 candidates, totalling 90 fits


  self.best_estimator_.fit(X, y, **fit_params)


In [31]:
pd.DataFrame(grid.cv_results_)

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_criterion,param_max_features,param_n_estimators,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,1.983735,0.03947,0.116385,0.018062,gini,sqrt,200,"{'criterion': 'gini', 'max_features': 'sqrt', ...",0.643048,0.696524,0.68984,0.686747,0.702811,0.683794,0.021115,1
1,0.949452,0.048813,0.06441,0.012981,gini,sqrt,100,"{'criterion': 'gini', 'max_features': 'sqrt', ...",0.643048,0.696524,0.68984,0.686747,0.702811,0.683794,0.021115,1
2,1.939931,0.084626,0.126858,0.012041,gini,log2,200,"{'criterion': 'gini', 'max_features': 'log2', ...",0.643048,0.696524,0.68984,0.686747,0.702811,0.683794,0.021115,1
3,0.990424,0.061318,0.061799,0.007746,gini,log2,100,"{'criterion': 'gini', 'max_features': 'log2', ...",0.643048,0.696524,0.68984,0.686747,0.702811,0.683794,0.021115,1
4,2.584702,0.12802,0.133426,0.016103,gini,,200,"{'criterion': 'gini', 'max_features': None, 'n...",0.643048,0.696524,0.68984,0.685408,0.702811,0.683526,0.021084,15
5,1.30122,0.066267,0.092832,0.032224,gini,,100,"{'criterion': 'gini', 'max_features': None, 'n...",0.643048,0.696524,0.68984,0.685408,0.702811,0.683526,0.021084,15
6,1.928731,0.055288,0.126603,0.006346,entropy,sqrt,200,"{'criterion': 'entropy', 'max_features': 'sqrt...",0.643048,0.696524,0.68984,0.686747,0.702811,0.683794,0.021115,1
7,0.993543,0.050288,0.066763,0.012824,entropy,sqrt,100,"{'criterion': 'entropy', 'max_features': 'sqrt...",0.643048,0.696524,0.68984,0.686747,0.702811,0.683794,0.021115,1
8,1.966498,0.049761,0.12899,0.015041,entropy,log2,200,"{'criterion': 'entropy', 'max_features': 'log2...",0.643048,0.696524,0.68984,0.686747,0.702811,0.683794,0.021115,1
9,0.976202,0.04272,0.069237,0.002219,entropy,log2,100,"{'criterion': 'entropy', 'max_features': 'log2...",0.643048,0.696524,0.68984,0.686747,0.702811,0.683794,0.021115,1


In [32]:
Selected_features[3]

Index(['1. Age_23-26', '2. Gender_Male', '3. University_Dhaka University (DU)',
       '3. University_Dhaka University of Engineering and Technology (DUET)',
       '3. University_Independent University, Bangladesh (IUB)',
       '3. University_Islamic University of Technology (IUT)',
       '3. University_North South University (NSU)',
       '4. Department_Business and Entrepreneurship Studies',
       '4. Department_Engineering - CS / CSE / CSC / Similar to CS',
       '4. Department_Other', '5. Academic Year_Fourth Year or Equivalent',
       '5. Academic Year_Other', '5. Academic Year_Second Year or Equivalent',
       '5. Academic Year_Third Year or Equivalent',
       '7. Did you receive a waiver or scholarship at your university?_Yes'],
      dtype='object')

In [33]:
# university = int(input("Enter:\n1 for IUB\n2 for AIUB\n3 for NSU\n4 for IUT\n5 for PSTU\n6 for RUET\n7 for DU\n8 for BUET\n9 for DUET\n10 for UIU\n11 for EWU\n12 for BRAC\n13 for BAU\n14 for RU\n15 for Daffodil\n"))-1
entered_age = int(input("Enter your Age: "))
age=""
if(entered_age<18):
    age="Below 18"
elif entered_age<=22:
    age="18-22"
elif entered_age<=26:
    age="23-26"
elif entered_age<=30:
    age="27-30"
else:
    age="Above 30"
    
entered_gender = input("Gender (male/female/m/f): ").lower()[0:1]

if "m" in entered_gender:
    gender="Male"
elif "f" in entered_gender:
    gender="Female"
else:
    gender="Prefer not to say"
    
for i,value in enumerate(x["3. University"].unique()):
    
    print(f"Enter: {i+1} for {value}")
    
university = int(input("Enter Here:"))-1
    
selected_university = ""
for i,value in enumerate(x["3. University"].unique()):
    if(university==i):
        selected_university=value
        break;

        
for i,value in enumerate(x["4. Department"].unique()):
    
    print(f"Enter: {i+1} for {value}")
    
department = int(input("Enter Here:"))-1
    
selected_department = ""
for i,value in enumerate(x["4. Department"].unique()):
    if(department==i):
        selected_department=value
        break;
    
for i,value in enumerate(x["5. Academic Year"].unique()):
    
    print(f"Enter: {i+1} for {value}")
    
academic_year = int(input("Enter Here:"))-1
    
selected_academic_year = ""
for i,value in enumerate(x["5. Academic Year"].unique()):
    if(academic_year==i):
        selected_academic_year=value
        break;
        
for i,value in enumerate(x["6. Current CGPA"].unique()):
    
    print(f"Enter: {i+1} for {value}")
    
current_cgpa = int(input("Enter Here:"))-1
    
selected_current_cgpa = ""
for i,value in enumerate(x["6. Current CGPA"].unique()):
    if(current_cgpa==i):
        selected_current_cgpa=value
        break;
        
for i,value in enumerate(x["7. Did you receive a waiver or scholarship at your university?"].unique()):
    
    print(f"Enter: {i+1} for {value}")
    
entered_scholarship = int(input("Enter Here:"))-1
    
selected_scholarship = ""
for i,value in enumerate(x["7. Did you receive a waiver or scholarship at your university?"].unique()):
    if(entered_scholarship==i):
        selected_scholarship=value
        break;

    
print(f"You're Age:{entered_age}, You are grouped under the age group: {age}")
print(f"You're Gender:{gender}")  

print(f"You're Selected University:{selected_university}")
print(f"You're Selected Department:{selected_department}")  
print(f"You're Selected Academic Year:{selected_academic_year}")
print(f"You're CGPA:{current_cgpa}, You are grouped under the CGPA group: {selected_current_cgpa}")
print(f"You're Scholarship Status:{selected_scholarship}")

Enter your Age: 27
Gender (male/female/m/f): m
Enter: 1 for Independent University, Bangladesh (IUB)
Enter: 2 for American International University Bangladesh (AIUB)
Enter: 3 for North South University (NSU)
Enter: 4 for Islamic University of Technology (IUT)
Enter: 5 for Patuakhali Science and Technology University
Enter: 6 for Rajshahi University of Engineering and Technology (RUET)
Enter: 7 for Dhaka University (DU)
Enter: 8 for Bangladesh University of Engineering and Technology (BUET)
Enter: 9 for Dhaka University of Engineering and Technology (DUET)
Enter: 10 for United International University (UIU)
Enter: 11 for East West University (EWU)
Enter: 12 for BRAC University
Enter: 13 for Bangladesh Agricultural University (BAU)
Enter: 14 for Rajshahi University (RU)
Enter: 15 for Daffodil University
Enter Here:7
Enter: 1 for Engineering - CS / CSE / CSC / Similar to CS
Enter: 2 for Engineering - EEE/ ECE / Similar to EEE
Enter: 3 for Other
Enter: 4 for Business and Entrepreneurship S

In [39]:
user_inputs = [age,gender,selected_university,selected_department,selected_academic_year,selected_current_cgpa,selected_scholarship]

In [40]:
inputfeatures = [0 for i in Selected_features[3]]

In [41]:
for i in user_inputs:
    for index,j in enumerate(Selected_features[3]):
        if j.endswith(i):
            if j.split("_")[-1] == i:
                inputfeatures[index] = 1

In [42]:
best_model.predict([inputfeatures])

array([1])

In [38]:
# newdata = pd.read_csv("PreAnxiety.csv")

In [1]:
# for k in newdata.iloc[:,0:7].values:
#     inputfeatures = [0] * len(Selected_features[3])
#     for i in k:
#         for index,j in enumerate(Selected_features[3]):
#             if j.endswith(i):
#                 if j.split("_")[-1] == i:
#                     inputfeatures[index] = 1
#     print(best_model.predict([inputfeatures]))            

In [None]:
# newdata.iloc[:,0:6]

In [44]:
import pickle

In [47]:
with open('./../4.Final Model/AnxietyModel.sav',"wb") as file:
    pickle.dump(best_model,file)

In [66]:
with open('./../4.Final Model/SelectedFeatures.txt',"w") as file:
    for feature in Selected_features[3]:
        file.write(f"{feature}\n")