<a href="https://colab.research.google.com/github/VectorJamo/Deep-Learning/blob/main/Binary_Classification_Using_Support_Vector_Machine.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [60]:
# SVM classifies data by defining a hyperplane with maximal margin that separates the the data clusters
import numpy as py
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

from sklearn.model_selection import train_test_split
from sklearn.svm import SVC

In [54]:
dataset = pd.read_csv('/content/Student Depression Dataset.csv')

In [55]:
dataset.head()

Unnamed: 0,id,Gender,Age,City,Profession,Academic Pressure,Work Pressure,CGPA,Study Satisfaction,Job Satisfaction,Sleep Duration,Dietary Habits,Degree,Have you ever had suicidal thoughts ?,Work/Study Hours,Financial Stress,Family History of Mental Illness,Depression
0,2,Male,33.0,Visakhapatnam,Student,5.0,0.0,8.97,2.0,0.0,5-6 hours,Healthy,B.Pharm,Yes,3.0,1.0,No,1
1,8,Female,24.0,Bangalore,Student,2.0,0.0,5.9,5.0,0.0,5-6 hours,Moderate,BSc,No,3.0,2.0,Yes,0
2,26,Male,31.0,Srinagar,Student,3.0,0.0,7.03,5.0,0.0,Less than 5 hours,Healthy,BA,No,9.0,1.0,Yes,0
3,30,Female,28.0,Varanasi,Student,3.0,0.0,5.59,2.0,0.0,7-8 hours,Moderate,BCA,Yes,4.0,5.0,Yes,1
4,32,Female,25.0,Jaipur,Student,4.0,0.0,8.13,3.0,0.0,5-6 hours,Moderate,M.Tech,Yes,1.0,1.0,No,0


In [56]:
# We will extract the following columns to train our model: Gender, Academic Pressure, CGPA, Study Satisfaction, Suicidial Thoughts, Financial Stress,
# and Family history of Mental illness
dataset = dataset[['Gender', 'Academic Pressure', 'CGPA', 'Study Satisfaction', 'Have you ever had suicidal thoughts ?', 'Financial Stress', 'Family History of Mental Illness', 'Depression']]
dataset

Unnamed: 0,Gender,Academic Pressure,CGPA,Study Satisfaction,Have you ever had suicidal thoughts ?,Financial Stress,Family History of Mental Illness,Depression
0,Male,5.0,8.97,2.0,Yes,1.0,No,1
1,Female,2.0,5.90,5.0,No,2.0,Yes,0
2,Male,3.0,7.03,5.0,No,1.0,Yes,0
3,Female,3.0,5.59,2.0,Yes,5.0,Yes,1
4,Female,4.0,8.13,3.0,Yes,1.0,No,0
...,...,...,...,...,...,...,...,...
27896,Female,5.0,5.75,5.0,Yes,1.0,Yes,0
27897,Male,2.0,9.40,3.0,No,3.0,Yes,0
27898,Male,3.0,6.61,4.0,No,2.0,No,0
27899,Female,5.0,6.88,2.0,Yes,5.0,No,1


In [57]:
def normalize(value, max):
  return value/max

In [58]:
max_academic_pressure = dataset['Academic Pressure'].max()
max_study_satisfaction = dataset['Study Satisfaction'].max()
max_cgpa = dataset['CGPA'].max()
max_financial_stress = dataset['Financial Stress'].max()

# Normalize the values
dataset['Academic Pressure'] = [normalize(x, max_academic_pressure) for x in dataset['Academic Pressure']]
dataset['Study Satisfaction'] = [normalize(x, max_study_satisfaction) for x in dataset['Study Satisfaction']]
dataset['CGPA'] = [normalize(x, max_cgpa) for x in dataset['CGPA']]
dataset['Financial Stress'] = [normalize(x, max_financial_stress) for x in dataset['Financial Stress']]

# Replace Yes/No and binary data with 1 and 0
dataset['Have you ever had suicidal thoughts ?'] = [1 if x == 'Yes' else 0 for x in dataset['Have you ever had suicidal thoughts ?']]
dataset['Family History of Mental Illness'] = [1 if x == 'Yes' else 0 for x in dataset['Family History of Mental Illness']]
dataset['Gender'] = [1 if x == 'Male' else 0 for x in dataset['Gender']]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dataset['Academic Pressure'] = [normalize(x, max_academic_pressure) for x in dataset['Academic Pressure']]


In [66]:
dataset.head(20)

Unnamed: 0,Gender,Academic Pressure,CGPA,Study Satisfaction,Have you ever had suicidal thoughts ?,Financial Stress,Family History of Mental Illness,Depression
0,1,1.0,0.897,0.4,1,0.2,0,1
1,0,0.4,0.59,1.0,0,0.4,1,0
2,1,0.6,0.703,1.0,0,0.2,1,0
3,0,0.6,0.559,0.4,1,1.0,1,1
4,0,0.8,0.813,0.6,1,0.2,0,0
5,1,0.4,0.57,0.6,0,0.2,0,0
6,1,0.6,0.954,0.8,0,0.4,0,0
7,0,0.4,0.804,0.8,0,0.2,1,0
8,1,0.6,0.979,0.2,1,0.6,0,1
9,1,0.4,0.838,0.6,1,1.0,0,1


In [85]:
# Convert the pandas dataframe into numpy arrays
features = ['Gender', 'Academic Pressure', 'CGPA', 'Study Satisfaction', 'Have you ever had suicidal thoughts ?', 'Financial Stress', 'Family History of Mental Illness']
target = 'Depression'
dataset_features = dataset[features].values
dataset_target = dataset[target].values

In [86]:
print(dataset_features)

[[1.    1.    0.897 ... 1.    0.2   0.   ]
 [0.    0.4   0.59  ... 0.    0.4   1.   ]
 [1.    0.6   0.703 ... 0.    0.2   1.   ]
 ...
 [1.    0.6   0.661 ... 0.    0.4   0.   ]
 [0.    1.    0.688 ... 1.    1.    0.   ]
 [1.    0.8   0.924 ... 1.    0.6   1.   ]]


In [87]:
print(dataset_target)

[1 0 0 ... 0 1 1]


In [93]:
# Split the data into training and testing
X_train, X_test, Y_train, Y_test = train_test_split(dataset_features, dataset_target, train_size=0.8, test_size=0.2, random_state=40, shuffle=True)

In [94]:
print(X_train.shape)

(22320, 7)


In [95]:
print(X_test.shape)

(5581, 7)


In [None]:
# Create the model
