# Predicting Student Addiction with machine learning algorithm

***Anand Kumar Dubey***
**3MSCDSA 23122005**


In [765]:
import matplotlib.pyplot as plt   #importing necessory libraries for plotting
import seaborn as sns

In [766]:
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler

from sklearn.pipeline import make_pipeline
import numpy as np

In [767]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV    #importing necessory libraries for modeling
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score

**Loading and cleaning the Data**

In [768]:
#loading the data
test_df=pd.read_csv("student_addiction_dataset_test.csv")
train_df=pd.read_csv("student_addiction_dataset_test.csv")

In [769]:
print(train_df.head())

  Experimentation Academic_Performance_Decline Social_Isolation  \
0             Yes                          Yes              Yes   
1              No                           No              Yes   
2              No                           No               No   
3             Yes                           No              Yes   
4              No                           No               No   

  Financial_Issues Physical_Mental_Health_Problems Legal_Consequences  \
0               No                             Yes                 No   
1               No                              No                Yes   
2              Yes                              No                Yes   
3               No                             Yes                Yes   
4               No                             Yes                 No   

  Relationship_Strain Risk_Taking_Behavior Withdrawal_Symptoms  \
0                  No                   No                 Yes   
1                  No     

In [770]:
print(test_df.head())

  Experimentation Academic_Performance_Decline Social_Isolation  \
0             Yes                          Yes              Yes   
1              No                           No              Yes   
2              No                           No               No   
3             Yes                           No              Yes   
4              No                           No               No   

  Financial_Issues Physical_Mental_Health_Problems Legal_Consequences  \
0               No                             Yes                 No   
1               No                              No                Yes   
2              Yes                              No                Yes   
3               No                             Yes                Yes   
4               No                             Yes                 No   

  Relationship_Strain Risk_Taking_Behavior Withdrawal_Symptoms  \
0                  No                   No                 Yes   
1                  No     

In [771]:
# #handeling missing values
# train_df.isnull().sum()


In [772]:
# test_df.isnull().sum()

In [773]:
train_df.columns

Index(['Experimentation', 'Academic_Performance_Decline', 'Social_Isolation',
       'Financial_Issues', 'Physical_Mental_Health_Problems',
       'Legal_Consequences', 'Relationship_Strain', 'Risk_Taking_Behavior',
       'Withdrawal_Symptoms', 'Denial_and_Resistance_to_Treatment',
       'Addiction_Class'],
      dtype='object')

In [774]:
#since most of the values in the dataset is non numerical we will
#convert them in numeric data first as non numeric is not suitable for machine learning 
train_df['Experimentation']=train_df['Experimentation'].map({"Yes":1,"No":0})    

In [775]:
train_df['Academic_Performance_Decline']=train_df['Academic_Performance_Decline'].map({"Yes":1,"No":0})  
train_df['Social_Isolation']=train_df['Social_Isolation'].map({"Yes":1,"No":0})    
train_df['Financial_Issues']=train_df['Financial_Issues'].map({"Yes":1,"No":0})    
train_df['Physical_Mental_Health_Problems']=train_df['Physical_Mental_Health_Problems'].map({"Yes":1,"No":0}) 
train_df['Legal_Consequences']=train_df['Legal_Consequences'].map({"Yes":1,"No":0}) 
train_df['Relationship_Strain']=train_df['Relationship_Strain'].map({"Yes":1,"No":0}) 
train_df['Risk_Taking_Behavior']=train_df['Risk_Taking_Behavior'].map({"Yes":1,"No":0}) 
train_df['Withdrawal_Symptoms']=train_df['Withdrawal_Symptoms'].map({"Yes":1,"No":0}) 
train_df['Denial_and_Resistance_to_Treatment']=train_df['Denial_and_Resistance_to_Treatment'].map({"Yes":1,"No":0}) 
train_df['Addiction_Class']=train_df['Addiction_Class'].map({"Yes":1,"No":0}) 


In [776]:
#same thing we will do with test data
test_df.columns

Index(['Experimentation', 'Academic_Performance_Decline', 'Social_Isolation',
       'Financial_Issues', 'Physical_Mental_Health_Problems',
       'Legal_Consequences', 'Relationship_Strain', 'Risk_Taking_Behavior',
       'Withdrawal_Symptoms', 'Denial_and_Resistance_to_Treatment',
       'Addiction_Class'],
      dtype='object')

In [777]:
test_df['Experimentation']=test_df['Experimentation'].map({"Yes":1,"No":0}) 
test_df['Academic_Performance_Decline']=test_df['Academic_Performance_Decline'].map({"Yes":1,"No":0})  
test_df['Social_Isolation']=test_df['Social_Isolation'].map({"Yes":1,"No":0})    
test_df['Financial_Issues']=test_df['Financial_Issues'].map({"Yes":1,"No":0})    
test_df['Physical_Mental_Health_Problems']=test_df['Physical_Mental_Health_Problems'].map({"Yes":1,"No":0}) 
test_df['Legal_Consequences']=test_df['Legal_Consequences'].map({"Yes":1,"No":0}) 
test_df['Relationship_Strain']=test_df['Relationship_Strain'].map({"Yes":1,"No":0}) 
test_df['Risk_Taking_Behavior']=test_df['Risk_Taking_Behavior'].map({"Yes":1,"No":0}) 
test_df['Withdrawal_Symptoms']=test_df['Withdrawal_Symptoms'].map({"Yes":1,"No":0}) 
test_df['Denial_and_Resistance_to_Treatment']=test_df['Denial_and_Resistance_to_Treatment'].map({"Yes":1,"No":0}) 
test_df['Addiction_Class']=test_df['Addiction_Class'].map({"Yes":1,"No":0}) 


In [778]:
#Handeling Missing Values
train_df.isnull().sum()

Experimentation                       645
Academic_Performance_Decline          685
Social_Isolation                      677
Financial_Issues                      620
Physical_Mental_Health_Problems       665
Legal_Consequences                    686
Relationship_Strain                   632
Risk_Taking_Behavior                  613
Withdrawal_Symptoms                   653
Denial_and_Resistance_to_Treatment    654
Addiction_Class                         0
dtype: int64

In [779]:
train_df = train_df.fillna(train_df.median())

In [780]:
test_df=test_df.fillna(test_df.median())

------------------------------------------------------------------------------------------------------------------------

In [781]:
#since now the data is cleaned we can proceed for our machine learning modeling

In [782]:
# Define features and target variable
X_train = train_df.drop('Addiction_Class', axis=1)
y_train = train_df['Addiction_Class']

In [783]:
# Initialize and train the model
rf_model = RandomForestClassifier()
rf_model.fit(X_train, y_train)

In [784]:
X_test = test_df.drop('Addiction_Class', axis=1)  # Remove the target variable from test data
predictions = rf_model.predict(X_test)

# Optionally, print or save the predictions
test_df['Predicted_Addiction_Class'] = predictions
test_df.to_csv('test_predictions.csv', index=False)

In [785]:
from sklearn.metrics import accuracy_score
true_labels = test_df['Addiction_Class']  # Assuming 'Addiction_Class' is the true labels column
predictions = test_df['Predicted_Addiction_Class']  # Assuming 'Predicted_Addiction_Class' contains model predictions

# Calculate accuracy
accuracy = accuracy_score(true_labels, predictions)

print("Accuracy:", accuracy)

Accuracy: 0.7977871939736346
