# Data Loading and Exploration:

Loaded the dataset into a pandas DataFrame.
Displayed the dataset.

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import warnings
warnings.filterwarnings("ignore")

In [2]:
data = pd.read_csv ('/kaggle/input/students-adaptability-level-in-online-education/students_adaptability_level_online_education.csv')

In [3]:
data

Unnamed: 0,Gender,Age,Education Level,Institution Type,IT Student,Location,Load-shedding,Financial Condition,Internet Type,Network Type,Class Duration,Self Lms,Device,Adaptivity Level
0,Boy,21-25,University,Non Government,No,Yes,Low,Mid,Wifi,4G,3-6,No,Tab,Moderate
1,Girl,21-25,University,Non Government,No,Yes,High,Mid,Mobile Data,4G,1-3,Yes,Mobile,Moderate
2,Girl,16-20,College,Government,No,Yes,Low,Mid,Wifi,4G,1-3,No,Mobile,Moderate
3,Girl,11-15,School,Non Government,No,Yes,Low,Mid,Mobile Data,4G,1-3,No,Mobile,Moderate
4,Girl,16-20,School,Non Government,No,Yes,Low,Poor,Mobile Data,3G,0,No,Mobile,Low
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1200,Girl,16-20,College,Non Government,No,Yes,Low,Mid,Wifi,4G,1-3,No,Mobile,Low
1201,Girl,16-20,College,Non Government,No,No,High,Mid,Wifi,4G,3-6,No,Mobile,Moderate
1202,Boy,11-15,School,Non Government,No,Yes,Low,Mid,Mobile Data,3G,1-3,No,Mobile,Moderate
1203,Girl,16-20,College,Non Government,No,No,Low,Mid,Wifi,4G,1-3,No,Mobile,Low


# Label Encoding:

Demonstrated how to use LabelEncoder to convert categorical data into numeric labels.

In [4]:
from sklearn.preprocessing import LabelEncoder

# Sample categorical data
categories = ['red', 'blue', 'green', 'red', 'green']

# Initialize the label encoder
label_encoder = LabelEncoder()

# Fit and transform the categorical data
encoded_labels = label_encoder.fit_transform(categories)

# Print the original and encoded data
print("Original Categories:", categories)
print("Encoded Labels:", encoded_labels)
# Print the mapping between original categories and encoded labels
print("Mapping:", dict(zip(label_encoder.classes_, range(len(label_encoder.classes_)))))

Original Categories: ['red', 'blue', 'green', 'red', 'green']
Encoded Labels: [2 0 1 2 1]
Mapping: {'blue': 0, 'green': 1, 'red': 2}


# Data Preparation:
# 
Created a DataFrame and performed label encoding on all categorical columns.
Split the data into features (X) and labels (y).

In [5]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder

# Sample DataFrame
data = {
    'Gender': ['Male', 'Female', 'Male', 'Female', 'Male'],
    'Age': ['18-24', '25-34', '18-24', '35-44', '25-34'],
    'Education Level': ['High School', 'Bachelor', 'High School', 'Master', 'Bachelor'],
    'Institution Type': ['Public', 'Private', 'Public', 'Private', 'Public'],
    'IT Student': ['Yes', 'No', 'Yes', 'No', 'Yes'],
    'Location': ['Urban', 'Rural', 'Urban', 'Urban', 'Rural'],
    'Load-shedding': ['Low', 'High', 'Medium', 'Low', 'High'],
    'Financial Condition': ['Good', 'Poor', 'Good', 'Excellent', 'Fair'],
    'Internet Type': ['Broadband', 'Mobile Data', 'Wi-Fi', 'Broadband', 'Mobile Data'],
    'Network Type': ['Wired', 'Wireless', 'Wired', 'Wireless', 'Wired']
}

df = pd.DataFrame(data)

# Initialize the label encoder
label_encoder = LabelEncoder()

# Apply label encoding to each categorical column
for column in df.select_dtypes(include=['object']).columns:
    df[column + '_encoded'] = label_encoder.fit_transform(df[column])

# Display the encoded DataFrame
print(df)

   Gender    Age Education Level Institution Type IT Student Location  \
0    Male  18-24     High School           Public        Yes    Urban   
1  Female  25-34        Bachelor          Private         No    Rural   
2    Male  18-24     High School           Public        Yes    Urban   
3  Female  35-44          Master          Private         No    Urban   
4    Male  25-34        Bachelor           Public        Yes    Rural   

  Load-shedding Financial Condition Internet Type Network Type  \
0           Low                Good     Broadband        Wired   
1          High                Poor   Mobile Data     Wireless   
2        Medium                Good         Wi-Fi        Wired   
3           Low           Excellent     Broadband     Wireless   
4          High                Fair   Mobile Data        Wired   

   Gender_encoded  Age_encoded  Education Level_encoded  \
0               1            0                        1   
1               0            1                

In [6]:
from sklearn.model_selection import train_test_split

In [7]:
# Features (X)
features = df.drop(['Load-shedding_encoded'], axis=1)

# Labels (y)
labels = df['Load-shedding_encoded']

# Display the features and labels
print("Features:")
print(features)
print("\nLabels:")
print(labels)


Features:
   Gender    Age Education Level Institution Type IT Student Location  \
0    Male  18-24     High School           Public        Yes    Urban   
1  Female  25-34        Bachelor          Private         No    Rural   
2    Male  18-24     High School           Public        Yes    Urban   
3  Female  35-44          Master          Private         No    Urban   
4    Male  25-34        Bachelor           Public        Yes    Rural   

  Load-shedding Financial Condition Internet Type Network Type  \
0           Low                Good     Broadband        Wired   
1          High                Poor   Mobile Data     Wireless   
2        Medium                Good         Wi-Fi        Wired   
3           Low           Excellent     Broadband     Wireless   
4          High                Fair   Mobile Data        Wired   

   Gender_encoded  Age_encoded  Education Level_encoded  \
0               1            0                        1   
1               0            1      

# Train-Test Split:

Split the dataset into training and testing sets using train_test_split.


In [8]:
x=features
y=labels

In [9]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.33, random_state=42)

In [10]:
x_train

Unnamed: 0,Gender,Age,Education Level,Institution Type,IT Student,Location,Load-shedding,Financial Condition,Internet Type,Network Type,Gender_encoded,Age_encoded,Education Level_encoded,Institution Type_encoded,IT Student_encoded,Location_encoded,Financial Condition_encoded,Internet Type_encoded,Network Type_encoded
2,Male,18-24,High School,Public,Yes,Urban,Medium,Good,Wi-Fi,Wired,1,0,1,1,1,1,2,2,0
0,Male,18-24,High School,Public,Yes,Urban,Low,Good,Broadband,Wired,1,0,1,1,1,1,2,0,0
3,Female,35-44,Master,Private,No,Urban,Low,Excellent,Broadband,Wireless,0,2,2,0,0,1,0,0,1


In [11]:
y_train

2    2
0    1
3    1
Name: Load-shedding_encoded, dtype: int64

In [12]:
x_test

Unnamed: 0,Gender,Age,Education Level,Institution Type,IT Student,Location,Load-shedding,Financial Condition,Internet Type,Network Type,Gender_encoded,Age_encoded,Education Level_encoded,Institution Type_encoded,IT Student_encoded,Location_encoded,Financial Condition_encoded,Internet Type_encoded,Network Type_encoded
1,Female,25-34,Bachelor,Private,No,Rural,High,Poor,Mobile Data,Wireless,0,1,0,0,0,0,3,1,1
4,Male,25-34,Bachelor,Public,Yes,Rural,High,Fair,Mobile Data,Wired,1,1,0,1,1,0,1,1,0


In [13]:
y_test

1    0
4    0
Name: Load-shedding_encoded, dtype: int64

In [14]:
from sklearn.preprocessing import MinMaxScaler

# Pipeline Creation:

Set up a pipeline for preprocessing, which includes one-hot encoding of categorical variables and standard scaling.


In [15]:
import pandas as pd
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

# Sample DataFrame
data = {
    'Gender': ['Male', 'Female', 'Male', 'Female', 'Male'],
    'Age': ['18-24', '25-34', '18-24', '35-44', '25-34'],
    'Education Level': ['High School', 'Bachelor', 'High School', 'Master', 'Bachelor'],
    'Institution Type': ['Public', 'Private', 'Public', 'Private', 'Public'],
    'IT Student': ['Yes', 'No', 'Yes', 'No', 'Yes'],
    'Location': ['Urban', 'Rural', 'Urban', 'Urban', 'Rural'],
    'Load-shedding': ['Low', 'High', 'Medium', 'Low', 'High'],
    'Financial Condition': ['Good', 'Poor', 'Good', 'Excellent', 'Fair'],
    'Internet Type': ['Broadband', 'Mobile Data', 'Wi-Fi', 'Broadband', 'Mobile Data'],
    'Network Type': ['Wired', 'Wireless', 'Wired', 'Wireless', 'Wired']
}

df = pd.DataFrame(data)

# Separate features and labels
X = df.drop(['Load-shedding'], axis=1)
y = df['Load-shedding']

# Identify categorical columns
categorical_columns = X.select_dtypes(include=['object']).columns

# Create a ColumnTransformer to apply different preprocessing steps to different columns
preprocessor = ColumnTransformer(
    transformers=[
        ('cat', OneHotEncoder(), categorical_columns)
    ],
    remainder='passthrough'  # To keep any numerical columns, though none are in this example
)

# Create a pipeline with preprocessing and standard scaling
pipeline = Pipeline([
    ('preprocessor', preprocessor),
    ('scaler', StandardScaler(with_mean=False))  # Set 'with_mean=False' because OneHotEncoder creates sparse matrix
])

# Fit and transform the data using the pipeline
X_transformed = pipeline.fit_transform(X)

# Print the transformed data (optional)
print(X_transformed)

# The following line assumes you have a separate x_train dataset to transform
# If you have x_train defined, use the pipeline to transform it:
# x_train_transformed = pipeline.transform(x_train)


[[0.         2.04124145 2.04124145 0.         0.         0.
  2.04124145 0.         0.         2.04124145 0.         2.04124145
  0.         2.04124145 0.         0.         2.04124145 0.
  2.04124145 0.         0.         2.04124145 0.        ]
 [2.04124145 0.         0.         2.04124145 0.         2.04124145
  0.         0.         2.04124145 0.         2.04124145 0.
  2.04124145 0.         0.         0.         0.         2.5
  0.         2.04124145 0.         0.         2.04124145]
 [0.         2.04124145 2.04124145 0.         0.         0.
  2.04124145 0.         0.         2.04124145 0.         2.04124145
  0.         2.04124145 0.         0.         2.04124145 0.
  0.         0.         2.5        2.04124145 0.        ]
 [2.04124145 0.         0.         0.         2.5        0.
  0.         2.5        2.04124145 0.         2.04124145 0.
  0.         2.04124145 2.5        0.         0.         0.
  2.04124145 0.         0.         0.         2.04124145]
 [0.         2.04124145

In [16]:
scaler_object = MinMaxScaler()

Neural Network Model Setup:

Defined a simple neural network model using TensorFlow's Keras API.
Compiled the model with an optimizer and loss function suitable for binary classification.

In [17]:
import pandas as pd
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

# Sample DataFrame
data = {
    'Gender': ['Male', 'Female', 'Male', 'Female', 'Male'],
    'Age': ['18-24', '25-34', '18-24', '35-44', '25-34'],
    'Education Level': ['High School', 'Bachelor', 'High School', 'Master', 'Bachelor'],
    'Institution Type': ['Public', 'Private', 'Public', 'Private', 'Public'],
    'IT Student': ['Yes', 'No', 'Yes', 'No', 'Yes'],
    'Location': ['Urban', 'Rural', 'Urban', 'Urban', 'Rural'],
    'Load-shedding': ['Low', 'High', 'Medium', 'Low', 'High'],
    'Financial Condition': ['Good', 'Poor', 'Good', 'Excellent', 'Fair'],
    'Internet Type': ['Broadband', 'Mobile Data', 'Wi-Fi', 'Broadband', 'Mobile Data'],
    'Network Type': ['Wired', 'Wireless', 'Wired', 'Wireless', 'Wired']
}

df = pd.DataFrame(data)

# Separate features and labels
X = df.drop(['Load-shedding'], axis=1)
y = df['Load-shedding']

# Identify categorical columns
categorical_columns = X.select_dtypes(include=['object']).columns

# Create a transformer for one-hot encoding and standard scaling
preprocessor = ColumnTransformer(
    transformers=[
        ('cat', OneHotEncoder(), categorical_columns)
    ],
    remainder='passthrough'
)

# Create a pipeline with one-hot encoding and standard scaling
pipeline = Pipeline([
    ('preprocessor', preprocessor),
    ('scaler', StandardScaler())
])

# Fit and transform the data using the pipeline
X_transformed = pipeline.fit_transform(X)

# If you have a separate x_train dataset, transform it using the same pipeline
x_train_transformed = pipeline.transform(x_train)

# Now, you can use the transformed data for further processing or modeling


In [18]:
# Separate features and labels
X = df.drop(['Load-shedding'], axis=1)
y = df['Load-shedding']


In [19]:
# Assuming you have a dataset for training
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)


In [20]:
# Fit and transform the training data using the same pipeline
x_train_transformed = pipeline.transform(x_train)


In [21]:
# Now, you can use the transformed training data for further processing or modeling
# For example, you can use x_train_transformed in your machine learning models


# Neural Network Model Setup:

Defined a simple neural network model using TensorFlow's Keras API.
Compiled the model with an optimizer and loss function suitable for binary classification.

In [22]:
import tensorflow as tf

# Assuming x_train_transformed is already defined and transformed
input_shape = x_train_transformed.shape[1]

# Define your model using the Sequential API
model = tf.keras.models.Sequential([
    tf.keras.layers.Dense(128, activation='relu', input_shape=(input_shape,)),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid')  # Assuming binary classification
])

# Print the model summary (optional)
model.summary()


In [23]:
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])


In [24]:
from sklearn.preprocessing import LabelEncoder

# Combine y_train and y_test to fit the encoder on all possible labels
combined_labels = list(y_train) + list(y_test)

# Encode target variable
label_encoder = LabelEncoder()
label_encoder.fit(combined_labels)

# Transform the training and test sets
y_train_encoded = label_encoder.transform(y_train)
y_test_encoded = label_encoder.transform(y_test)
# Assuming you have your pipeline already defined and fitted on x_train
x_test_transformed = pipeline.transform(x_test)

# Now you can use the transformed data for model training and validation
epochs = 10
batch_size = 20
history = model.fit(x_train_transformed, y_train_encoded, epochs=epochs, batch_size=batch_size, validation_data=(x_test_transformed, y_test_encoded))


Epoch 1/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step - accuracy: 0.0000e+00 - loss: 0.7537 - val_accuracy: 0.5000 - val_loss: 0.7491
Epoch 2/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 48ms/step - accuracy: 0.3333 - loss: 0.5228 - val_accuracy: 0.5000 - val_loss: 0.7849
Epoch 3/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 52ms/step - accuracy: 0.6667 - loss: 0.3174 - val_accuracy: 0.5000 - val_loss: 0.8212
Epoch 4/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 48ms/step - accuracy: 0.6667 - loss: 0.1344 - val_accuracy: 0.5000 - val_loss: 0.8571
Epoch 5/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 50ms/step - accuracy: 0.6667 - loss: -0.0263 - val_accuracy: 0.0000e+00 - val_loss: 0.8932
Epoch 6/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 49ms/step - accuracy: 0.6667 - loss: -0.1687 - val_accuracy: 0.0000e+00 - val_loss: 0.9291
Epoch 7/10
[1m1/1[0m [32m━━━━━━

In [25]:
from sklearn.preprocessing import LabelEncoder

# Assuming 'y_test' is a pandas Series
label_encoder = LabelEncoder()
y_test_encoded = label_encoder.fit_transform(y_test)

# Then, use 'y_test_encoded' with the evaluate function
test_loss, test_accuracy = model.evaluate(x_test_transformed, y_test_encoded)
print(f'Test Loss: {test_loss}, Test Accuracy: {test_accuracy}')


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step - accuracy: 0.0000e+00 - loss: 1.0738
Test Loss: 1.0737898349761963, Test Accuracy: 0.0
