Implement the feature scaling and one-hot encoding data preprocessing
techniques on the dataset imported in Lab 4 or on any other dataset.

In [5]:

import pandas as pd
from sklearn.preprocessing import StandardScaler, OneHotEncoder

# Load the dataset
data = pd.read_csv('student-dataset.csv')

# Drop the column with all missing values ('ethnic.group').
# drop() returns a new frame, so the raw `data` frame is left untouched.
data_cleaned = data.drop(columns=['ethnic.group'])

# Feature Scaling: standardize the numeric columns (StandardScaler rescales
# each column to zero mean and unit variance).
# Note: this overwrites the listed columns of `data_cleaned` with scaled values.
scaler = StandardScaler()
numeric_columns = ['english.grade', 'math.grade', 'sciences.grade',
                   'language.grade', 'portfolio.rating',
                   'coverletter.rating', 'refletter.rating']
data_cleaned[numeric_columns] = scaler.fit_transform(data_cleaned[numeric_columns])

# One-Hot Encoding: encode the categorical columns ('gender', 'nationality', 'city').
# - sparse_output=False returns a dense array (the parameter is named
#   `sparse_output` in scikit-learn >= 1.2; older releases call it `sparse`).
# - drop='first' omits the first category of each feature so the resulting
#   indicator columns are not perfectly collinear.
encoder = OneHotEncoder(sparse_output=False, drop='first')
categorical_columns = ['gender', 'nationality', 'city']

# Apply one-hot encoding and wrap the result in a DataFrame.
# The encoder returns a bare array, so the row index must be carried over
# explicitly: pd.concat(axis=1) aligns on index labels, and a fresh RangeIndex
# would only line up by accident (it breaks as soon as rows are filtered or
# the index is changed upstream).
encoded_data = pd.DataFrame(
    encoder.fit_transform(data_cleaned[categorical_columns]),
    columns=encoder.get_feature_names_out(categorical_columns),
    index=data_cleaned.index,
)

# Concatenate the encoded columns back to the scaled dataset and drop the original categorical columns
data_preprocessed = pd.concat([data_cleaned.drop(columns=categorical_columns), encoded_data], axis=1)

# Sanity check: a column-wise concat must never add or lose rows.
assert len(data_preprocessed) == len(data_cleaned), "row count changed during concatenation"

# Display the first few rows of the preprocessed dataset
print(data_preprocessed.head())


   id             name  latitude  longitude  age  english.grade  math.grade  \
0   0        Kiana Lor     31.31     120.62   22       0.242250    0.600064   
1   1   Joshua Lonaker     34.39    -118.54   22      -0.873312   -0.450219   
2   2    Dakota Blanco     37.80    -122.27   22       0.985958    0.810121   
3   3  Natasha Yarusso     37.69    -122.09   20      -0.129604   -1.290446   
4   4   Brooke Cazares    -23.18     -45.88   21       0.614104   -1.710559   

   sciences.grade  language.grade  portfolio.rating  ...  city_Vacaville  \
0       -0.681907       -3.413999          0.014052  ...             0.0   
1        0.301859        0.606708          1.092527  ...             0.0   
2       -0.485154        0.606708         -1.064423  ...             0.0   
3       -0.485154        0.606708          1.092527  ...             0.0   
4       -0.091647       -3.413999          0.014052  ...             0.0   

   city_Vancouver  city_Visalia  city_Walnut Creek  city_Warsaw  \
0