In [1]:
#sol1)

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report


In [2]:
# Load the dataset with the first row as header
url = 'https://github.com/FlipRoboTechnologies/ML-Datasets/raw/main/Glass%20Identification/Glass%20Identification.csv'
glass_df = pd.read_csv(url, header=0)


In [3]:
# Display the first few rows to understand the dataset structure
print("First few rows of the dataset:")
print(glass_df.head())


First few rows of the dataset:
   1  1.52101  13.64  4.49  1.10  71.78  0.06  8.75  0.00  0.00.1  1.1
0  2  1.51761  13.89  3.60  1.36  72.73  0.48  7.83   0.0    0.00    1
1  3  1.51618  13.53  3.55  1.54  72.99  0.39  7.78   0.0    0.00    1
2  4  1.51766  13.21  3.69  1.29  72.61  0.57  8.22   0.0    0.00    1
3  5  1.51742  13.27  3.62  1.24  73.08  0.55  8.07   0.0    0.00    1
4  6  1.51596  12.79  3.61  1.62  72.97  0.64  8.07   0.0    0.26    1


In [4]:
# Correct column names
glass_df.columns = ['Id', 'RI', 'Na', 'Mg', 'Al', 'Si', 'K', 'Ca', 'Ba', 'Fe', 'Type']


In [5]:
# Verify the column names
print("Column Names after renaming:")
print(glass_df.columns)


Column Names after renaming:
Index(['Id', 'RI', 'Na', 'Mg', 'Al', 'Si', 'K', 'Ca', 'Ba', 'Fe', 'Type'], dtype='object')


In [6]:
# Set the target column
target_column = 'Type'


In [7]:
# Separate features and target
X = glass_df.drop(columns=[target_column])
y = glass_df[target_column]


In [8]:
# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [9]:
# Choose a classification model
model = RandomForestClassifier(n_estimators=100, random_state=42)


In [10]:
# Train the model
model.fit(X_train, y_train)


In [11]:
# Make predictions
y_pred = model.predict(X_test)


In [12]:
# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)


In [13]:
print(f'Accuracy: {accuracy}')
print('Classification Report:')
print(report)

Accuracy: 0.9767441860465116
Classification Report:
              precision    recall  f1-score   support

           1       0.91      1.00      0.95        10
           2       1.00      0.93      0.97        15
           3       1.00      1.00      1.00         3
           5       1.00      1.00      1.00         3
           6       1.00      1.00      1.00         3
           7       1.00      1.00      1.00         9

    accuracy                           0.98        43
   macro avg       0.98      0.99      0.99        43
weighted avg       0.98      0.98      0.98        43



In [14]:
#sol2)

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, mean_absolute_error


In [15]:
# Load the dataset
url = "https://raw.githubusercontent.com/FlipRoboTechnologies/ML-Datasets/main/Grades/Grades.csv"
df = pd.read_csv(url)


In [16]:
# Print initial data exploration information
print("Initial Data Exploration Information:")
print(df.head())  # Display the first few rows
print(df.info())  # Summary of the dataset
print(df.describe())  # Statistical summary of numerical columns
print(df.isnull().sum())  # Check for missing values


Initial Data Exploration Information:
   Seat No. PH-121 HS-101 CY-105 HS-105/12 MT-111 CS-105 CS-106 EL-102 EE-119  \
0  CS-97001     B-     D+     C-         C     C-     D+      D     C-     B-   
1  CS-97002      A      D     D+         D     B-      C      D      A     D+   
2  CS-97003      A      B      A        B-     B+      A     B-     B+     A-   
3  CS-97004      D     C+     D+         D      D     A-     D+     C-      D   
4  CS-97005     A-     A-     A-        B+      A      A     A-     B+      A   

   ... CS-312 CS-317 CS-403 CS-421 CS-406 CS-414 CS-419 CS-423 CS-412   CGPA  
0  ...     C-     C-     C-     C-     A-      A     C-      B     A-  2.205  
1  ...     D+      D      C      D     A-     B-      C      C      B  2.008  
2  ...      B      B      A      C      A      A      A     A-      A  3.608  
3  ...     D+      C     D+     C-     B-      B     C+     C+     C+  1.906  
4  ...     B-     B+     B+     B-     A-      A     A-     A-      A  3.448  



In [17]:
# Drop rows with missing values
df.dropna(inplace=True)


In [18]:
# Split the data into features (X) and target variable (CGPA)
X = df.drop(columns=['Seat No.', 'CGPA'])  # Remove irrelevant columns
y = df['CGPA']


In [19]:
# Convert categorical variables into numerical representations
X_encoded = pd.get_dummies(X)


In [20]:
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_encoded, y, test_size=0.2, random_state=42)


In [21]:
# Train a linear regression model
model = LinearRegression()
model.fit(X_train, y_train)


In [22]:
# Make predictions
y_pred = model.predict(X_test)


In [23]:
# Evaluate the model
mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
rmse = (mse ** 0.5)
print("\nModel Evaluation:")
print("Mean Squared Error:", mse)
print("Mean Absolute Error:", mae)
print("Root Mean Squared Error:", rmse)



Model Evaluation:
Mean Squared Error: 0.0036629639720725674
Mean Absolute Error: 0.04755007652629767
Root Mean Squared Error: 0.06052242536508734
