In [1]:
import pandas as pd                               # import Pandas library for data manipulation and analysis
import matplotlib.pyplot as plt                   # import Matplotlib library for creating visualizations and plots
import seaborn as sns                             # import Seaborn library for creating statistical graphics
from sklearn.preprocessing import LabelEncoder    # import LabelEncoder from Scikit-learn library for label encoding categorical variables
from sklearn.preprocessing import StandardScaler  # import StandardScaler from Scikit-learn library for feature scaling
from sklearn.model_selection import train_test_split  # import train_test_split from Scikit-learn library for splitting data into training and testing sets
from sklearn.linear_model import LogisticRegression   # import LogisticRegression from Scikit-learn library for logistic regression analysis
from sklearn.naive_bayes import GaussianNB         # import GaussianNB from Scikit-learn library for Naive Bayes classification
from sklearn import svm                            # import svm from Scikit-learn library for support vector machine classification
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report  # import various metrics from Scikit-learn library for evaluating model performance


In [2]:
# Mount Google Drive at /content/drive so the CSV files stored there can be read.
# NOTE(review): presumably this import only works inside a Colab runtime — confirm before running elsewhere.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
# Read the training CSV from the mounted Drive folder into a DataFrame named `dataset`.
# The path is absolute, so the Drive mount must have succeeded before this cell runs.
dataset = pd.read_csv('/content/drive/MyDrive/Machine_learning/PROJECTS/predictive_analytics/training_data_final.csv')


In [4]:
# Peek at the first rows of the raw training data to check the columns loaded as expected.
dataset.head()

Unnamed: 0,id_semester_evaluation,sem_present_count,sem_absent_count,sem_eval_lec_test_1_mark,sem_eval_lab_test_1_mark,semester_evaluation_mid_mark,sem_eval_lec_test_2_mark,sem_eval_lab_test_2_mark,semester_evaluation_pre_gtu_mark,semester_evaluation_internal_mark,semester_evaluation_gtu_mark
0,KTU001,98,2,27,30,29,29,29,29,29,69
1,KTU002,76,24,16,16,16,16,16,16,16,42
2,KTU003,81,19,17,17,17,15,17,16,17,45
3,KTU004,98,2,28,30,29,30,30,30,30,69
4,KTU005,92,8,25,24,25,24,23,24,25,59


In [8]:
# Keep only the student id and the two mark columns used in the rest of the notebook.
cols_to_keep = ["id_semester_evaluation", "semester_evaluation_internal_mark", "semester_evaluation_gtu_mark"]
# Take an explicit copy: later cells add and overwrite columns on `df`, and those
# writes must never alias (or warn about aliasing) the original `dataset` frame.
df = dataset.loc[:, cols_to_keep].copy()


In [9]:
# Confirm that only the id column and the two mark columns remain.
df.head()

Unnamed: 0,id_semester_evaluation,semester_evaluation_internal_mark,semester_evaluation_gtu_mark
0,KTU001,29,69
1,KTU002,16,42
2,KTU003,17,45
3,KTU004,30,69
4,KTU005,25,59


In [10]:
# Overall score per student: internal mark plus GTU mark, added element-wise.
df['total'] = df['semester_evaluation_internal_mark'].add(df['semester_evaluation_gtu_mark'])


In [11]:
# Check that the new "total" column equals internal mark + GTU mark for the first rows.
df.head()

Unnamed: 0,id_semester_evaluation,semester_evaluation_internal_mark,semester_evaluation_gtu_mark,total
0,KTU001,29,69,98
1,KTU002,16,42,58
2,KTU003,17,45,62
3,KTU004,30,69,99
4,KTU005,25,59,84


In [12]:
# Count missing entries per column; all zeros means there is nothing to impute or drop.
df.isna().sum()

id_semester_evaluation               0
semester_evaluation_internal_mark    0
semester_evaluation_gtu_mark         0
total                                0
dtype: int64

In [13]:
# Show each column's dtype and non-null count.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 690 entries, 0 to 689
Data columns (total 4 columns):
 #   Column                             Non-Null Count  Dtype 
---  ------                             --------------  ----- 
 0   id_semester_evaluation             690 non-null    object
 1   semester_evaluation_internal_mark  690 non-null    int64 
 2   semester_evaluation_gtu_mark       690 non-null    int64 
 3   total                              690 non-null    int64 
dtypes: int64(3), object(1)
memory usage: 21.7+ KB


# Label Encoding Data

In [14]:
# Replace each student id string with an integer code produced by pd.factorize.
# NOTE(review): this overwrites the original ids in `df`, and the id column is not
# among the feature columns passed to the model later in the notebook.
df['id_semester_evaluation'] = pd.factorize(df['id_semester_evaluation'])[0]


In [15]:
# The id column should now hold integer codes instead of the original strings.
df.head()

Unnamed: 0,id_semester_evaluation,semester_evaluation_internal_mark,semester_evaluation_gtu_mark,total
0,0,29,69,98
1,1,16,42,58
2,2,17,45,62
3,3,30,69,99
4,4,25,59,84


In [20]:
# define a function to assign groups based on an internal mark value
def assign_group(mark):
    """Map an internal mark to a letter group.

    Bands (lower bound inclusive, upper bound exclusive):
        mark < 10        -> 'D'
        10 <= mark < 19  -> 'C'
        19 <= mark < 26  -> 'B'
        mark >= 26       -> 'A'

    For whole-number marks this is the same as the 10-18 / 19-25 ranges.
    The bands are contiguous so that a fractional mark such as 18.5 is
    placed in 'C' instead of falling through every branch into 'A'.

    The name `assign_group` is also defined in other cells of this notebook
    with different thresholds; whichever definition ran last is the one in
    effect.
    """
    if mark < 10:
        return 'D'
    if mark < 19:
        return 'C'
    if mark < 26:
        return 'B'
    return 'A'

# Bucket each internal mark with assign_group and store the letter in a new "Internal" column.
# NOTE: `assign_group` is redefined by later cells with different thresholds, so this line
# must run right after the definition in this cell and before those later cells.
df['Internal'] = df['semester_evaluation_internal_mark'].apply(assign_group)


In [21]:
# Preview the frame with the new "Internal" column.
# NOTE(review): the saved output for this cell also lists "LAB" and "lab" columns that no
# visible cell creates — it looks like leftover kernel state; re-run from a fresh kernel.
df.head()

Unnamed: 0,id_semester_evaluation,semester_evaluation_internal_mark,semester_evaluation_gtu_mark,total,LAB,lab,Internal
0,0,29,69,98,A,A,A
1,1,16,42,58,C,C,C
2,2,17,45,62,C,C,C
3,3,30,69,99,A,A,A
4,4,25,59,84,B,B,B


In [24]:
# define a function to assign groups based on a GTU (external) mark value
def assign_group(mark):
    """Map a GTU mark to a letter group.

    Bands (lower bound inclusive, upper bound exclusive):
        mark < 28        -> 'D'
        28 <= mark < 46  -> 'C'
        46 <= mark < 61  -> 'B'
        mark >= 61       -> 'A'

    For whole-number marks this is the same as the 28-45 / 46-60 ranges.
    The bands are contiguous so that a fractional mark such as 45.5 is
    placed in 'C' instead of falling through every branch into 'A'.

    The name `assign_group` is also defined in other cells of this notebook
    with different thresholds; whichever definition ran last is the one in
    effect.
    """
    if mark < 28:
        return 'D'
    if mark < 46:
        return 'C'
    if mark < 61:
        return 'B'
    return 'A'


# Bucket each GTU mark with the assign_group defined in this cell and store the letter
# in a new "External" column.
df['External'] = df['semester_evaluation_gtu_mark'].apply(assign_group)



In [29]:
# Preview the frame with the new "External" column.
df.head()

Unnamed: 0,id_semester_evaluation,semester_evaluation_internal_mark,semester_evaluation_gtu_mark,total,Internal,External
0,0,29,69,98,A,A
1,1,16,42,58,C,C
2,2,17,45,62,C,C
3,3,30,69,99,A,A
4,4,25,59,84,B,B


In [32]:
# define a function to assign the final grade based on a total mark value
def assign_group(mark):
    """Map a total mark to a final letter grade.

    Bands (lower bound inclusive, upper bound exclusive):
        mark < 50        -> 'F'
        50 <= mark < 60  -> 'E'
        60 <= mark < 70  -> 'D'
        70 <= mark < 80  -> 'C'
        80 <= mark < 90  -> 'B'
        mark >= 90       -> 'A'

    The 'E' band starts at 50. With a lower bound of 51 a total of exactly
    50 matched no branch and was graded 'A'. The bands are contiguous, so
    fractional totals such as 59.5 also land in the band below the next
    threshold instead of falling through to 'A'.

    The name `assign_group` is also defined in other cells of this notebook
    with different thresholds; whichever definition ran last is the one in
    effect.
    """
    if mark < 50:
        return 'F'
    if mark < 60:
        return 'E'
    if mark < 70:
        return 'D'
    if mark < 80:
        return 'C'
    if mark < 90:
        return 'B'
    return 'A'


# apply the group assignment function to each value in the "total" column
df['FINAL'] = df['total'].apply(assign_group)



In [33]:
# Preview the frame with the new "FINAL" column.
df.head()

Unnamed: 0,id_semester_evaluation,semester_evaluation_internal_mark,semester_evaluation_gtu_mark,total,Internal,External,FINAL
0,0,29,69,98,A,A,A
1,1,16,42,58,C,C,E
2,2,17,45,62,C,C,D
3,3,30,69,99,A,A,A
4,4,25,59,84,B,B,B


In [35]:
# A single encoder object is refit on each letter column in turn, replacing the
# letters in "Internal", "External" and "FINAL" with integer codes.
le = LabelEncoder()

for grade_column in ('Internal', 'External', 'FINAL'):
    df[grade_column] = le.fit_transform(df[grade_column])
# After the loop `le` holds the fit for "FINAL" only (the last column processed).

In [37]:
# The three letter columns should now contain integer codes.
df.head()


Unnamed: 0,id_semester_evaluation,semester_evaluation_internal_mark,semester_evaluation_gtu_mark,total,Internal,External,FINAL
0,0,29,69,98,0,0,0
1,1,16,42,58,2,2,4
2,2,17,45,62,2,2,3
3,3,30,69,99,0,0,0
4,4,25,59,84,1,1,1


In [38]:
# Target: the encoded FINAL group. Features: the two raw marks.
# (train_test_split is already imported in the notebook's first cell.)
y = df['FINAL']
X = df[['semester_evaluation_internal_mark', 'semester_evaluation_gtu_mark']]

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, test_size=0.2
)

# Report the shape of every piece of the split.
for split_name, split_part in (
    ("X_train", X_train),
    ("X_test", X_test),
    ("y_train", y_train),
    ("y_test", y_test),
):
    print(f"{split_name} shape:", split_part.shape)


X_train shape: (552, 2)
X_test shape: (138, 2)
y_train shape: (552,)
y_test shape: (138,)


In [40]:
from sklearn.ensemble import RandomForestClassifier

# NOTE(review): FINAL is obtained by thresholding `total`, and `total` is the sum of the
# other two features, so the target is a fixed function of the inputs. A near-perfect
# score from this model is therefore expected and says little about predictive power.
feature_columns = ['semester_evaluation_internal_mark', 'semester_evaluation_gtu_mark', 'total']

# Redo the 80/20 split, this time with `total` included among the features.
# This rebinds X_train / X_test / y_train / y_test from the previous cell.
X_train, X_test, y_train, y_test = train_test_split(
    df[feature_columns], df['FINAL'], random_state=42, test_size=0.2
)

# Random forest with 100 trees and a fixed seed.
clf = RandomForestClassifier(random_state=42, n_estimators=100)

# Fit on the training portion of the split.
clf.fit(X_train, y_train)




In [41]:
# Predict encoded FINAL groups for the hold-out rows (X_test from the train/test split).
y_pred = clf.predict(X_test)


In [42]:

# Score the classifier on the hold-out split and print the result to two decimals.
# NOTE(review): FINAL is derived from `total`, which is one of the features, so a value
# of 1.00 here presumably reflects the trees recovering the grade thresholds — confirm
# before reading it as evidence of predictive power.
accuracy = clf.score(X_test, y_test)
print(f"Accuracy: {accuracy:.2f}")

Accuracy: 1.00


In [45]:
# Score the separate testing file with the forest fitted earlier.
# NOTE(review): this rebinds X_test and y_pred, which until now held the hold-out split
# and its predictions; re-running the accuracy cell after this one would pair these
# rows with the old y_test.
test_df = pd.read_csv('/content/drive/MyDrive/Machine_learning/PROJECTS/predictive_analytics/Testing_data_final.csv')

# Same three feature columns, in the same order, that the model was trained on.
X_test = test_df.loc[:, ['semester_evaluation_internal_mark', 'semester_evaluation_gtu_mark', 'total']]

# Predict an encoded FINAL group for every row and show the raw codes.
y_pred = clf.predict(X_test)
print(y_pred)


[1 2 3 0 3 1 1 0 3 0 0 3 3 2 3 0 0 2 0 0 1 4 2 0 1 0 2 3 1 3 1 0 0 0 3 4 3
 3 1 2 3 1 2 1 2 1 1 0 1 0 1 0 0 3 1 2 3 0 1 1 3 0 2 3 0 2 0 2 0]


In [46]:
# NOTE(review): `accuracy` was computed in an earlier cell on the hold-out split of the
# training data; it is not recomputed here for the Testing_data_final.csv predictions.
print(f"Accuracy: {accuracy:.2f}")

Accuracy: 1.00


In [49]:
# Look up one student in the testing file, predict their FINAL group and print the
# link to the help document for that group.

# Load the test dataset
test_df = pd.read_csv('/content/drive/MyDrive/Machine_learning/PROJECTS/predictive_analytics/Testing_data_final.csv')

# Ask for the roll number of the student (surrounding whitespace is easy to type by accident)
roll_number = input("Enter the roll number of the student: ").strip()

# Find the row with the given roll number
student_df = test_df[test_df['id_semester_evaluation'] == roll_number]

# Link to the help document for each group
links = {
    'A': 'https://drive.google.com/file/d/1aBcDeFGhijKLMNopqrstuvWXyZ/view?usp=sharing',
    'B': 'https://drive.google.com/file/d/2aBcDeFGhijKLMNopqrstuvWXyZ/view?usp=sharing',
    'C': 'https://drive.google.com/file/d/3aBcDeFGhijKLMNopqrstuvWXyZ/view?usp=sharing',
    'D': 'https://drive.google.com/file/d/4aBcDeFGhijKLMNopqrstuvWXyZ/view?usp=sharing',
    'E': 'https://drive.google.com/file/d/5aBcDeFGhijKLMNopqrstuvWXyZ/view?usp=sharing',
    'F': 'https://drive.google.com/file/d/6aBcDeFGhijKLMNopqrstuvWXyZ/view?usp=sharing'
}

if student_df.empty:
    # Without this guard clf.predict() is handed zero rows and fails with an error
    # that says nothing about the roll number being unknown.
    print(f"No student found with roll number: {roll_number}")
else:
    # Make predictions using the trained model for the given student
    student_predictions = clf.predict(student_df[['semester_evaluation_internal_mark', 'semester_evaluation_gtu_mark', 'total']])

    # Map the predicted class to group name using the classes the encoder actually
    # learned. `le` was last fitted on the "FINAL" column, so position i in
    # le.classes_ is the letter that was encoded as i. A hand-written table would be
    # wrong whenever a grade is absent from the training data.
    # NOTE: this relies on the label-encoding cell having been run exactly once.
    group_names = dict(enumerate(le.classes_))
    student_group = group_names[student_predictions[0]]

    link = links[student_group]

    # Print the result for the given student
    print(f"Roll Number: {roll_number}")
    print(f"Group: {student_group}")
    print(f"Link to Help Document: {link}")


Enter the roll number of the student: CS712
Roll Number: CS712
Group: E
Link to Help Document: https://drive.google.com/file/d/5aBcDeFGhijKLMNopqrstuvWXyZ/view?usp=sharing
