In [1]:
import pandas as pd

# Read the student records from the CSV file into a DataFrame
data = pd.read_csv(filepath_or_buffer="student_data.csv")

# Leave the frame as the cell's last expression so the notebook renders it
data


Unnamed: 0,CGPA,Projects,Internships,Skills,Attendance,Performance
0,8.8,3,2,8,90,High
1,7.5,2,1,7,85,Medium
2,6.2,1,0,5,75,Medium
3,5.5,0,0,3,60,Low
4,9.1,4,3,9,95,High
5,6.8,1,1,6,80,Medium
6,5.9,0,0,4,65,Low
7,8.2,3,2,8,88,High
8,6.0,1,0,5,70,Medium
9,5.2,0,0,3,58,Low


In [2]:

from sklearn.preprocessing import LabelEncoder

# Encode the text labels in the Performance column as integer class codes.
# NOTE: the column is overwritten in place, so the original text labels are
# afterwards only recoverable through `le`. Re-running this cell on its own
# re-fits the encoder on the already-encoded integers; re-run the load cell
# first to restore the text labels.
le = LabelEncoder()
data['Performance'] = le.fit_transform(data['Performance'])

# Show the updated frame explicitly: a bare `data` expression in the middle of
# a cell is discarded, because only the cell's last expression is rendered.
# `display` is the helper the IPython/Jupyter kernel provides for this.
display(data)

# Separate the feature columns (X) from the target column (y)
X = data.drop(columns='Performance')
y = data['Performance']

# Last expression: render both parts as a tuple
X, y


(   CGPA  Projects  Internships  Skills  Attendance
 0   8.8         3            2       8          90
 1   7.5         2            1       7          85
 2   6.2         1            0       5          75
 3   5.5         0            0       3          60
 4   9.1         4            3       9          95
 5   6.8         1            1       6          80
 6   5.9         0            0       4          65
 7   8.2         3            2       8          88
 8   6.0         1            0       5          70
 9   5.2         0            0       3          58,
 0    0
 1    2
 2    2
 3    1
 4    0
 5    2
 6    1
 7    0
 8    2
 9    1
 Name: Performance, dtype: int32)

In [3]:

from sklearn.model_selection import train_test_split

# Hold out 20% of the rows as a test set; the fixed seed makes the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, test_size=0.2)

# Report the row/column counts of the two feature partitions
(X_train.shape, X_test.shape)


((8, 5), (2, 5))

In [4]:

from sklearn.tree import DecisionTreeClassifier

# Decision tree classifier with a fixed seed so repeated runs build the same tree
model = DecisionTreeClassifier(random_state=42)

# Fit on the training partition (kept as the last expression of the cell)
model.fit(X=X_train, y=y_train)


In [5]:

from sklearn.metrics import accuracy_score

# Class predictions of the decision tree for the held-out rows
y_pred = model.predict(X_test)

# Fraction of held-out rows whose predicted class matches the true class
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)

accuracy


1.0

In [6]:

from sklearn.ensemble import RandomForestClassifier

# Second model: a random forest of 100 trees, seeded for reproducibility
rf_model = RandomForestClassifier(random_state=42, n_estimators=100)

# Fit on the same training partition as the decision tree
rf_model.fit(X=X_train, y=y_train)

# Class predictions for the held-out rows
rf_pred = rf_model.predict(X_test)

# Fraction of held-out rows predicted correctly
rf_accuracy = accuracy_score(y_true=y_test, y_pred=rf_pred)

rf_accuracy


1.0

In [7]:

# Ordered improvement rules: (metric key, minimum acceptable value, advice).
# A metric strictly below its minimum triggers the corresponding advice.
IMPROVEMENT_RULES = (
    ('CGPA', 7, "Focus on improving CGPA through consistent study"),
    ('Projects', 2, "Work on more practical projects"),
    ('Internships', 1, "Try to gain internship experience"),
    ('Skills', 6, "Improve technical and soft skills"),
    ('Attendance', 75, "Maintain better attendance"),
)


def suggest_improvement(student, thresholds=None):
    """Return improvement advice for one student based on simple thresholds.

    Parameters
    ----------
    student : mapping-like
        Object supporting ``student[key]`` for the keys 'CGPA', 'Projects',
        'Internships', 'Skills' and 'Attendance' (for example a dict).
    thresholds : dict, optional
        Per-metric overrides of the minimum acceptable values defined in
        ``IMPROVEMENT_RULES``, e.g. ``{'CGPA': 8}``. Metrics that are not
        listed keep their default minimum.

    Returns
    -------
    list of str or str
        The advice strings, in rule order, for every metric that is strictly
        below its minimum. When no rule triggers, a single congratulatory
        string is returned instead of a list (kept for backward compatibility).

    Raises
    ------
    KeyError
        If ``student`` is a dict that lacks one of the required keys.
    ValueError
        If ``thresholds`` names a metric that has no rule.
    """
    overrides = dict(thresholds) if thresholds else {}

    # Reject misspelled metric names instead of silently ignoring them
    known_metrics = {metric for metric, _, _ in IMPROVEMENT_RULES}
    unknown = sorted(set(overrides) - known_metrics)
    if unknown:
        raise ValueError("Unknown threshold metric(s): {}".format(unknown))

    suggestions = [
        advice
        for metric, minimum, advice in IMPROVEMENT_RULES
        if student[metric] < overrides.get(metric, minimum)
    ]

    if not suggestions:
        return "Great performance! Keep maintaining your consistency."

    return suggestions



In [8]:

# Example record for one student, using the same metric names as the dataset columns
sample_student = dict(
    CGPA=6.5,
    Projects=1,
    Internships=0,
    Skills=5,
    Attendance=70,
)

# Produce the advice for this student (rendered as the cell output)
suggest_improvement(sample_student)


['Focus on improving CGPA through consistent study',
 'Work on more practical projects',
 'Try to gain internship experience',
 'Improve technical and soft skills',
 'Maintain better attendance']