In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sb
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn import metrics
from sklearn.svm import SVC
from xgboost import XGBRegressor
from sklearn.linear_model import LogisticRegression, Lasso, Ridge
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

import warnings

# Silence every warning category for the rest of the session so that library
# deprecation chatter does not clutter the cell outputs.
warnings.simplefilter('ignore')

In [None]:
# Read the student records from the Colab session storage and preview the
# first five rows.
csv_path = '/content/Student_Details.csv'
df = pd.read_csv(csv_path)
df.head(n=5)

In [None]:
# Handle missing values.
# Report which columns contain gaps before anything is filled in (the bare
# expression used previously was not the last line of the cell, so nothing
# was ever displayed).
missing_counts = df.isnull().sum()
print(missing_counts[missing_counts > 0])

# Forward fill: every gap takes the last valid value above it in its column.
# DataFrame.ffill() replaces the deprecated fillna(method='ffill') spelling,
# and rebinding `df` avoids in-place mutation.
df = df.ffill()

# Forward fill cannot repair gaps in the very first rows of a column, so
# show whatever is still missing.
df.isnull().sum()


In [None]:
# Convert the categorical Yes/No column to numeric 1/0.
activity_codes = {'Yes': 1, 'No': 0}
activity_raw = df['Extra_Curricular_Activities']

# Series.map() turns every value that is not a key of the mapping into NaN,
# which includes already-encoded 0/1 values. Skip the mapping when the column
# is already numeric so that re-running this cell does not wipe the column.
already_encoded = activity_raw.dropna().isin(list(activity_codes.values())).all()

if not already_encoded:
    activity_encoded = activity_raw.map(activity_codes)

    # Any non-missing value that became NaN is a label the mapping does not
    # know about; fail loudly instead of silently training on NaN targets.
    unmapped = activity_raw[activity_encoded.isna() & activity_raw.notna()].unique()
    if len(unmapped) > 0:
        raise ValueError(
            f"Unexpected Extra_Curricular_Activities values: {list(unmapped)}"
        )

    df['Extra_Curricular_Activities'] = activity_encoded


In [None]:
# Feature scaling: standardise every column except the identifier.
feature_frame = df.drop(columns=["Student_ID"])

scaler = StandardScaler()
scaled_features = scaler.fit_transform(feature_frame)

# Take the column labels and the index from the frame that was actually
# scaled. Slicing df.columns[1:] is only correct when Student_ID happens to
# be the first column; otherwise the labels end up on the wrong data.
df_scaled = pd.DataFrame(
    scaled_features,
    columns=feature_frame.columns,
    index=feature_frame.index,
)

# NOTE(review): the scaler is fitted on all rows before the train/test split
# performed later in the notebook, so test-set statistics influence the
# training features. Fitting on the training split only would avoid this.


In [None]:
# Split the dataset into features and target, then hold out 20% for testing.
target_col = "Extra_Curricular_Activities"

# The target is read from the unscaled frame; the features come from the
# scaled frame with the target column removed.
y = df[target_col]
X = df_scaled.drop(columns=[target_col])

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [None]:
from sklearn.linear_model import LogisticRegression
!pip install mord
import mord

# Create a Logistic Regression model
model = LogisticRegression()

# Train the model
model = mord.OrdinalRidge()
model.fit(X_train, y_train)

In [None]:
# Performance: predict on the held-out split and report the share of exact
# matches between predicted and true labels.
y_pred = model.predict(X_test)
accuracy = metrics.accuracy_score(y_true=y_test, y_pred=y_pred)
print(f"Accuracy: {accuracy}")


In [None]:
# Fine-tuning: 5-fold cross-validated search over the regularisation
# strength C of a logistic regression classifier.
from sklearn.model_selection import GridSearchCV

params = dict(C=[0.01, 0.1, 1, 10])
grid_search = GridSearchCV(
    estimator=LogisticRegression(),
    param_grid=params,
    cv=5,
).fit(X_train, y_train)

# Keep the winning estimator and report the setting that produced it.
best_model = grid_search.best_estimator_
print(f"Best Parameters: {grid_search.best_params_}")


In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# --- Distribution of CGPA ---------------------------------------------------
# Plot from the unscaled frame: the axis is labelled "CGPA", so it should show
# real CGPA values rather than the standardised z-scores held in df_scaled.
plt.figure(figsize=(10, 6))
sns.boxplot(data=df, x='CGPA')
plt.title("Box Plot of CGPA")
plt.xlabel("CGPA")
plt.show()

plt.figure(figsize=(10, 6))
sns.histplot(df['CGPA'], kde=True, bins=15, color='blue')
plt.title("Histogram of CGPA")
plt.xlabel("CGPA")
plt.ylabel("Frequency")
plt.show()

# --- CGPA by extra-curricular participation ---------------------------------
# Use the unscaled frame here as well: in df_scaled the activity column has
# been standardised, so its categories would appear as z-scores instead of 0/1.
plt.figure(figsize=(10, 6))
sns.violinplot(data=df, x='Extra_Curricular_Activities', y='CGPA', palette='coolwarm')
plt.title("Violin Plot of CGPA by Extra-Curricular Activities")
plt.xlabel("Extra-Curricular Activities")
plt.ylabel("CGPA")
plt.show()

plt.figure(figsize=(10, 6))
sns.barplot(data=df, x='Extra_Curricular_Activities', y='CGPA', palette='viridis')
plt.title("Average CGPA by Extra-Curricular Activities")
plt.xlabel("Extra-Curricular Activities")
plt.ylabel("Average CGPA")
plt.show()

# --- Pairwise relationships between the scaled columns ----------------------
sns.pairplot(df_scaled, diag_kind="kde", corner=True)
plt.suptitle("Pair Plot of Student Data", y=1.02)
plt.show()

# Correlation heatmap, left disabled as in the original notebook.
# plt.figure(figsize=(12, 8))
# sns.heatmap(df.corr(), annot=True, cmap='coolwarm', fmt='.2f', linewidths=0.5)
# plt.title("Correlation Heatmap")
# plt.show()

# --- Model output versus ground truth on the test split ---------------------
# Both series are plotted against the position of the sample in the test set.
sample_positions = range(len(y_test))
plt.figure(figsize=(10, 6))
plt.scatter(sample_positions, y_test, label='Actual', color='blue', alpha=0.6)
plt.scatter(sample_positions, y_pred, label='Predicted', color='red', alpha=0.6)
plt.title("Actual vs Predicted Performance")
plt.xlabel("Students")
plt.ylabel("Performance")
plt.legend()
plt.show()

plt.figure(figsize=(10, 6))
sns.histplot(y_test, color='blue', label='Actual', kde=True, stat='density')
sns.histplot(y_pred, color='red', label='Predicted', kde=True, stat='density')
plt.title("Density Plot of Actual vs Predicted Performance")
plt.xlabel("Performance")
plt.ylabel("Density")
plt.legend()
plt.show()




In [None]:
# Confusion matrix of true versus predicted labels on the test split,
# rendered as an annotated heatmap with integer cell counts.
cm = confusion_matrix(y_test, y_pred)
heatmap_ax = sns.heatmap(cm, annot=True, fmt='d')
heatmap_ax.set_title("Confusion Matrix")
plt.show()


In [None]:
import joblib

# Serialise the fitted `model` object to disk; the call returns the list of
# file names written, which the notebook displays as the cell output.
joblib.dump(value=model, filename="student_analysis_model.pkl")


In [None]:
# Colab-only step: hand the saved model file to the browser for download.
from google.colab import files as colab_files

colab_files.download("student_analysis_model.pkl")


In [None]:
import joblib

# `your_model` was an unfilled template placeholder that is not defined
# anywhere in this notebook, so the cell raised NameError. Persist the fitted
# `model` object instead.
joblib.dump(model, "student_model.pkl")

