In [4]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

In [6]:

# Create a sample dataset
data = pd.read_csv('admission_data.csv')
data

Unnamed: 0,Student ID,Student Name,Field of Study,Year of Admission,Gender,hsc_p,hsc marks,Diploma Marks,diploma_p,SSC Marks,SSC percentage,Chance of Admit
0,165527,John,Engineering,2022,Male,75.00,750,880,8.00,785,85.0,0.75
1,433076,Alice,Computer Science,2022,Female,82.00,820,785,8.50,988,88.0,0.82
2,789123,Bob,Mathematics,2022,Male,90.00,900,692,9.20,894,94.0,0.92
3,654321,Eve,Physics,2022,Female,88.00,880,989,8.90,790,90.0,0.88
4,987654,Charlie,Biology,2022,Male,78.00,780,714,7.90,684,84.0,0.78
...,...,...,...,...,...,...,...,...,...,...,...,...
1200,213613,Dawn Beasley,Computer Science,2017,Female,90.85,727,941,89.65,490,98.1,0.96
1201,391845,Mr. Bradley Bowen III,Electrical Engineering,2022,Female,90.85,727,941,89.65,491,98.1,0.34
1202,680336,Daniel Stewart Jr.,Chemical Engineering,2016,Male,90.84,727,941,89.65,491,98.2,0.77
1203,952111,Michael Zavala,Electrical Engineering,2019,Male,90.84,727,941,89.65,491,98.3,0.89


In [7]:
# Feature engineering
data['average_marks'] = (data['hsc_p'] + data['diploma_p'] + data['SSC percentage']) / 3

In [8]:
features = ['hsc_p', 'diploma_p', 'SSC percentage', 'average_marks']

In [9]:
# Split the data
X_train, X_test, y_train, y_test = train_test_split(
    data[features], data['Chance of Admit'], test_size=0.2, random_state=42
)

In [10]:

# Define preprocessing pipeline
numeric_features = ['hsc_p', 'diploma_p', 'SSC percentage', 'average_marks']
numeric_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='mean')),  # Impute missing values with the mean
    ('scaler', StandardScaler())
])

preprocessor = numeric_transformer

In [11]:
# Define the model pipeline with LinearRegression for regression
model = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('regressor', LinearRegression())
])

In [12]:
# Train the regression model
model.fit(X_train, y_train)

In [13]:
# Make predictions for a specific student
def predict_admission_chance():
    while True:
        try:
            # Get input Student ID from the user
            student_id = int(input("Enter the Student ID: "))
            
            # Check if the entered student ID is in the dataset
            if student_id in data['Student ID'].values.tolist():
                # Assuming 'Student ID' is a unique identifier for each student
                student_data = data[data['Student ID'] == student_id]

                # Extract features for prediction
                input_features = student_data[features]

                # Use the preprocessing pipeline to preprocess the input data
                input_features_preprocessed = preprocessor.transform(input_features)

                # Make the prediction
                admission_chance = model.predict(input_features_preprocessed)

                # Display the prediction
                print(f"Predicted admission chance for student {student_id}: {admission_chance[0]}")
                break
            else:
                print(f"Student with ID {student_id} not found in the dataset. Please try again.")
        except ValueError:
            print("Invalid input. Please enter a valid Student ID.")

# Example usage
predict_admission_chance()


Enter the Student ID:  482073


Predicted admission chance for student 482073: -618784711356.6936


