In [1]:
pip install pandas scikit-learn matplotlib seaborn


Note: you may need to restart the kernel to use updated packages.


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.ensemble import RandomForestRegressor

# Load dataset
def load_data(file_path):
    """Read the CSV file at ``file_path`` and return its contents as a DataFrame."""
    return pd.read_csv(file_path)

# Preprocess dataset
def preprocess_data(df):
    """Rank rows by ``final_score``, highest score first.

    Rows whose ``final_score`` is missing are dropped; NaNs in other columns
    are left alone. The remaining rows are sorted by ``final_score`` in
    descending order and given a 1-based ``rank`` column (1 = highest score).
    Rows with equal scores keep their original relative order and receive
    consecutive ranks.

    Args:
        df: DataFrame containing a ``final_score`` column. It is not modified.

    Returns:
        A new DataFrame sorted by ``final_score`` with an added ``rank`` column.

    Raises:
        ValueError: If ``df`` has no ``final_score`` column.
    """
    if "final_score" not in df.columns:
        raise ValueError("The 'final_score' column is not found in the DataFrame.")

    # Drop NaNs only in 'final_score' so rows with other gaps are preserved.
    scored = df.dropna(subset=["final_score"])

    # mergesort is stable, so tied scores are ordered deterministically
    # (by their original position) instead of depending on the sort algorithm.
    ordered = scored.sort_values(by="final_score", ascending=False, kind="mergesort")

    # assign() builds a new frame rather than writing a column into a frame
    # derived from the caller's data.
    return ordered.assign(rank=np.arange(1, len(ordered) + 1))

# Train model on full dataset
def train_model(X, y):
    """Fit a random-forest regressor on ``X`` and ``y`` and return it.

    The forest uses 100 trees and a fixed ``random_state`` of 42, so repeated
    fits on the same data produce the same model.
    """
    forest = RandomForestRegressor(random_state=42, n_estimators=100)
    # fit() returns the fitted estimator itself.
    return forest.fit(X, y)

# Predict rank for a given score
def predict_rank(model, final_score):
    """Predict the rank for a single score as a whole number.

    Args:
        model: Fitted estimator exposing ``predict``; it is called with one
            sample holding one feature.
        final_score: The score to predict a rank for.

    Returns:
        The predicted rank rounded to the nearest integer.
    """
    # predict() takes a 2-D batch, so wrap the score as one row with one column.
    prediction = model.predict([[final_score]])
    # A regressor returns fractional values; round to the nearest rank rather
    # than truncating, which would always bias toward a better (lower) rank.
    return int(round(float(prediction[0])))

# Main function
def main():
    """Train the rank model from the CSV file and predict a rank interactively.

    Loads and preprocesses the dataset, fits the model with ``final_score`` as
    the only feature and ``rank`` as the target, then prompts for a score and
    prints the predicted rank. A ``ValueError`` raised while reading, predicting
    or printing is reported as invalid input instead of propagating.
    """
    file_path = "result (3).csv"  # Replace with your dataset file

    # Load the data and add the 'rank' column in one step.
    ranked = preprocess_data(load_data(file_path))

    # Single-feature matrix (double brackets keep it 2-D) and the target vector.
    features = ranked[["final_score"]].values
    targets = ranked["rank"].values

    # Fit on the whole dataset; no hold-out split is made here.
    model = train_model(features, targets)

    try:
        user_score = float(input("Enter NEET Score: "))
        predicted_rank = predict_rank(model, user_score)
        print(f"Predicted Rank for Score {user_score}: {predicted_rank}")
    except ValueError:
        print("Invalid input! Please enter a numeric score.")

# Run the pipeline only when this file is executed directly, not when imported.
if __name__ == "__main__":
    main()