In [1]:
import pandas as pd
import numpy as np

In [2]:
#reading dataset and checking its values.
dataset=pd.read_csv('round1_josaa_22.csv')
dataset.head(135)

Unnamed: 0,Institute,Academic Program Name,Quota,Seat Type,Gender,Opening Rank,Closing Rank,Round
0,Indian Institute of Technology Bhubaneswar,"Civil Engineering (4 Years, Bachelor of Techno...",AI,OPEN,Gender-Neutral,9193,11771,1
1,Indian Institute of Technology Bhubaneswar,"Civil Engineering (4 Years, Bachelor of Techno...",AI,OPEN,Female-only (including Supernumerary),16138,20164,1
2,Indian Institute of Technology Bhubaneswar,"Civil Engineering (4 Years, Bachelor of Techno...",AI,EWS,Gender-Neutral,1605,1744,1
3,Indian Institute of Technology Bhubaneswar,"Civil Engineering (4 Years, Bachelor of Techno...",AI,EWS,Female-only (including Supernumerary),3159,3159,1
4,Indian Institute of Technology Bhubaneswar,"Civil Engineering (4 Years, Bachelor of Techno...",AI,OBC-NCL,Gender-Neutral,3997,4297,1
...,...,...,...,...,...,...,...,...
130,Indian Institute of Technology Bhubaneswar,Metallurgical and Materials Engineering (5 Yea...,AI,SC,Gender-Neutral,1624,1624,1
131,Indian Institute of Technology Bhubaneswar,Metallurgical and Materials Engineering (5 Yea...,AI,ST,Gender-Neutral,1274,1274,1
132,Indian Institute of Technology Bombay,"Aerospace Engineering (4 Years, Bachelor of Te...",AI,OPEN,Gender-Neutral,577,2119,1
133,Indian Institute of Technology Bombay,"Aerospace Engineering (4 Years, Bachelor of Te...",AI,OPEN,Female-only (including Supernumerary),485,3968,1


In [3]:
#shape of dataset.
dataset.shape

(10022, 8)

In [4]:
#checking if there are any null values in the dataset.
dataset.isnull().any()

Institute                False
Academic Program Name    False
Quota                    False
Seat Type                False
Gender                   False
Opening Rank             False
Closing Rank             False
Round                    False
dtype: bool

In [5]:
#checking unique labels in the dataset.
dataset['Institute'].unique()


array(['Indian Institute of Technology Bhubaneswar',
       'Indian Institute of Technology Bombay',
       'Indian Institute of Technology Mandi',
       'Indian Institute of Technology Delhi',
       'Indian Institute of Technology Indore',
       'Indian Institute of Technology Kharagpur',
       'Indian Institute of Technology Hyderabad',
       'Indian Institute of Technology Jodhpur',
       'Indian Institute of Technology Kanpur',
       'Indian Institute of Technology Madras',
       'Indian Institute of Technology Gandhinagar',
       'Indian Institute of Technology Patna',
       'Indian Institute of Technology Roorkee',
       'Indian Institute of Technology (ISM) Dhanbad',
       'Indian Institute of Technology Ropar',
       'Indian Institute of Technology (BHU) Varanasi',
       'Indian Institute of Technology Guwahati',
       'Indian Institute of Technology Bhilai',
       'Indian Institute of Technology Goa',
       'Indian Institute of Technology Palakkad',
       'In

In [6]:
#removing rows in the dataset that contains alphabet 'P' to avoid complications.
dataset=dataset[~dataset['Opening Rank'].str.contains('P')]
dataset=dataset[~dataset['Closing Rank'].str.contains('P')]
dataset.shape

(9929, 8)

In [7]:
#dropping unnecessary columns.
dataset1=dataset.drop(['Quota','Academic Program Name','Round'],axis=1)
dataset1.head()

Unnamed: 0,Institute,Seat Type,Gender,Opening Rank,Closing Rank
0,Indian Institute of Technology Bhubaneswar,OPEN,Gender-Neutral,9193,11771
1,Indian Institute of Technology Bhubaneswar,OPEN,Female-only (including Supernumerary),16138,20164
2,Indian Institute of Technology Bhubaneswar,EWS,Gender-Neutral,1605,1744
3,Indian Institute of Technology Bhubaneswar,EWS,Female-only (including Supernumerary),3159,3159
4,Indian Institute of Technology Bhubaneswar,OBC-NCL,Gender-Neutral,3997,4297


In [8]:
#encoding data into 0's,1's
data = pd.get_dummies(dataset1, columns=['Seat Type', 'Gender'])
data.head()

Unnamed: 0,Institute,Opening Rank,Closing Rank,Seat Type_EWS,Seat Type_EWS (PwD),Seat Type_OBC-NCL,Seat Type_OBC-NCL (PwD),Seat Type_OPEN,Seat Type_OPEN (PwD),Seat Type_SC,Seat Type_SC (PwD),Seat Type_ST,Seat Type_ST (PwD),Gender_Female-only (including Supernumerary),Gender_Gender-Neutral
0,Indian Institute of Technology Bhubaneswar,9193,11771,0,0,0,0,1,0,0,0,0,0,0,1
1,Indian Institute of Technology Bhubaneswar,16138,20164,0,0,0,0,1,0,0,0,0,0,1,0
2,Indian Institute of Technology Bhubaneswar,1605,1744,1,0,0,0,0,0,0,0,0,0,0,1
3,Indian Institute of Technology Bhubaneswar,3159,3159,1,0,0,0,0,0,0,0,0,0,1,0
4,Indian Institute of Technology Bhubaneswar,3997,4297,0,0,1,0,0,0,0,0,0,0,0,1


In [9]:
#converting opening and closing rank columns to int
data['Opening Rank']=data['Opening Rank'].astype(int)
data['Closing Rank']=data['Closing Rank'].astype(int)

In [10]:
#Selecting Decision tree as our model is because in this datasaet we can get institute by checking som if conditions like whether seat type is matching
#or not. So I used Decision tree using ID3 algorithm.
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

X = data.drop(['Institute'], axis=1)
y = data['Institute']

# Defining the decision tree classifier and fit it to the data
clf = DecisionTreeClassifier()
clf.fit(X, y)

#train test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
y_pred = clf.predict(X_train)
print("Accuracy of the model is - ",accuracy_score(y_train, y_pred))

# function to predict the institute based on seat type, gender, and rank
def predict_institute(seat_type, gender, rank):
    # Finding the row of data that matches the input seat type and gender
    matched_rows = data[(data['Seat Type_'+seat_type] == 1) & (data['Gender_'+gender] == 1)]
    # Check if the input rank is between the opening and closing ranks
    for _, row in matched_rows.iterrows():
        if rank >= row['Opening Rank'] and rank <= row['Closing Rank']:
            return row['Institute']
    
    return None

Accuracy of the model is -  0.9959712954802972


In [11]:
#predictiong with seat type(SC),Gender(Female-only (including Supernumerary)),rank(3444)
predict_institute('SC','Female-only (including Supernumerary)',3444)

'Indian Institute of Technology Kharagpur'

In [12]:
#checking whether prediction is right or wrong.
dataset1.loc[1002]

Institute       Indian Institute of Technology Kharagpur
Seat Type                                             SC
Gender             Female-only (including Supernumerary)
Opening Rank                                        3444
Closing Rank                                        3444
Name: 1002, dtype: object

Hence, Our prediction is right.