In [1]:
from PyPDF2 import PdfReader
import os
import pandas as pd



In [2]:
def get_files_from_directory(directory_path):
    """Return the names of the PDF files located directly inside a directory.

    The scan is not recursive. Extension matching ignores letter case, so
    ``report.PDF`` is picked up as well as ``report.pdf``.

    Args:
        directory_path: Path of the directory to scan.

    Returns:
        list[str]: Bare file names (not full paths), sorted alphabetically.

    Raises:
        OSError: Propagated from ``os.listdir`` when the directory cannot
            be read (for example, when it does not exist).
    """
    # os.listdir yields entries in arbitrary order; sorting makes the
    # resulting row order reproducible from run to run.
    return sorted(
        filename
        for filename in os.listdir(directory_path)
        if filename.lower().endswith(".pdf")
    )



In [3]:

def extract_text_from_pdf(pdf_path):
    """Extract the text of the first page of a PDF file.

    Only the first page is read; any further pages are ignored.

    Args:
        pdf_path: Path of the PDF file, handed to ``PdfReader``.

    Returns:
        The text extracted from the first page, or an empty string when
        the document contains no pages.
    """
    reader = PdfReader(pdf_path)
    # Indexing pages[0] on a page-less document would raise IndexError;
    # return an empty text instead so callers can still split/search it.
    if len(reader.pages) == 0:
        return ""
    return reader.pages[0].extract_text()


In [4]:
def extract_name(pdf_text):
    """Find the student's name in the extracted marksheet text.

    The name is taken to be the line that immediately follows the first
    line containing the label "Student Name:".

    Args:
        pdf_text: Text of the marksheet, with lines separated by newlines.

    Returns:
        The following line with surrounding whitespace removed, or None
        when the label is absent or appears only on the last line.
    """
    rows = pdf_text.split('\n')

    # Pair every line with its successor; the last line has no successor
    # and therefore can never be reported as a label line.
    for current, following in zip(rows, rows[1:]):
        if "Student Name:" in current:
            return following.strip()

    return None

In [5]:
def extract_sgpa(pdf_text):
    """Pull the second-year SGPA value out of the extracted marksheet text.

    Only the first line containing "SECOND YEAR SGPA" is examined. The
    value is the text between the first and second ":-" separators on that
    line (or everything after the first one when there is no second).

    Args:
        pdf_text: Text of the marksheet, with lines separated by newlines.

    Returns:
        The SGPA as a whitespace-stripped string, or None when no such
        line exists or the line has no ":-" separator.
    """
    marker = "SECOND YEAR SGPA"
    matching_row = next(
        (row for row in pdf_text.split('\n') if marker in row),
        None,
    )
    if not matching_row:
        return None

    pieces = matching_row.split(":-")
    return pieces[1].strip() if len(pieces) > 1 else None

In [6]:
if __name__ == "__main__":
    # Marksheets are read from the "marksheet" folder under the current
    # working directory.
    pdf_directory = os.getcwd() +"/marksheet"
    pdf_files = get_files_from_directory(pdf_directory)

    # Collect one {"Name", "SGPA"} record per marksheet PDF.
    student_info_list = []
    for pdf_file in pdf_files:
        pdf_path = os.path.join(pdf_directory, pdf_file)
        info = extract_text_from_pdf(pdf_path)

        name, sgpa = extract_name(info), extract_sgpa(info)
        student_info = dict(Name=name, SGPA=sgpa)
        student_info_list.append(student_info)

    

In [7]:
    # Dataframe of Student data.
    # Naming the columns keeps "Name"/"SGPA" present even when no records
    # were collected, so the sort below does not fail on a missing column.
    df = pd.DataFrame(student_info_list, columns=["Name", "SGPA"])

    # Persist the records as extracted (before sorting and ranking).
    output_csv = "student_info.csv"
    df.to_csv(output_csv, index=False)

    # SGPA values are extracted as text, so sorting the column directly
    # compares strings (e.g. "10.00" would land below "8.52"). Sort on the
    # numeric value instead; missing or non-numeric entries become NaN and
    # are placed last. The column itself is left unchanged.
    df = df.sort_values(
        by="SGPA",
        ascending=False,
        key=lambda sgpa: pd.to_numeric(sgpa, errors="coerce"),
    )

    # Rank simply follows the sorted row order (rows without an SGPA still
    # receive the trailing ranks).
    df["Rank"] = range(1, len(df) + 1)


    df

Unnamed: 0,Name,SGPA,Rank
21,RATHOD DINESH SHANKAR,8.52,1
53,MANASI DESHPANDE,8.50,2
34,SUSAR PATIL RUPALI RUSTUM,8.45,3
26,SHINDE PRANJALI PRAMOD,8.34,4
0,BHAMARE TEJAS SHANTANU,8.16,5
...,...,...,...
52,KAUSHIK SHRIKRISHNA KHARE,,69
54,KADPURKAR MANDAR MANGESH,,70
55,KUMAVAT NEEL AMOD,,71
64,KAMATHE SHANTANU DILIP,,72


In [8]:
    # Top 10 students: the first ten rows of df in its current order,
    # renumbered from 0.
    top_10 = df.head(10).reset_index(drop=True)

    print("Top 10 Students:")
    top_10

Top 10 Students:


Unnamed: 0,Name,SGPA,Rank
0,RATHOD DINESH SHANKAR,8.52,1
1,MANASI DESHPANDE,8.5,2
2,SUSAR PATIL RUPALI RUSTUM,8.45,3
3,SHINDE PRANJALI PRAMOD,8.34,4
4,BHAMARE TEJAS SHANTANU,8.16,5
5,BHAMARE TEJAS SHANTANU,8.16,6
6,PATHAN ALISHA ASLAM,8.0,7
7,BIRHADE PRAGATI PURUSHOTTAM,7.84,8
8,JAMALE RUTUJA BHAGWAN,7.82,9
9,VIDHATE OMKAR AJINATH,7.68,10


In [9]:
    # Students with no SGPA value (backlog or result not yet available).
    # Rank is dropped because it is not meaningful for these rows.
    backlogs = (
        df.loc[df["SGPA"].isna()]
        .drop("Rank", axis=1)
        .reset_index(drop=True)
    )

    print("Students who has backlogs or Result Remaining:")
    print("No. of students  : ", len(backlogs))
    backlogs

Students who has backlogs or Result Remaining:
No. of students  :  22


Unnamed: 0,Name,SGPA
0,KULKARNI ANURHUTA AVINASH,
1,KUMAVAT NEEL AMOD,
2,MAHAJAN TRUPTI SANJAY,
3,MANGNALE SHWETA SURESH,
4,NALAWADE NANDINI SANTOSHKUMAR,
5,OM,
6,RANADIVE SAUMYA SAMEER,
7,RODGE SAHIL DHANRAJ,
8,SHELKE AVISHKAR VINOD,
9,SHINDE AVINASH DEVIDAS,


In [10]:
    # Students whose SGPA is above 7.
    # SGPA is stored as text; convert it for the comparison. Missing, empty
    # or non-numeric values become NaN, and NaN > 7.0 is False, so those rows
    # are excluded without having to fill them in first.
    sgpa_numeric = pd.to_numeric(df["SGPA"], errors="coerce")
    above_7 = df[sgpa_numeric > 7.0].reset_index(drop=True)

    print("Student Above 7 CGPA:", len(above_7))

    above_7

Student Above 7 CGPA: 35


Unnamed: 0,Name,SGPA,Rank
0,RATHOD DINESH SHANKAR,8.52,1
1,MANASI DESHPANDE,8.5,2
2,SUSAR PATIL RUPALI RUSTUM,8.45,3
3,SHINDE PRANJALI PRAMOD,8.34,4
4,BHAMARE TEJAS SHANTANU,8.16,5
5,BHAMARE TEJAS SHANTANU,8.16,6
6,PATHAN ALISHA ASLAM,8.0,7
7,BIRHADE PRAGATI PURUSHOTTAM,7.84,8
8,JAMALE RUTUJA BHAGWAN,7.82,9
9,VIDHATE OMKAR AJINATH,7.68,10
