<h1 align="center"><b>KOTAK SALESIAN SCHOOL</b></h1>


<h2 align="center"><b>STUDENTS INFO 2024-25</b></h2>

## **Back Up the Database Before Running a New Import**

In [56]:
import datetime
import os
import subprocess

# * MySQL Credentials
# NOTE(review): the password is committed in plain text; move it to an
# environment variable or a MySQL option file before sharing this notebook.
DB_USER = "root"
DB_PASSWORD = "Hari@123"
DB_NAME = "schooldb"
BACKUP_DIR = "D:/mysql_backups"  # * Directory to save backups in

# * Ensure the backup directory exists (no error if it is already there)
os.makedirs(BACKUP_DIR, exist_ok=True)

# * Generate a timestamp for the backup file
timestamp = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
backup_file = f"{BACKUP_DIR}/backup_{DB_NAME}_{timestamp}.sql"

# * Run mysqldump without a shell.
# An argument list avoids shell quoting problems with the password, and
# --result-file lets mysqldump write the dump itself instead of relying on
# shell redirection.
dump_command = [
    "mysqldump",
    f"--user={DB_USER}",
    f"--password={DB_PASSWORD}",
    f"--result-file={backup_file}",
    DB_NAME,
]
dump_result = subprocess.run(dump_command, capture_output=True, text=True)

# * Stop here if the dump failed: the cells below truncate tables, so they must
# * not run without a valid backup.
if dump_result.returncode != 0:
    # Remove a partial dump so it cannot be mistaken for a usable backup.
    if os.path.exists(backup_file):
        os.remove(backup_file)
    raise RuntimeError(
        f"mysqldump failed with exit code {dump_result.returncode}: "
        f"{dump_result.stderr.strip()}"
    )

print(f"Backup saved to {backup_file}")


Backup saved to D:/mysql_backups/backup_schooldb_2025-06-24_21-28-06.sql


## **Import Libraries & Define Credentials**

In [57]:
import pandas as pd
import gspread
from oauth2client.service_account import ServiceAccountCredentials
from sqlalchemy import create_engine, text
import urllib.parse

# Service-account key file that fetch_data() passes to
# ServiceAccountCredentials.from_json_keyfile_name().
GOOGLE_JSON_PATH = r"D:\GITHUB\kotak-school-dbms\google_api_keys\woven-solution-446513-f2-6700b7a9f290.json"
# NOTE(review): this constant is not read by the students fetch_data(), which
# opens the spreadsheet "STUDENTS DETAILS 2024-25" by a literal name.
GOOGLE_SHEET_NAME = "Fee Reports 2024-25"
# Connection settings read by update_database() to build the SQLAlchemy URL.
# NOTE(review): the database password is hard-coded in the notebook; load it
# from the environment or a secrets store before sharing.
MYSQL_CREDENTIALS = {
    "username": "root",
    "password": "Hari@123",
    "host": "localhost",
    "port": "3306",
    "database": "schooldb",
}
# Table that update_database() truncates and refills. Later sections rebind
# this same global name, and the functions read it at call time.
TABLE_NAME = "students_2024_25"

## **Extract Data from Google Sheet**

In [58]:
def fetch_data(sheet_name="Sheet1"):
    """Download the student register from Google Sheets as a DataFrame.

    Authorises gspread with the service-account key at ``GOOGLE_JSON_PATH``,
    opens the ``"Overall"`` worksheet of the ``"STUDENTS DETAILS 2024-25"``
    spreadsheet and reads every record, using sheet row 3 as the header row.

    Args:
        sheet_name: Kept only so existing calls keep working. It does NOT
            select the worksheet; the ``"Overall"`` worksheet is always read.

    Returns:
        pandas.DataFrame: one row per record returned by
        ``get_all_records(head=3)``.

    Raises:
        Exception: if the spreadsheet or the worksheet cannot be found. The
            original gspread exception is attached as ``__cause__``.
    """
    spreadsheet_title = "STUDENTS DETAILS 2024-25"
    worksheet_title = "Overall"

    scope = ["https://spreadsheets.google.com/feeds", "https://www.googleapis.com/auth/drive"]
    creds = ServiceAccountCredentials.from_json_keyfile_name(GOOGLE_JSON_PATH, scope)
    client = gspread.authorize(creds)

    try:
        # * Open the Google Spreadsheet
        spreadsheet = client.open(spreadsheet_title)

        # * Open the worksheet that holds the combined student list
        sheet = spreadsheet.worksheet(worksheet_title)
    except gspread.SpreadsheetNotFound as err:
        raise Exception("❌ Spreadsheet not found! Ensure the name is correct and the service account has access.") from err
    except gspread.WorksheetNotFound as err:
        # Report the worksheet that was actually requested, not the unused
        # ``sheet_name`` argument.
        raise Exception(f"❌ Worksheet '{worksheet_title}' not found! Ensure the name matches exactly.") from err

    # * Fetch data (row 3 of the sheet holds the column headers)
    data = sheet.get_all_records(head=3)
    return pd.DataFrame(data)

In [59]:
# * Fetch the raw student sheet into a DataFrame
student_info = fetch_data()

# * Preview the first rows as the cell output.
# NOTE(review): the rendered preview contains real student names and phone
# numbers; clear the cell output before sharing the notebook.
student_info.head()

Unnamed: 0,S.No,Adm No.,Name,Class,Gender,Mother's Name,Father's Name,Pen Number,D.O.B,Ph. No.,Religion,Caste,Sub Caste,2nd Lang.,Remarks,Class Nos,JOINED YEAR,GRADES
0,1,17159,CHAITHRA PRESINGU,Pre KG,Female,,P VENKATESH,,13-12-2021,7093316614,Hindu,BC - A,,Telugu,,1,,1
1,2,16935,DHRUVITA PRAARTHANA SAVARAPU,Pre KG,Female,,S VENKATESH,,18-07-2021,9912246176,Hindu,SC,JAMBAVULU,Telugu,NO CASTE CERTIFICATE,1,,1
2,3,17158,DHRUVITHA ALPHONSEY SONGA,Pre KG,Female,,RAVI KUMAR SONGA,,28-07-2021,8341709383,Hindu,SC,MADIGA,Telugu,NO CASTE CERTIFICATE,1,,1
3,4,17160,HARICHANDANA MARADANA,Pre KG,Female,,M TARAKESWARA NAIDU,,18-12-2020,8464066346,Hindu,BC - D,,,,1,,1
4,5,16973,JYOTHIRMAE NADIGATLA,Pre KG,Female,,N UDAYA BHASKARA RAO,,11-11-2021,9701308587,Hindu,BC - A,,Telugu,,1,,1


## **Clean Extracted Data**

In [60]:
import pandas as pd

def clean_data(df):
    """Normalise the raw student sheet and export it to CSV.

    Works on a copy of ``df``. The 18 incoming columns are renamed by
    position, ``DOB`` is reformatted from DD-MM-YYYY to YYYY-MM-DD (values
    that do not parse become missing), rows are ordered by ``ClassNo`` then
    ``SNo``, the ``Grades`` column is removed, ``SNo`` is renumbered from 1 in
    the new order, and ``JoinedYear`` becomes a nullable integer column.

    Side effect: the cleaned frame is written to two fixed CSV paths.

    Args:
        df: raw frame with exactly 18 columns in the sheet's column order.

    Returns:
        pandas.DataFrame: the cleaned frame (17 columns).
    """
    # * Never touch the caller's frame
    students = df.copy()

    # * Positional rename: the order must match the sheet's column order
    students.columns = [
        'SNo', 'AdmissionNo', 'STUDENT_NAME', 'Class', 'Gender', 'MotherName',
        'FatherName', 'PenNo', 'DOB', 'Mobile', 'Religion', 'Caste',
        'SubCaste', 'IIndLang', 'Remarks', 'ClassNo', 'JoinedYear', "Grades"
    ]

    # * DD-MM-YYYY -> YYYY-MM-DD; unparseable dates are coerced to missing
    parsed_dob = pd.to_datetime(students["DOB"], format="%d-%m-%Y", errors='coerce')
    students["DOB"] = parsed_dob.dt.strftime("%Y-%m-%d")

    # * Order by class number, then by the sheet's serial number
    students = students.sort_values(by=["ClassNo", "SNo"], ascending=[True, True])

    # * The 'Grades' column is not loaded into the database
    if "Grades" in students.columns:
        students = students.drop(columns=["Grades"])

    # * Renumber the serial number to follow the sorted order
    students["SNo"] = range(1, len(students) + 1)

    # * Blank or non-numeric years become <NA> in a nullable Int64 column
    joined_year = pd.to_numeric(students["JoinedYear"], errors="coerce")
    students["JoinedYear"] = joined_year.astype("Int64")

    # * Export the same cleaned frame to both consumers
    for csv_path in (
        r"D:\GITHUB\kotak-school-dbms\output_data\students_table_2024_25.csv",
        r"D:\GITHUB\student-management-system\data\students_data.csv",
    ):
        students.to_csv(csv_path, index=False)

    return students


## **Update MySQL Database**

In [61]:
def update_database(df):
    """Replace the contents of ``TABLE_NAME`` in MySQL with ``df``.

    Steps, in order:
      1. look for an existing UNIQUE index on ``AdmissionNo``;
      2. TRUNCATE the table;
      3. add a UNIQUE constraint on ``AdmissionNo`` if step 1 found none;
      4. append ``df`` with ``DataFrame.to_sql``;
      5. issue ``ALTER TABLE ... ORDER BY ClassNo, SNo``.

    Connection settings come from the global ``MYSQL_CREDENTIALS``.

    Args:
        df: cleaned students frame whose columns match the table.

    Returns:
        None. Database errors are caught and printed, not re-raised, so the
        caller cannot tell from the return value whether the load succeeded.
    """
    # safe="" also escapes "/" so any password character is URL-safe.
    password = urllib.parse.quote(MYSQL_CREDENTIALS["password"], safe="")
    engine = create_engine(f"mysql+pymysql://{MYSQL_CREDENTIALS['username']}:{password}"
                           f"@{MYSQL_CREDENTIALS['host']}:{MYSQL_CREDENTIALS['port']}/{MYSQL_CREDENTIALS['database']}")

    try:
        with engine.connect() as conn:
            # * Step 1: Check if a UNIQUE index already exists.
            # Non_unique = 0 keeps only unique indexes; a plain (non-unique)
            # index on the column must not count as "constraint exists".
            index_check = conn.execute(text(
                f"SHOW INDEX FROM {TABLE_NAME} "
                "WHERE Column_name = 'AdmissionNo' AND Non_unique = 0;"
            ))
            existing_indexes = index_check.fetchall()

            # * Step 2: Truncate the table
            conn.execute(text(f"TRUNCATE TABLE {TABLE_NAME};"))
            print(f"✅ All records from the '{TABLE_NAME}' table have been deleted.\n")

            # * Step 3: Add unique constraint only if it doesn't exist
            if len(existing_indexes) == 0:  # No existing unique constraint
                conn.execute(text(f"ALTER TABLE {TABLE_NAME} ADD UNIQUE (AdmissionNo);\n"))
                print(f"✅ Unique constraint added to 'AdmissionNo' in the '{TABLE_NAME}' table.\n")
            else:
                print(f"⚠️ Unique constraint on 'AdmissionNo' already exists. Skipping addition.\n")

            # * Step 4: Insert data into the table
            df.to_sql(name=TABLE_NAME, con=engine, if_exists='append', index=False)
            print(f"✅ Data successfully inserted into the '{TABLE_NAME}' table.\n")

            # * Step 5: Sort the table by ClassNo and SNo (in MySQL)
            conn.execute(text(f"ALTER TABLE {TABLE_NAME} ORDER BY ClassNo ASC, SNo ASC;"))
            print(f"✅ Data in the '{TABLE_NAME}' table sorted by ClassNo and SNo.\n")

    except Exception as e:
        print(f"❌ An error occurred: {e}")
    finally:
        # Release pooled connections; this engine is used for one load only.
        engine.dispose()


## **Run the Main Function**

In [62]:
if __name__ == "__main__":
    
    # * Students pipeline: fetch the sheet -> clean it -> reload the MySQL table.
    # * Fetch data
    student_info = fetch_data()
    print("✅ Data fetched successfully.\n")

    # * Clean data (also writes the cleaned CSV exports as a side effect)
    student_info = clean_data(student_info)
    print("✅ Data cleaned successfully.\n")
    print("✅ Columns are:\n",student_info.columns)

    # * Update database
    # NOTE(review): update_database() prints database errors instead of raising
    # them, so the "completed successfully" line below is printed even when
    # the load failed.
    update_database(student_info)
    print("✅ Process completed successfully.\n")

    # * Print the full DataFrame
    # *print(student_info.to_string())  # * Print the entire DataFrame in a readable format


✅ Data fetched successfully.

✅ Data cleaned successfully.

✅ Columns are:
 Index(['SNo', 'AdmissionNo', 'STUDENT_NAME', 'Class', 'Gender', 'MotherName',
       'FatherName', 'PenNo', 'DOB', 'Mobile', 'Religion', 'Caste', 'SubCaste',
       'IIndLang', 'Remarks', 'ClassNo', 'JoinedYear'],
      dtype='object')
✅ All records from the 'students_2024_25' table have been deleted.

⚠️ Unique constraint on 'AdmissionNo' already exists. Skipping addition.

✅ Data successfully inserted into the 'students_2024_25' table.

✅ Data in the 'students_2024_25' table sorted by ClassNo and SNo.

✅ Process completed successfully.



<h2 align="center"><b>FEE REPORT 2024-25</b></h2>

#### **Google Cloud Console service account: myschooldb@woven-solution-446513-f2.iam.gserviceaccount.com**

## **Import Necessary Libraries & Define Global Variables**

In [63]:
import pandas as pd
import gspread
from oauth2client.service_account import ServiceAccountCredentials
from sqlalchemy import create_engine, text
import urllib.parse

# Service-account key file that fetch_data() passes to
# ServiceAccountCredentials.from_json_keyfile_name().
GOOGLE_JSON_PATH = r"D:\GITHUB\kotak-school-dbms\google_api_keys\woven-solution-446513-f2-5ffd100e19c7.json"
# NOTE(review): the fees fetch_data() opens "Fee Reports 2024-25" by a literal
# name and does not read this constant.
GOOGLE_SHEET_NAME = "Fee Reports 2024-25"
# Connection settings read by update_database() to build the SQLAlchemy URL.
# NOTE(review): the database password is hard-coded in the notebook; load it
# from the environment or a secrets store before sharing.
MYSQL_CREDENTIALS = {
    "username": "root",
    "password": "Hari@123",
    "host": "localhost",
    "port": "3306",
    "database": "schooldb",
}
# Rebinds the global used by update_database(); from here on, any
# update_database() call targets the fees table.
TABLE_NAME = "fees_table_2024_25"


## **Function for Fetching Data**

In [64]:
def fetch_data(sheet_name="Sheet1"):
    """Download the fee report from Google Sheets as a DataFrame.

    Authorises gspread with the service-account key at ``GOOGLE_JSON_PATH``,
    opens the ``"Overall Sheet"`` worksheet of the ``"Fee Reports 2024-25"``
    spreadsheet and reads every record, using sheet row 3 as the header row.

    Args:
        sheet_name: Kept only so existing calls keep working. It does NOT
            select the worksheet; ``"Overall Sheet"`` is always read.

    Returns:
        pandas.DataFrame: one row per record returned by
        ``get_all_records(head=3)``.

    Raises:
        Exception: if the spreadsheet or the worksheet cannot be found. The
            original gspread exception is attached as ``__cause__``.
    """
    spreadsheet_title = "Fee Reports 2024-25"
    worksheet_title = "Overall Sheet"

    scope = ["https://spreadsheets.google.com/feeds", "https://www.googleapis.com/auth/drive"]
    creds = ServiceAccountCredentials.from_json_keyfile_name(GOOGLE_JSON_PATH, scope)
    client = gspread.authorize(creds)

    try:
        # * Open the Google Spreadsheet
        spreadsheet = client.open(spreadsheet_title)

        # * Open the worksheet that holds the combined fee report
        sheet = spreadsheet.worksheet(worksheet_title)
    except gspread.SpreadsheetNotFound as err:
        raise Exception("❌ Spreadsheet not found! Ensure the name is correct and the service account has access.") from err
    except gspread.WorksheetNotFound as err:
        # Report the worksheet that was actually requested, not the unused
        # ``sheet_name`` argument.
        raise Exception(f"❌ Worksheet '{worksheet_title}' not found! Ensure the name matches exactly.") from err

    # * Fetch data (row 3 of the sheet holds the column headers)
    data = sheet.get_all_records(head=3)
    return pd.DataFrame(data)


## **Function for Cleaning Data**

In [65]:
def clean_data(df):
    """Turn the raw fee sheet into the frame loaded into the fees table.

    The last 7 rows of ``df`` are discarded, the 19 columns are renamed by
    position, the fee amount columns are converted to numbers (non-numeric
    cells become 0), the three trailing helper columns are dropped, ``SNo``
    is renumbered from 1, and ``TotalFees`` is computed as
    paid + concession + due. Each distinct ``PaymentStatus`` text is replaced
    by a ``PaymentStatusId`` numbered in order of first appearance.

    Side effects: writes the fee report CSV and the payment-status lookup CSV
    to fixed paths, and prints a confirmation line.

    Args:
        df: raw frame with exactly 19 columns in the sheet's column order.
            It is not modified.

    Returns:
        pandas.DataFrame: the cleaned frame, without ``PermissionUpto`` and
        ``PaymentStatus``.
    """
    # Same rows as the original ``df[:-1][:-6]`` (drop the last 7), taken as
    # an explicit copy so the assignments below write to an independent frame
    # rather than a slice (avoids SettingWithCopyWarning).
    # NOTE(review): presumably the trailing rows are sheet totals or footers;
    # confirm the count against the sheet layout.
    df = df.iloc[:-7].copy()
    df.columns = ['SNo', 'STUDENT_NAME', 'ADM_NO', 'FB_NO', 'CLASS',
                  'Term1', 'Term2', 'Term3', 'Term4', 'TotalFeePaid',
                  'Discount_Concession', 'TotalFeeDue', 'PermissionUpto',
                  'Fine', 'PaymentStatus', 'ClassNo', "AcNo", 'Concession_type', "dummy"]

    # Free-text cells such as notes or blanks are coerced to NaN, then to 0.
    columns_to_convert = ["Term1", "Term2", "Term3", "Term4", "TotalFeePaid",
                          "Discount_Concession", "TotalFeeDue", "Fine"]
    df[columns_to_convert] = df[columns_to_convert].apply(pd.to_numeric, errors='coerce').fillna(0)

    df = df.drop(columns=["AcNo", 'Concession_type', "dummy"])

    # Renumber in the current row order. The original re-sorted by this
    # freshly assigned column, which was a no-op and has been removed.
    df["SNo"] = range(1, len(df) + 1)

    df["TotalFees"] = df["TotalFeePaid"] + df["Discount_Concession"] + df["TotalFeeDue"]

    df.to_csv(r"D:\GITHUB\kotak-school-dbms\output_data\fees_report_2024_25.csv", index=False)

    # Lookup table: one id per distinct status, numbered by first appearance.
    PaymentStatus = df[["PaymentStatus"]].drop_duplicates().reset_index(drop=True)
    PaymentStatus["PaymentStatusId"] = range(1, len(PaymentStatus) + 1)
    PaymentStatus = PaymentStatus[["PaymentStatusId", "PaymentStatus"]]
    PaymentStatus.to_csv(r"D:\GITHUB\kotak-school-dbms\output_data\payment_status_table_2024_25.csv", index=False)
    print("✅ Payment Status Table created successfully.\n")

    # Swap the status text for its id.
    df = pd.merge(df, PaymentStatus, on="PaymentStatus", how="left")
    df = df.drop(columns=["PermissionUpto", "PaymentStatus"])

    return df


## **Function for Updating the Database**

In [66]:
def update_database(df):
    """Replace the contents of ``TABLE_NAME`` in MySQL with ``df``.

    Steps, in order:
      1. look for an existing UNIQUE index on ``ADM_NO``;
      2. TRUNCATE the table;
      3. add a UNIQUE constraint on ``ADM_NO`` only if step 1 found none
         (adding it unconditionally created one more index on every run);
      4. append ``df`` with ``DataFrame.to_sql``.

    Connection settings come from the global ``MYSQL_CREDENTIALS``.

    Args:
        df: cleaned fees frame whose columns match the table.

    Returns:
        None. Database errors are caught and printed, not re-raised.
    """
    # safe="" also escapes "/" so any password character is URL-safe.
    password = urllib.parse.quote(MYSQL_CREDENTIALS["password"], safe="")
    engine = create_engine(f"mysql+pymysql://{MYSQL_CREDENTIALS['username']}:{password}"
                           f"@{MYSQL_CREDENTIALS['host']}:{MYSQL_CREDENTIALS['port']}/{MYSQL_CREDENTIALS['database']}")

    try:
        with engine.connect() as conn:
            # Non_unique = 0 keeps only unique indexes on the column.
            existing_indexes = conn.execute(text(
                f"SHOW INDEX FROM {TABLE_NAME} "
                "WHERE Column_name = 'ADM_NO' AND Non_unique = 0;"
            )).fetchall()

            conn.execute(text(f"TRUNCATE TABLE {TABLE_NAME};"))
            print(f"✅ All records from the '{TABLE_NAME}' table have been deleted.\n")

            if len(existing_indexes) == 0:
                conn.execute(text(f"ALTER TABLE {TABLE_NAME} ADD UNIQUE (ADM_NO);\n"))
                print(f"✅ Unique constraint added to ADM_NO in the '{TABLE_NAME}' table.\n")
            else:
                print(f"⚠️ Unique constraint on 'ADM_NO' already exists. Skipping addition.\n")

            df.to_sql(name=TABLE_NAME, con=engine, if_exists='append', index=False)
            print(f"✅ Data successfully inserted into the '{TABLE_NAME}' table.")

    except Exception as e:
        print(f"❌ An error occurred: {e}")
    finally:
        # Release pooled connections; this engine is used for one load only.
        engine.dispose()


## **Main Execution Block**

In [67]:
if __name__ == "__main__":
    # * Fees pipeline: fetch the sheet -> clean it -> reload the MySQL table.
    # * Fetch data
    fees_df = fetch_data()
    print("✅ Data fetched successfully.\n")
    # NOTE(review): this prints every row of the raw sheet, including student
    # names; clear the cell output before sharing the notebook.
    print(fees_df.to_string())

    # * Clean data (also writes the fee report and payment-status CSVs)
    fees_df = clean_data(fees_df)
    print("✅ Data cleaned and transformed successfully.\n")
    print("✅ Columns are:\n", fees_df.columns)


    # * Update database (errors are printed by update_database, not raised)
    update_database(fees_df)


✅ Data fetched successfully.

     S.No                                       STUDENT NAME  ADM NO      FB NO     CLASS          1st Term JUN - AUG 2nd Term SEP - NOV 3rd Term DEC - FEB    4th Term MAR - MAY Total \nFee Paid Discount / Concession Total Fee \nDue Permission upto   Fine Payment Status Class No      AC No Concession_type        STAFF NAME
0       1                                  CHAITHRA PRESINGU   17159       3001    Pre KG  NO FEES FOR 1ST & 2ND TERM                                  5250                  5250            10500                                     0                            Total Paid        1  A/C No. 1                                  
1       2                       DHRUVITA PRAARTHANA SAVARAPU   16935       3002    Pre KG                                                              5250                  5250            10500                                     0                            Total Paid        1  A/C No. 1                              

In [68]:
# * Display the cleaned fees DataFrame assigned by the main block
fees_df

Unnamed: 0,SNo,STUDENT_NAME,ADM_NO,FB_NO,CLASS,Term1,Term2,Term3,Term4,TotalFeePaid,Discount_Concession,TotalFeeDue,Fine,ClassNo,TotalFees,PaymentStatusId
0,1,CHAITHRA PRESINGU,17159,3001,Pre KG,0.0,0.0,5250.0,5250.0,10500,0.0,0,0.0,1,10500.0,1
1,2,DHRUVITA PRAARTHANA SAVARAPU,16935,3002,Pre KG,0.0,0.0,5250.0,5250.0,10500,0.0,0,0.0,1,10500.0,1
2,3,DHRUVITHA ALPHONSEY SONGA,17158,3003,Pre KG,0.0,0.0,0.0,0.0,0,0.0,10500,0.0,1,10500.0,2
3,4,HARI CHANDANA MARDANA,17160,3004,Pre KG,0.0,0.0,5250.0,5250.0,10500,0.0,0,0.0,1,10500.0,1
4,5,JYOTHIRMAE NADIGATLA,16973,3005,Pre KG,0.0,0.0,5250.0,5250.0,10500,0.0,0,0.0,1,10500.0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1727,1728,SHANMUKH SALAPU,15872,1812,X - C,10450.0,10450.0,10450.0,10450.0,41800,0.0,0,0.0,43,41800.0,1
1728,1729,SIVA HARSHA VELAMAREDDI,15741,1807,X - C,10450.0,10450.0,10450.0,10450.0,41800,0.0,0,0.0,43,41800.0,1
1729,1730,UPENDRA MUTCHI,13863,1822,X - C,10450.0,10450.0,10450.0,6450.0,37800,4000.0,0,100.0,43,41800.0,1
1730,1731,VARUN PRESINGI,14360,1832,X - C,10450.0,10450.0,10450.0,10450.0,41800,0.0,0,100.0,43,41800.0,1


<h2 align="center"><b>DAY WISE REPORTS 2024-25</b></h2>

## **Import Required Libraries**

In [69]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import urllib.parse
from sqlalchemy import create_engine, text


## **Define Login Credentials and MySQL Credentials**

In [70]:
# * 🔹 Login Credentials
# login_url receives the POSTed login form in login_to_website(); data_url is
# the page fetch_fee_report_page() requests once the session is logged in.
login_url = "https://app.myskoolcom.tech/kotak_vizag/login"
data_url = "https://app.myskoolcom.tech/kotak_vizag/office_fee/fee_reports_day_wise_receipt_wise_print"

# Form fields sent to login_url.
# NOTE(review): a real username and password are committed in plain text;
# load them from the environment or prompt with getpass before sharing.
credentials = {
    "uname": "harikiran",
    "psw": "812551"
}

# Connection settings read by update_database() to build the SQLAlchemy URL.
# NOTE(review): the database password is hard-coded as well.
MYSQL_CREDENTIALS = {
    "username": "root",
    "password": "Hari@123",
    "host": "localhost",
    "port": "3306",
    "database": "schooldb",
}

# Rebinds the global used by update_database(); from here on, any
# update_database() call targets the day-wise collection table.
TABLE_NAME = "daywise_fees_collection_2024_25"

## **Define Functions for Each Step**

In [71]:
def login_to_website():
    """Log in to the school portal and return the authenticated session.

    POSTs the global ``credentials`` to ``login_url`` on a new
    ``requests.Session``.

    Returns:
        requests.Session | None: the logged-in session, or ``None`` when the
        request fails (network error, timeout, HTTP error status) or the
        response page contains the text ``"Invalid"``. The session is closed
        before ``None`` is returned.
    """
    session = requests.Session()
    try:
        # A timeout keeps the notebook from hanging on an unresponsive server.
        login_response = session.post(login_url, data=credentials, timeout=30)
        # An HTTP error page does not contain "Invalid" and would otherwise be
        # mistaken for a successful login.
        login_response.raise_for_status()
    except requests.RequestException as err:
        session.close()
        print(f"❌ Login request failed: {err}\n")
        return None

    if "Invalid" in login_response.text:
        session.close()
        print("❌ Login failed! Check credentials.\n")
        return None

    print("✅ Login successful!\n")
    return session


## **Function to Fetch Fee Report Page**

In [72]:
def fetch_fee_report_page(session):
    """Fetch the fee report page and return its first ``<table>`` element.

    Args:
        session: logged-in session used to GET the global ``data_url``.

    Returns:
        The first ``<table>`` found by BeautifulSoup's ``html.parser``, or
        ``None`` when the page contains no table.
    """
    page_html = session.get(data_url).text
    return BeautifulSoup(page_html, "html.parser").find("table")

## **Function to Extract Data from Table**

In [73]:
def extract_data_from_table(table):
    """Convert the scraped HTML table into a DataFrame of cell texts.

    Every ``<tr>`` contributes one record made of the stripped text of its
    ``<td>`` cells; rows without any ``<td>`` are skipped. The records are
    labelled with a fixed list of 11 column names.

    Args:
        table: object exposing ``find_all("tr")``, whose rows expose
            ``find_all("td")`` and whose cells expose ``.text``.

    Returns:
        pandas.DataFrame: all values are strings as scraped.
    """
    column_names = [
        "SNo", "RecieptNo", "Class", "AdmissionNo", "StudentName",
        "Date", "-", "Abacus / Vediic Maths", "TERM FEE",
        "ReceivedAmount", "Remarks"
    ]

    # Lazily turn each <tr> into its list of stripped <td> texts ...
    cell_texts_per_row = (
        [cell.text.strip() for cell in row.find_all("td")]
        for row in table.find_all("tr")
    )
    # ... and keep only the rows that actually had data cells.
    records = [cell_texts for cell_texts in cell_texts_per_row if cell_texts]

    return pd.DataFrame(records, columns=column_names)


## **Function to Clean Data**

In [74]:
def clean_data(df):
    """Prepare the scraped day-wise receipts for loading into MySQL.

    Works on a copy, so the caller's frame is left untouched. ``Date`` is
    parsed from DD-MM-YYYY (unparseable values become ``NaT``),
    ``AdmissionNo`` is kept as text, and ``"/"`` in ``Class`` is rewritten as
    ``" - "``. Every row from the first one whose ``SNo`` contains ``"TERM"``
    onward is discarded. The three helper columns ``"-"``,
    ``"Abacus / Vediic Maths"`` and ``"TERM FEE"`` are always dropped;
    previously they were dropped only when a ``"TERM"`` row was present.

    Args:
        df: frame produced by ``extract_data_from_table`` (string cells).

    Returns:
        pandas.DataFrame: receipt rows with the eight remaining columns.
    """
    # * Never modify the caller's frame
    df = df.copy()

    # * Convert 'Date' column to proper datetime format
    df['Date'] = pd.to_datetime(df['Date'], format='%d-%m-%Y', errors='coerce')

    # * Ensure 'AdmissionNo' is treated as a string (no conversion to numeric)
    df['AdmissionNo'] = df['AdmissionNo'].astype(str)

    # * "II/D" -> "II - D"; a literal replacement, not a regular expression
    df["Class"] = df["Class"].str.replace("/", " - ", regex=False)

    # * Cut at the first row whose SNo contains "TERM".
    # The cut is positional, so it is correct for any index labels.
    is_term_row = df["SNo"].str.contains("TERM", na=False)
    if is_term_row.any():
        first_term_position = int(is_term_row.to_numpy().argmax())
        df = df.iloc[:first_term_position]

    # * Drop unnecessary columns whether or not a "TERM" row was present
    return df.drop(columns=["-", "Abacus / Vediic Maths", "TERM FEE"])


## **Function to Update Database**

In [75]:
def update_database(df):
    """Empty ``TABLE_NAME`` in MySQL and reload it from ``df``.

    Builds a ``mysql+pymysql`` engine from the global ``MYSQL_CREDENTIALS``,
    truncates the table, then appends ``df`` with ``DataFrame.to_sql``.

    Args:
        df: cleaned day-wise receipts whose columns match the table.

    Returns:
        None. Errors raised inside the connection block are caught and
        printed, not re-raised.
    """
    settings = MYSQL_CREDENTIALS
    quoted_password = urllib.parse.quote(settings["password"])
    connection_url = (
        f"mysql+pymysql://{settings['username']}:{quoted_password}"
        f"@{settings['host']}:{settings['port']}/{settings['database']}"
    )
    engine = create_engine(connection_url)

    # One-off schema changes (adding a `Remarks` VARCHAR(255) column and
    # changing `AdmissionNo` to VARCHAR(20)) used to be kept here as
    # commented-out ALTER TABLE statements; they are not part of a load.
    try:
        with engine.connect() as conn:
            # * 🔹 Start from an empty table
            truncate_statement = text(f"TRUNCATE TABLE {TABLE_NAME};")
            conn.execute(truncate_statement)
            print(f"✅ All records from the '{TABLE_NAME}' table have been deleted.\n")

            # * 🔹 Append the fresh rows
            df.to_sql(name=TABLE_NAME, con=engine, if_exists='append', index=False)
            print(f"✅ Data successfully inserted into the '{TABLE_NAME}' table.\n")
    except Exception as e:
        print(f"An error occurred: {e}")


## **Main Execution Flow**

In [76]:
def main():
    """Run the day-wise fee pipeline: login, scrape, clean, export, load.

    Returns early (with a printed message from the failing step) when the
    login fails or the report page contains no table. Otherwise the cleaned
    data is written to CSV, loaded into MySQL and printed in full.
    """
    # * Log in; login_to_website() already reported the failure
    session = login_to_website()
    if session is None:
        return

    # * Fetch the fee report page and bail out when it has no table
    table = fetch_fee_report_page(session)
    if not table:
        print("❌ Table not found! The page structure might have changed.")
        return

    print("✅ Table found! Extracting data...\n")

    # * Scrape the rows, then clean them
    scraped = extract_data_from_table(table)
    df = clean_data(scraped)

    # * Save to CSV (optional)
    csv_path = r"D:\GITHUB\kotak-school-dbms\output_data\daywise_fee_collection_report_2024_25.csv"
    df.to_csv(csv_path, index=False)
    print("✅ Data saved to fee_collection_report.csv\n")

    # * Insert data into MySQL database
    update_database(df)
    print("✅ Columns are:\n", df.columns)

    # * Report the row count and print every record
    print(f"✅ {len(df)} Records Entered into database")
    print(df.to_string())


## **Run the Main Function**

In [77]:
# * Run the day-wise fee pipeline: login -> scrape -> clean -> CSV -> MySQL
main()


✅ Login successful!

✅ Table found! Extracting data...

✅ Data saved to fee_collection_report.csv

✅ All records from the 'daywise_fees_collection_2024_25' table have been deleted.

✅ Data successfully inserted into the 'daywise_fees_collection_2024_25' table.

✅ Columns are:
 Index(['SNo', 'RecieptNo', 'Class', 'AdmissionNo', 'StudentName', 'Date',
       'ReceivedAmount', 'Remarks'],
      dtype='object')
✅ 5780 Records Entered into database
       SNo RecieptNo     Class AdmissionNo                                        StudentName       Date ReceivedAmount                                                                                                       Remarks
0        1     00001    II - D       16335                                        SHAIK SALMA 2024-07-15           7150                                                                                                              
1        2     00002    II - A       16365                               SHEIK VISHITA KAUSA

<h2 align="center"><b>ATTENDANCE REPORT 2024-25</b></h2>

## **Collecting Reports**

In [78]:
# from selenium import webdriver
# from selenium.webdriver.common.by import By
# from selenium.webdriver.common.keys import Keys
# from selenium.webdriver.chrome.service import Service
# from selenium.webdriver.support.ui import WebDriverWait
# from selenium.webdriver.support import expected_conditions as EC
# import os
# import time
# from datetime import datetime

# # 🔹 Constants
# login_url = "https://app.myskoolcom.tech/kotak_vizag/login"
# attendance_url = "https://app.myskoolcom.tech/kotak_vizag/admin/attedance_grid"

# credentials = {
#     "uname": "harikiran",
#     "psw": "812551"
# }

# # 🔹 Set ChromeDriver path
# chromedriver_path = r"E:\chromedriver-win64\chromedriver.exe"

# # 🔹 Set the download folder
# download_folder = r"D:\GITHUB\kotak-school-dbms\source_data\Attendance Reports"

# # 🔹 Set the required filename
# final_filename = "AttendanceUptoMarch_2024_25.csv"
# final_filepath = os.path.join(download_folder, final_filename)

# # 🔹 Chrome options
# chrome_options = webdriver.ChromeOptions()
# prefs = {"download.default_directory": download_folder}  # ✅ Set download directory
# chrome_options.add_experimental_option("prefs", prefs)

# # 🔹 Start WebDriver
# service = Service(chromedriver_path)
# driver = webdriver.Chrome(service=service, options=chrome_options)
# wait = WebDriverWait(driver, 10)

# # **Step 1: Login to the website**
# driver.get(login_url)
# wait.until(EC.presence_of_element_located((By.NAME, "uname"))).send_keys(credentials["uname"])
# driver.find_element(By.NAME, "psw").send_keys(credentials["psw"])
# driver.find_element(By.NAME, "psw").send_keys(Keys.RETURN)

# print("✅ Logged in successfully!")

# # **Step 2: Go to Attendance Page**
# time.sleep(5)  # Wait for login
# driver.get(attendance_url)

# # **Step 3: Set Date Range**
# start_date = "2025-01-01"
# end_date = "2025-03-31"

# # **Find and set "From Date"**
# from_date_input = wait.until(EC.presence_of_element_located((By.ID, "from_attendance_date")))
# driver.execute_script("arguments[0].removeAttribute('readonly')", from_date_input)
# from_date_input.clear()
# from_date_input.send_keys(start_date)

# # **Find and set "To Date"**
# to_date_input = wait.until(EC.presence_of_element_located((By.ID, "to_attendance_date")))
# driver.execute_script("arguments[0].removeAttribute('readonly')", to_date_input)
# to_date_input.clear()
# to_date_input.send_keys(end_date)

# print(f"✅ Date range set: {start_date} to {end_date}")

# # **Step 4: Click "Download CSV"**
# try:
#     download_button = wait.until(EC.element_to_be_clickable((By.ID, "smaplecsv")))  # ✅ Use correct button ID
#     download_button.click()
#     print("✅ Download initiated!")
# except:
#     print("❌ Error: Download button not found!")

# # **Step 5: Wait for the file to download**
# time.sleep(10)  # Allow time for download

# # **Step 6: Delete existing file if it already exists**
# try:
#     if os.path.exists(final_filepath):
#         os.remove(final_filepath)
#         print(f"🗑️ Deleted existing file: {final_filename}")
# except Exception as e:
#     print(f"❌ Error deleting existing file: {e}")

# # **Step 7: Rename the latest downloaded file**
# try:
#     # Find the most recently downloaded file
#     downloaded_files = sorted(
#         [f for f in os.listdir(download_folder) if f.endswith(".csv")],
#         key=lambda x: os.path.getctime(os.path.join(download_folder, x)),
#         reverse=True,
#     )
    
#     if downloaded_files:
#         latest_file = os.path.join(download_folder, downloaded_files[0])
#         os.rename(latest_file, final_filepath)
#         print(f"✅ File renamed to: {final_filename}")
#     else:
#         print("❌ No downloaded CSV file found!")
# except Exception as e:
#     print(f"❌ Error renaming file: {e}")

# # **Step 8: Close browser**
# driver.quit()
# print("✅ Process completed successfully!")


In [79]:
# import sys
# import time

# # Simulating a download waiting time
# wait_time = 15  # Total wait time in seconds

# print("⏳ Waiting for download...", end="", flush=True)

# for i in range(wait_time):
#     time.sleep(1)
#     sys.stdout.write(".")
#     sys.stdout.flush()


## **📌 Step 1: Import Libraries**

In [80]:
# import pandas as pd
# from sqlalchemy import create_engine, text
# import logging

# # * Configure logging
# logging.basicConfig(filename=r"D:\GITHUB\kotak-school-dbms\output_data\attendance_report_2024_25.log", level=logging.ERROR, 
#                     format="%(asctime)s - %(levelname)s - %(message)s")


## **📌 Step 2: Define MySQL Credentials & Table Name**

In [81]:
# # * MySQL Credentials
# MYSQL_CREDENTIALS = {
#     "username": "root",
#     "password": "Hari@123",
#     "host": "localhost",
#     "port": "3306",
#     "database": "schooldb",
# }
# TABLE_NAME = "attendance_report_2024_25"


# # **📌 Step 3: Load and Clean Data**

In [82]:
# ## **📌 Step 3: Load and Clean Data (Updated)*
# def load_and_clean_data(file1, file2, file3):


#     # * Load Data
#     df1 = pd.read_csv(file1)
#     df2 = pd.read_csv(file2)
#     df3 = pd.read_csv(file3)

#     # * Standardize column names
#     for df in [df1, df2, df3]:
#         df.columns = df.columns.str.strip().str.replace('"', '', regex=False)

#     # * Merge DataFrames on 'Students Number' using outer join
#     df = df1.merge(df2, on='Students Number', how='outer').merge(df3, on='Students Number', how='outer')

#     # * Identify and handle duplicate columns
#     common_fields = ['Name', 'Class']
#     for field in common_fields:
#         df[field] = df.pop(f"{field}_x").combine_first(df.pop(f"{field}_y"))

#     # * Drop remaining duplicate columns
#     drop_columns = [col for col in df.columns if '_x' in col or '_y' in col]
#     df = df.drop(columns=drop_columns, errors='ignore')

#     # * Rename 'Students Number' to 'AdmissionNo'
#     df = df.rename(columns={"Students Number": "AdmissionNo"})

#     # * Reorder Columns
#     column_order = ['AdmissionNo', 'Name', 'Class'] + [col for col in df.columns if col not in ['AdmissionNo', 'Name', 'Class']]
#     df = df[column_order]

#     # * Drop Unnecessary Columns
#     columns_to_drop = ["Present Days", "Absent Days", "Toral Working Days"]  # * Ensure correct column names
#     df = df.drop(columns=[col for col in columns_to_drop if col in df.columns], errors='ignore')


#     return df




# **📌 Step 4: Process Attendance Data**

In [83]:
# import pandas as pd
# import numpy as np

# def process_attendance_data(df):

#     # * Step 1: Clean 'AdmissionNo'
#     df = df[~(df["AdmissionNo"].astype(str) == "786") & ~df["AdmissionNo"].astype(str).str.match(r"^[a-zA-Z]")].copy()

#     # * Step 2: Extract Class and Section
#     df["Class"] = df["Class"].astype(str).str.replace(r"ICSE \((.*?)\)", r"\1", regex=True)
    
#     student_df = pd.read_csv(r"D:\GITHUB\kotak-school-dbms\output_data\fees_report_2024_25.csv")
    
#     print("✅ Students Before Merging\n", len(df["AdmissionNo"].unique()))
    
#     # * Step 3: Keep only rows where AdmissionNo is in student_df
#     df = df[df["AdmissionNo"].isin(student_df["ADM_NO"])]
    
#     print("✅ Students After Merging\n", len(df["AdmissionNo"].unique()))
    
#     # * Step 3: Unpivot DataFrame
#     df_unpivot = pd.melt(df, id_vars=["AdmissionNo", "Name", "Class"], 
#                         var_name="Date", value_name="AttendanceStatus")
    
#     # * Step 5: Drop rows with 'Holiday' status
#     df_unpivot = df_unpivot[~df_unpivot["AttendanceStatus"].eq("H")].reset_index(drop=True)

#     # * Step 4: Convert 'Date' to datetime
#     df_unpivot["Date"] = pd.to_datetime(df_unpivot["Date"], format='%d.%m.%Y', errors='coerce')

#     # * Step 5: Log invalid 'Date' values
#     if df_unpivot["Date"].isna().sum() > 0:
#         print("⚠️ Warning: Some Date values were invalid and converted to NaT.")

#     # * Step 6: Sorting
#     df_unpivot = df_unpivot.sort_values("Date", ascending=False).reset_index(drop=True)

#     # * Step 7: Find the first attendance date for each student
#     first_attendance_dates = df_unpivot[df_unpivot['AttendanceStatus'].notna()].groupby('AdmissionNo')['Date'].min()

#     # * Step 8: Assign 'Not Joined' if the date is before the student's first attendance
#     df_unpivot['AttendanceStatus'] = df_unpivot.apply(
#         lambda row: "Not Joined" if row['Date'] < first_attendance_dates.get(row['AdmissionNo'], row['Date']) else row['AttendanceStatus'],
#         axis=1
#     )
    
#     # * Step 9: Prioritize Attendance Status
#     priority_map = {'P': 2, 'A': 1, 'H': 3, 'Not Joined': 4, 'TC': 5}
#     df_unpivot['Priority'] = df_unpivot["AttendanceStatus"].map(priority_map)

#     df_unpivot = df_unpivot.sort_values(by=['AdmissionNo', 'Date', 'Priority']) \
#                             .drop_duplicates(subset=['AdmissionNo', 'Date'], keep='first') \
#                             .drop(columns=['Priority'])
                                
#     # * Step 10: Final sorting
#     df_unpivot = df_unpivot[['Date', 'AdmissionNo', 'Name', 'Class', 'AttendanceStatus']]
#     df_unpivot.sort_values(by=['Date'], ascending=False, inplace=True)
    
#     df_unpivot['Class'] = df_unpivot['Class'].str.replace("Pre KG - ", "Pre KG")

#     # * Step 11: Replace Attendance Status with meaningful labels
#     df_unpivot["AttendanceStatus"] = df_unpivot["AttendanceStatus"].replace({
#         'P': "Present", 'A': "Absent", 'H': "Holiday"})

#     # * Step 13: Class & Section Mapping
#     class_section_mapping = {
#     "Pre KG": 1, "LKG - A": 2, "LKG - B": 3, "UKG - A": 4, "UKG - B": 5, "UKG - C": 6,
#     "I - A": 7, "I - B": 8, "I - C": 9, "I - D": 10, "II - A": 11, "II - B": 12, "II - C": 13, "II - D": 14,
#     "III - A": 15, "III - B": 16, "III - C": 17, "III - D": 18, "IV - A": 19, "IV - B": 20, "IV - C": 21, "IV - D": 22,
#     "V - A": 23, "V - B": 24, "V - C": 25, "V - D": 26, "VI - A": 27, "VI - B": 28, "VI - C": 29, "VI - D": 30,
#     "VII - A": 31, "VII - B": 32, "VII - C": 33, "VII - D": 34, "VIII - A": 35, "VIII - B": 36, "VIII - C": 37,
#     "IX - A": 38, "IX - B": 39, "IX - C": 40, "X - A": 41, "X - B": 42, "X - C": 43}

#     df_unpivot['ClassNo'] = df_unpivot['Class'].map(class_section_mapping)

#     # * Step 14: Grade Mapping
#     grade_mapping = [
#         ("Pre KG", 1), ("LKG", 2), ("UKG", 3),
#         ("I", 4), ("II", 5), ("III", 6), ("IV", 7), ("V", 8),
#         ("VI", 9), ("VII", 10), ("VIII", 11), ("IX", 12), ("X", 13)
#     ]

#     conditions = [df_unpivot['Class'].str.contains(fr"\b{k}\b", na=False, regex=True) for k, _ in grade_mapping]
#     choices = [v for _, v in grade_mapping]
#     df_unpivot['classId'] = np.select(conditions, choices, default=0)

#     # * Step 15: AttendanceStatus Mapping
#     AttendanceStatus_mapping = [("Absent", 1), ("Present", 2), ("Not Joined", 3), ("Holiday", 4)]
#     conditions = [df_unpivot['AttendanceStatus'].str.contains(k, na=False) for k, _ in AttendanceStatus_mapping]
#     choices = [v for _, v in AttendanceStatus_mapping]
#     df_unpivot['AttendanceStatusId'] = np.select(conditions, choices, default=0)

#     # * Step 16: Branch Mapping
#     branch_mapping = [
#         ('Pre KG', 1), ('LKG', 1), ('UKG', 1),
#         ('I', 2), ('II', 2), ('III', 2), ('IV', 2), ('V', 2),
#         ('VI', 3), ('VII', 3), ('VIII', 3), ('IX', 3), ('X', 3)
#     ]

#     conditions = [df_unpivot['Class'].str.contains(fr"\b{k}\b", na=False, regex=True) for k, _ in branch_mapping]
#     choices = [v for _, v in branch_mapping]
#     df_unpivot['branchId'] = np.select(conditions, choices, default=0)

#     # * Step 17: Branch Name Mapping
#     branch_name_mapping = {1: 'Kindergarten', 2: 'Primary', 3: 'Higher'}
#     df_unpivot['branchName'] = df_unpivot['branchId'].map(branch_name_mapping)
    

#     grade_mapping_reversed = {
#     1: "Pre KG", 2: "LKG", 3: "UKG",
#     4: "I", 5: "II", 6: "III", 7: "IV", 8: "V",
#     9: "VI", 10: "VII", 11: "VIII", 12: "IX", 13: "X"
# }
    

#     df_unpivot['className'] = df_unpivot['classId'].map(grade_mapping_reversed)

#     # * Step 19: Final DataFrame Cleanup
#     class_sec_ids = df_unpivot[["ClassNo", "Class", "classId", "className", "branchId", "branchName",]].drop_duplicates(subset=["ClassNo"])
#     class_sec_ids = class_sec_ids.sort_values(by=['ClassNo']).reset_index(drop=True)
#     class_sec_ids.to_csv(r"D:\GITHUB\kotak-school-dbms\output_data\class_section_grade_table_2024_25.csv", index=False)

#     # * Step 19: Final DataFrame Cleanup
#     # * Step 19: Final DataFrame Cleanup
#     class_colors = {
#         1: "# *FFC0CB",  # * Pre KG - Pink
#         2: "# *FFD700",  # * LKG - Gold
#         3: "# *FFA07A",  # * UKG - Light Salmon
#         4: "# *ADD8E6",  # * I - Light Blue
#         5: "# *90EE90",  # * II - Light Green
#         6: "# *FFA500",  # * III - Orange
#         7: "# *87CEEB",  # * IV - Sky Blue
#         8: "# *32CD32",  # * V - Lime Green
#         9: "# *6A5ACD",  # * VI - Slate Blue
#         10: "# *008080",  # * VII - Teal
#         11: "# *4682B4",  # * VIII - Steel Blue
#         12: "# *8B0000",  # * IX - Dark Red
#         13: "# *4B0082"   # * X - Indigo
#     }

#     # * Extract unique class IDs and class names
#     class_ids = df_unpivot[["classId", "className"]].drop_duplicates(subset=["classId"])
#     class_ids = class_ids.sort_values(by=["classId"]).reset_index(drop=True)

#     # * Add color column based on classId
#     class_ids["color"] = class_ids["classId"].map(class_colors)
#     class_ids.to_csv(r"D:\GITHUB\kotak-school-dbms\output_data\class_table_2024_25.csv", index=False)

#     # * Step 19: Final DataFrame Cleanup
#     branch_ids = df_unpivot[["branchId", "branchName",]].drop_duplicates(subset=["branchId"])
#     branch_ids = branch_ids.sort_values(by=['branchId']).reset_index(drop=True)
#     branch_ids.to_csv(r"D:\GITHUB\kotak-school-dbms\output_data\branch_table_2024_25.csv", index=False)
    
#     AttendanceStatus_table = df_unpivot[['AttendanceStatusId','AttendanceStatus']].drop_duplicates(subset=["AttendanceStatusId"])
    
#     AttendanceStatus_table.to_csv(r"D:\GITHUB\kotak-school-dbms\output_data\AttendanceStatus_table_2024_25.csv", index=False)
    
#     df_unpivot.to_csv(r"D:\GITHUB\kotak-school-dbms\output_data\attendance_report_2024_25.csv", index=False)    

#     df_unpivot = df_unpivot[['Date', 'AdmissionNo', 'ClassNo', 'classId', 'branchId', 'AttendanceStatusId']]
    
#     return df_unpivot

## **📌 Step 5: Insert Data into MySQL**

In [84]:
# def update_database(df):
#     """Insert attendance data into MySQL database."""
#     password = urllib.parse.quote(MYSQL_CREDENTIALS["password"])
#     engine = create_engine(f"mysql+pymysql://{MYSQL_CREDENTIALS['username']}:{password}"
#                            f"@{MYSQL_CREDENTIALS['host']}:{MYSQL_CREDENTIALS['port']}/{MYSQL_CREDENTIALS['database']}")
    
#     try:
#         print(f"Connecting to database {MYSQL_CREDENTIALS['database']} at {MYSQL_CREDENTIALS['host']}...\n")
        
#         with engine.begin() as conn:
#             print(f"Truncating existing table: {TABLE_NAME}")
#             conn.execute(text(f"TRUNCATE TABLE {TABLE_NAME};"))
#             print(f"Inserting data into {TABLE_NAME} table...\n")
            
#             # * Try inserting the data in chunks (optional to prevent overload)
#             df.to_sql(name=TABLE_NAME, con=engine, if_exists='append', index=False, chunksize=1000)  # * Chunks of 1000 rows
#             print(f"✅ Data successfully inserted into '{TABLE_NAME}' table.\n")
    
#     except Exception as e:
#         print(f"❌ An error occurred. Check the logs for details. Error: {e}")
#         logging.error(f"❌ Database update failed: {e}")
#         logging.error(f"❌ Failed Data Sample (first 5 rows): \n{df.head()}")
#         logging.error(f"Total Rows in DataFrame: {df.shape[0]}")
        
#         # * Capture more details to help debug
#         logging.error("MySQL Connection Information:")
#         logging.error(f"Host: {MYSQL_CREDENTIALS['host']}")
#         logging.error(f"Database: {MYSQL_CREDENTIALS['database']}")
#         logging.error(f"Port: {MYSQL_CREDENTIALS['port']}")
#         logging.error("Error Traceback:")
#         import traceback
#         logging.error(traceback.format_exc())


## **📌 Step 6: Run the Full Pipeline**

In [85]:
# def main():
#     file1 = r"D:\GITHUB\kotak-school-dbms\source_data\Attendance Reports\AttendanceReportUptoSeptember_2024_25.csv"
#     file2 = r"D:\GITHUB\kotak-school-dbms\source_data\Attendance Reports\AttendanceOctoberToDecember_2024_25.csv"
#     file3 = r"D:\GITHUB\kotak-school-dbms\source_data\Attendance Reports\AttendanceUptoMarch_2024_25.csv"
#     output_file = r"D:\GITHUB\kotak-school-dbms\output_data\AttendanceReport_2024_25.csv"
    
#     try:
#         print("Loading and cleaning data...\n")
#         df = load_and_clean_data(file1, file2, file3)
#         print(f"✅ Data loaded with {df.shape[0]} rows.\n")
        
#         print("Processing attendance data...\n")
#         df_unpivot = process_attendance_data(df)
#         df_unpivot.to_csv(output_file, index=False)
#         print(f"✅ Processed data with {df_unpivot.shape[0]} rows.\n")
#         print("✅ Columns are:\n", df_unpivot.columns)
#         print(max(df_unpivot["Date"]))
                
#         print("Updating database...\n")
#         update_database(df_unpivot)
#         print("✅ Data updated successfully!\n")

#         print("✅ Attendance report processing completed successfully!\n")
#         print(f"✅ No of Rows: {df_unpivot.shape[0]}\n")
        
#     except Exception as e:
#         print(f"❌ An unexpected error occurred. Error: {e}\n")
#         logging.error(f"❌ Unexpected error: {e}\n")


# # * Run the script
# main()


In [86]:
# import pandas as pd

# # Load CSV file
# df_unpivot = pd.read_csv(r"D:\GITHUB\kotak-school-dbms\output_data\AttendanceReport_2024_25.csv")

# # Apply filtering correctly
# print("Attendance of Student with AdmissionNo 14588 when Absent")
# filtered_df = df_unpivot[(df_unpivot["AdmissionNo"] == "14588") & (df_unpivot["AttendanceStatusId"] == 2)]

# filtered_df  # Display the filtered data


<h2 align="center"><b>Class Table 2024-25</b></h2>

In [87]:
import pandas as pd
from oauth2client.service_account import ServiceAccountCredentials
from sqlalchemy import create_engine, text
import urllib.parse
import traceback
import logging
from sqlalchemy import create_engine, text


import os  # imported here so this cell works on its own

# * MySQL connection settings used by update_database() in this section.
# * NOTE(review): secrets should not be stored in the notebook. The password can
# * now be supplied through the MYSQL_PASSWORD environment variable; the literal
# * is kept only as a fallback so existing runs behave exactly as before.
MYSQL_CREDENTIALS = {
    "username": "root",
    "password": os.environ.get("MYSQL_PASSWORD", "Hari@123"),
    "host": "localhost",
    "port": "3306",
    "database": "schooldb",
}
# * Table that this section truncates and reloads
TABLE_NAME = "class_table_2024_25"

In [88]:
# * Load the class/section lookup CSV that will be written to MySQL below.
# * NOTE(review): TABLE_NAME is "class_table_2024_25" but the file loaded here is
# * class_section_grade_table_2024_25.csv - confirm this is the intended source.
df = pd.read_csv(r"D:\GITHUB\kotak-school-dbms\output_data\class_section_grade_table_2024_25.csv")
df.head()

Unnamed: 0,ClassNo,Class,classId,className,branchId,branchName
0,1,Pre KG,1,Pre KG,1,Kindergarten
1,2,LKG - A,2,LKG,1,Kindergarten
2,3,LKG - B,2,LKG,1,Kindergarten
3,4,UKG - A,3,UKG,1,Kindergarten
4,5,UKG - B,3,UKG,1,Kindergarten


In [89]:
# * Inspect the column names; they are the same names declared in TABLE_SCHEMA
df.columns

Index(['ClassNo', 'Class', 'classId', 'className', 'branchId', 'branchName'], dtype='object')

In [90]:
# * Define Table Schema (Modify based on actual data types)
# * DDL executed by update_database() before loading, so the table exists on a
# * fresh database. TABLE_NAME is interpolated when this cell runs, so re-run the
# * cell after changing TABLE_NAME. ClassNo is the primary key, so the loaded
# * frame must not contain duplicate ClassNo values.
TABLE_SCHEMA = f"""
CREATE TABLE IF NOT EXISTS {TABLE_NAME} (
    ClassNo INT PRIMARY KEY,
    Class VARCHAR(50),
    classId INT,
    className VARCHAR(50),
    branchId INT,
    branchName VARCHAR(50)
);"""

In [91]:


def update_database(df):
    """Create the target table if needed, empty it, and load ``df`` into it.

    The table name comes from the module-level ``TABLE_NAME`` and its DDL from
    ``TABLE_SCHEMA``; connection settings come from ``MYSQL_CREDENTIALS``.

    Args:
        df: DataFrame whose columns match the columns declared in
            ``TABLE_SCHEMA``. The index is not written.

    Returns:
        None. Any failure is printed and logged (with traceback) instead of
        being raised, so the caller cannot detect it from the return value.
    """
    # * The password may contain URL-reserved characters (e.g. "@"), so quote it
    password = urllib.parse.quote(MYSQL_CREDENTIALS["password"])
    engine = create_engine(f"mysql+pymysql://{MYSQL_CREDENTIALS['username']}:{password}"
                           f"@{MYSQL_CREDENTIALS['host']}:{MYSQL_CREDENTIALS['port']}/{MYSQL_CREDENTIALS['database']}")

    try:
        print(f"Connecting to database {MYSQL_CREDENTIALS['database']} at {MYSQL_CREDENTIALS['host']}...")
        logging.info(f"Connecting to database {MYSQL_CREDENTIALS['database']} at {MYSQL_CREDENTIALS['host']}...")

        with engine.begin() as conn:
            # * *Create Table if it does not exist*
            print(f"Checking if table '{TABLE_NAME}' exists...")
            conn.execute(text(TABLE_SCHEMA))
            print(f"✅ Table '{TABLE_NAME}' is ready.")

            # * *Truncate the table before inserting data*
            print(f"Truncating existing table: {TABLE_NAME}")
            conn.execute(text(f"TRUNCATE TABLE {TABLE_NAME};"))

            # * *Insert Data*
            # * Write through the connection opened above (not a second one taken
            # * from the engine) so the insert belongs to this transaction and is
            # * committed when the `with` block exits.
            print(f"Inserting data into {TABLE_NAME} table...")
            df.to_sql(name=TABLE_NAME, con=conn, if_exists='append', index=False, chunksize=1000, method='multi')

            print(f"✅ Data successfully inserted into '{TABLE_NAME}' table.")
            logging.info(f"✅ Data successfully inserted into '{TABLE_NAME}' table.")

    except Exception as e:
        error_message = f"❌ An error occurred: {e}"
        print(error_message)
        logging.error(error_message)
        logging.error("Error Traceback:\n" + traceback.format_exc())
    finally:
        # * Release pooled connections; a new engine is built on every call
        engine.dispose()

In [92]:
# * Reload the class table from the CSV read above (truncates the table first)
update_database(df)

Connecting to database schooldb at localhost...
Checking if table 'class_table_2024_25' exists...
✅ Table 'class_table_2024_25' is ready.
Truncating existing table: class_table_2024_25
Inserting data into class_table_2024_25 table...
✅ Data successfully inserted into 'class_table_2024_25' table.


<h2 align="center"><b>FEE COLLECTION REPORT 2024-25</b></h2>

## **Import Required Libraries**

In [93]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
from sqlalchemy import create_engine, text

# * Define Login Credentials and MySQL Credentials
import os  # imported here so this cell works on its own

# * 🔹 Login Credentials
login_url = "https://app.myskoolcom.tech/kotak_vizag/login"
data_url = "https://app.myskoolcom.tech/kotak_vizag/office_fee/fee_consolidate_report_print?&from=2024-04-01&to=&is_transport_fee=&college_id=&course_id=&branch_id=&semister_id=&section_id=&academic_years_id=&payment_type_id=&fee_status=&status=1&imageField=Search"

# * NOTE(review): secrets should not be stored in the notebook. Each secret can
# * now be supplied through an environment variable; the literals are kept only
# * as fallbacks so existing runs behave exactly as before.
credentials = {
    "uname": os.environ.get("MYSKOOL_USERNAME", "harikiran"),
    "psw": os.environ.get("MYSKOOL_PASSWORD", "812551")
}

MYSQL_CREDENTIALS = {
    "username": "root",
    "password": os.environ.get("MYSQL_PASSWORD", "Hari@123"),
    "host": "localhost",
    "port": "3306",
    "database": "schooldb",
}

# * Table that this section truncates and reloads
TABLE_NAME = "fees_collection_2024_25"


In [94]:
## **Function to Log in to Website*
def login_to_website():
    """Log in to the school portal and return the authenticated session.

    Posts the module-level ``credentials`` to ``login_url``. The login is
    treated as failed when the response body contains the text "Invalid".

    Returns:
        The ``requests.Session`` on success, or ``None`` on failure.
    """
    session = requests.Session()
    login_response = session.post(login_url, data=credentials)

    # * Success path first: anything without "Invalid" in the page is accepted
    if "Invalid" not in login_response.text:
        print("✅ Login successful!\n")
        return session

    print("❌ Login failed! Check credentials.\n")
    return None

In [95]:
## **Function to Fetch All Fee Tables*
def fetch_all_fee_tables(session):
    """Download the fee report page and combine all of its fee tables.

    Args:
        session: Logged-in session used to GET the module-level ``data_url``.

    Returns:
        One DataFrame with the rows of every ``<table class="b-t">`` on the
        page, re-indexed from 0. An empty DataFrame is returned when the page
        has no such table, so callers can test ``df.empty``.
    """
    response = session.get(data_url)
    soup = BeautifulSoup(response.text, "html.parser")

    # * Find all fee tables
    fee_tables = soup.find_all("table", class_="b-t")

    # * Convert each table; table_to_dataframe may return None, which is skipped
    all_data = []
    for table in fee_tables:
        df = table_to_dataframe(table)
        if df is not None:
            all_data.append(df)

    # * pd.concat raises ValueError on an empty list, which previously hid the
    # * caller's "No data found" message behind a traceback
    if not all_data:
        return pd.DataFrame()

    # * Merge all class data into a single DataFrame
    combined_df = pd.concat(all_data, ignore_index=True)
    return combined_df

In [96]:
## **Function to Convert HTML Table to DataFrame*
def table_to_dataframe(table):
    """Turn one parsed HTML table into a DataFrame of stripped cell text.

    Args:
        table: Parsed table element exposing ``find_all``; a falsy value
            (e.g. ``None``) is reported and rejected.

    Returns:
        DataFrame whose columns are the text of every ``<th>`` in the table and
        whose rows are the ``<td>`` texts of each ``<tr>`` after the first,
        or ``None`` when ``table`` is falsy.
    """
    if not table:
        print("❌ No table to convert!")
        return None

    # * Column names come from all header cells of the table
    column_names = []
    for header_cell in table.find_all("th"):
        column_names.append(header_cell.get_text(strip=True))

    # * The first <tr> is the header row; rows without any <td> are ignored
    body_rows = []
    for row in table.find_all("tr")[1:]:
        values = [cell.get_text(strip=True) for cell in row.find_all("td")]
        if not values:
            continue
        body_rows.append(values)

    return pd.DataFrame(body_rows, columns=column_names)

In [97]:
import numpy as np

## **Function to Clean Data*
def clean_data(df):
    """Normalise the scraped fee-collection table.

    Steps: drop rows whose first column starts with "Total", rename the eleven
    scraped columns, convert the four amount columns to integers (commas
    removed, blanks/unparseable values become 0), renumber ``SNo`` from 1 and
    drop the per-fee-head columns.

    Args:
        df: Combined scraped table with exactly eleven columns, one of which
            is named 'Admin No.'.

    Returns:
        DataFrame with columns SNo, AdmissionNo, Name, Total_Fees,
        Total_Fee_Paid, Discount_Concession, Total_Due. The original row index
        is kept (it is not reset after the "Total" rows are removed).
    """
    # * Summary rows are recognised by "Total" at the start of the first column
    first_column_text = df.iloc[:, 0].astype(str)
    is_total_row = first_column_text.str.startswith("Total", na=False)
    df = df[~is_total_row].copy()

    # * Admission numbers stay textual (never converted to numeric)
    df.loc[:,'Admin No.'] = df['Admin No.'].astype(str)

    # * The two fee-head names appear twice: once under "fees" and once under "paid"
    df.columns = ['SNo', 'AdmissionNo', 'Name', 'Abacus / Vediic Maths', 'TERM FEE',
       'Total_Fees', 'Abacus / Vediic Maths', 'TERM FEE',
       'Total_Fee_Paid', 'Discount_Concession', 'Total_Due']

    # * Amount columns: text -> no commas -> NaN for blanks -> number -> int
    for amount_col in ("Total_Fees", "Total_Fee_Paid", "Discount_Concession", "Total_Due"):
        amount_text = df[amount_col].astype(str).str.replace(",", "")
        amount_text = amount_text.replace(["", "None", "nan", "NaN", np.nan], np.nan)
        df[amount_col] = pd.to_numeric(amount_text, errors='coerce').fillna(0).astype(int)

    # * Serial numbers are regenerated because rows were removed above
    df["SNo"] = range(1, len(df) + 1)

    # * Dropping by label removes both occurrences of each duplicated name
    return df.drop(columns=['Abacus / Vediic Maths', 'TERM FEE'])

In [98]:
## **Function to Update MySQL Database*
def update_database(df):
    """Empty the fee-collection table and reload it from ``df``.

    The table name comes from the module-level ``TABLE_NAME`` and connection
    settings from ``MYSQL_CREDENTIALS``. The table must already exist.

    Args:
        df: Cleaned fee data; its columns must match the table's columns.
            The index is not written.

    Returns:
        None. Any failure is printed instead of being raised, so the caller
        cannot detect it from the return value.
    """
    # * The password may contain URL-reserved characters (e.g. "@"), so quote it
    password = urllib.parse.quote(MYSQL_CREDENTIALS["password"])
    engine = create_engine(f"mysql+pymysql://{MYSQL_CREDENTIALS['username']}:{password}"
                           f"@{MYSQL_CREDENTIALS['host']}:{MYSQL_CREDENTIALS['port']}/{MYSQL_CREDENTIALS['database']}")

    try:
        # * engine.begin() commits on success and rolls back on error; the
        # * insert below uses the same connection instead of opening a second one
        with engine.begin() as conn:
            # * 🔹 Truncate the table before inserting new data
            conn.execute(text(f"TRUNCATE TABLE {TABLE_NAME};"))
            print(f"✅ All records from the '{TABLE_NAME}' table have been deleted.\n")

            # * 🔹 Insert the data into the table
            df.to_sql(name=TABLE_NAME, con=conn, if_exists='append', index=False)
            print(f"✅ Data successfully inserted into the '{TABLE_NAME}' table.\n")

    except Exception as e:
        print(f"❌ Error occurred while updating database: {e}")
    finally:
        # * Release pooled connections; a new engine is built on every call
        engine.dispose()

In [99]:
## **Main Execution Flow*
def main():
    """Run the fee-collection pipeline: login, scrape, clean, save CSV, load MySQL.

    Returns early (None) when the login fails; prints a message when the
    scraped result is empty.
    """
    # * Log in to the website
    session = login_to_website()
    if session is None:
        return

    # * Fetch all fee tables
    df = fetch_all_fee_tables(session)

    if not df.empty:
        print("✅ Data extracted successfully! Cleaning data...\n")

        # * Clean the data
        df = clean_data(df)
        print("✅ Columns are:\n", df.columns)

        # * Save to CSV (optional); the message reports the file actually written
        output_file = r"D:\GITHUB\kotak-school-dbms\output_data\fee_collection_report_2024_25.csv"
        df.to_csv(output_file, index=False)
        print(f"✅ Data saved to '{output_file}'\n")

        # * Insert data into MySQL database
        # * NOTE(review): update_database() reports its own errors and does not
        # * raise, so the count below is printed even if the load failed.
        update_database(df)
        print(f"✅ {len(df)} records entered into the database")

        # * Print the full cleaned table
        print(df.to_string(index=False))

    else:
        print("❌ No data found! The page structure might have changed.")

In [100]:
# * Run the fee-collection pipeline when this cell/script is executed directly
if __name__ == "__main__":
    main()


✅ Login successful!

✅ Data extracted successfully! Cleaning data...

✅ Columns are:'
 Index(['SNo', 'AdmissionNo', 'Name', 'Total_Fees', 'Total_Fee_Paid',
       'Discount_Concession', 'Total_Due'],
      dtype='object')
✅ Data saved to 'daywise_fee_collection_report.csv'

✅ All records from the 'fees_collection_2024_25' table have been deleted.

✅ Data successfully inserted into the 'fees_collection_2024_25' table.

✅ 1805 records entered into the database
 SNo AdmissionNo                                              Name  Total_Fees  Total_Fee_Paid  Discount_Concession  Total_Due
   1       17183                                 NIHANSHU CHOWDARI       24000               0                    0      24000
   2       17214                                            VAMIKA       24000               0                    0      24000
   3       17216                                       SANVI KUNDU       24000               0                    0      24000
   4       17261             

<h2 align="center"><b>FEE CONCESSION REPORT 2024-25</b></h2>

In [101]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import urllib.parse
from sqlalchemy import create_engine, text
import pymysql  # * Ensure pymysql is installed

# * Define Login Credentials and MySQL Credentials
import os  # imported here so this cell works on its own

login_url = "https://app.myskoolcom.tech/kotak_vizag/login"
data_url = "https://app.myskoolcom.tech/kotak_vizag/office_fee/fee_discounts_report_receipt_wise_print?&academic_years_id=1"

# * NOTE(review): secrets should not be stored in the notebook. Each secret can
# * now be supplied through an environment variable; the literals are kept only
# * as fallbacks so existing runs behave exactly as before.
credentials = {
    "uname": os.environ.get("MYSKOOL_USERNAME", "harikiran"),
    "psw": os.environ.get("MYSKOOL_PASSWORD", "812551")
}

MYSQL_CREDENTIALS = {
    "username": "root",
    "password": os.environ.get("MYSQL_PASSWORD", "Hari@123"),
    "host": "localhost",
    "port": "3306",
    "database": "schooldb",
}

# * Table that this section truncates and reloads
TABLE_NAME = "fee_concession_2024_25"

## **Function to Log in to Website*
def login_to_website():
    """Log in to the school portal and return the authenticated session.

    Posts the module-level ``credentials`` to ``login_url``. The login is
    rejected when the HTTP status is not 200 or when the returned page
    contains a ``<div class="alert-danger">`` element.

    Returns:
        The ``requests.Session`` on success, or ``None`` on failure.
    """
    session = requests.Session()
    login_response = session.post(login_url, data=credentials)

    request_succeeded = login_response.status_code == 200
    if not request_succeeded:
        print("❌ Login request failed! Server error.\n")
        return None

    # * The portal signals bad credentials with an alert banner in the page
    page = BeautifulSoup(login_response.text, "html.parser")
    error_banner = page.find("div", class_="alert-danger")
    if error_banner:
        print("❌ Login failed! Check credentials.\n")
        return None

    print("✅ Login successful!\n")
    return session

## **Function to Fetch All Fee Tables*
def fetch_all_concession_tables(session):
    """Download the concession report page and combine all of its tables.

    Args:
        session: Logged-in session used to GET the module-level ``data_url``.

    Returns:
        One DataFrame with the rows of every ``<table class="table_view">`` on
        the page, re-indexed from 0, or ``None`` when no such table exists or
        none of them yields any rows.
    """
    page = session.get(data_url)
    parsed = BeautifulSoup(page.text, "html.parser")

    tables = parsed.find_all("table", class_="table_view")
    if not tables:
        print("❌ No fee tables found! The page structure may have changed.")
        return None

    # * table_to_dataframe returns None for tables without usable rows
    frames = [frame for frame in map(table_to_dataframe, tables) if frame is not None]
    if not frames:
        print("❌ No data extracted from tables.")
        return None

    return pd.concat(frames, ignore_index=True)

## **Function to Convert HTML Table to DataFrame*
def table_to_dataframe(table):
    """Turn one parsed concession table into a DataFrame of its first 8 columns.

    Args:
        table: Parsed table element exposing ``find_all``.

    Returns:
        DataFrame built from the first eight ``<th>`` texts and, for every
        ``<tr>`` after the first that has at least eight ``<td>`` cells, the
        first eight cell texts. ``None`` when no row qualifies.
    """
    # * Slicing is a no-op when there are eight or fewer headers
    column_names = [th.get_text(strip=True) for th in table.find_all("th")][:8]

    # * Rows with fewer than eight cells are discarded; wider rows are truncated
    kept_rows = []
    for tr in table.find_all("tr")[1:]:
        values = [td.get_text(strip=True) for td in tr.find_all("td")]
        if len(values) < 8:
            continue
        kept_rows.append(values[:8])

    if not kept_rows:
        return None
    return pd.DataFrame(kept_rows, columns=column_names)

## **Function to Clean Data*
def clean_data(df):
    """Normalise the scraped fee-concession table.

    Column names are stripped, lower-cased and spaces become underscores.
    ``discount_given`` is converted to a number (thousands separators removed,
    blanks/unparseable values become 0), ``date`` is parsed to ``datetime.date``
    and rows with an unparseable date are dropped.

    The input frame is not modified; a cleaned copy is returned.

    Args:
        df: Combined scraped table. After name normalisation it must contain
            receipt_no, date, student_number, student_name, fee_name,
            fee_amount, discount_given and total_due_amount.

    Returns:
        DataFrame with columns id, date, student_number, student_name,
        discount_given, where ``id`` numbers the remaining rows from 1.

    Raises:
        KeyError: if one of the expected columns is missing.
    """
    # * Work on a copy so the caller's frame keeps its original column names/values
    df = df.copy()
    df.columns = df.columns.str.strip().str.replace(" ", "_").str.lower()

    df = df.fillna("")

    # * Ensure numeric columns are converted correctly
    numeric_cols = ["discount_given"]  # * Modify as needed
    for col in numeric_cols:
        if col in df.columns:
            # * Strip thousands separators first: "1,500" would otherwise be
            # * coerced to NaN and silently stored as 0
            without_commas = df[col].astype(str).str.replace(",", "", regex=False)
            df[col] = pd.to_numeric(without_commas, errors="coerce").fillna(0)

    # * Ensure 'date' column is in correct format
    # * NOTE(review): no explicit format is given, so pandas infers it; if the
    # * site emits day-first dates, confirm they are not read month-first.
    if "date" in df.columns:
        df["date"] = pd.to_datetime(df["date"], errors="coerce").dt.date  # * Converts to YYYY-MM-DD
        # * Remove rows where 'date' is still NaT (invalid date); copy so the
        # * assignments below act on an independent frame
        df = df.dropna(subset=["date"]).copy()

    df = df.drop(columns=[ 'receipt_no', 'fee_name',
       'fee_amount',  'total_due_amount'])

    # * Row ids are assigned after filtering so they are contiguous
    df["id"] = range(1, len(df) + 1)

    df = df[['id', 'date', 'student_number',  'student_name', 'discount_given']]

    return df

## **Function to Update MySQL Database*
def update_database(df):
    """Empty the fee-concession table and reload it from ``df``.

    The table name comes from the module-level ``TABLE_NAME`` and connection
    settings from ``MYSQL_CREDENTIALS``. The table must already exist.

    Args:
        df: Cleaned concession data; its columns must match the table's
            columns. The index is not written.

    Returns:
        None. Any failure is printed instead of being raised, so the caller
        cannot detect it from the return value.
    """
    # * The password may contain URL-reserved characters (e.g. "@"), so quote it
    password = urllib.parse.quote(MYSQL_CREDENTIALS["password"])
    engine = create_engine(f"mysql+pymysql://{MYSQL_CREDENTIALS['username']}:{password}"
                           f"@{MYSQL_CREDENTIALS['host']}:{MYSQL_CREDENTIALS['port']}/{MYSQL_CREDENTIALS['database']}")
    try:
        # * engine.begin() commits on success and rolls back on error; the
        # * insert below uses the same connection instead of opening a second one
        with engine.begin() as conn:
            conn.execute(text(f"TRUNCATE TABLE {TABLE_NAME};"))
            print(f"✅ All records from '{TABLE_NAME}' table deleted.\n")
            df.to_sql(name=TABLE_NAME, con=conn, if_exists="append", index=False)
            print(f"✅ Data successfully inserted into '{TABLE_NAME}'.\n")
    except Exception as e:
        print(f"❌ Error updating database: {e}")
    finally:
        # * Release pooled connections; a new engine is built on every call
        engine.dispose()

## **Main Execution Flow*
def main():
    """Run the fee-concession pipeline: login, scrape, clean, save CSV, load MySQL.

    Returns early (None) when the login fails or when no data is scraped.
    """
    session = login_to_website()
    if session is None:
        return

    df = fetch_all_concession_tables(session)
    if df is None or df.empty:
        print("❌ No data found! The page structure might have changed.")
        return

    print("✅ Data extracted successfully! Cleaning data...\n")
    df = clean_data(df)
    print("✅ Data cleaned successfully!\n")
    print("✅ Columns are:\n", df.columns)
    # * Raw string, so backslashes are written once; escaping them inside r"..."
    # * produced a path with doubled separators
    output_file = r"D:\GITHUB\kotak-school-dbms\output_data\fee_concession_report_2024_25.csv"
    df.to_csv(output_file, index=False)
    print(f"✅ Data saved to '{output_file}'\n")
    # * NOTE(review): update_database() reports its own errors and does not
    # * raise, so the count below is printed even if the load failed.
    update_database(df)
    print(f"✅ {len(df)} records entered into the database")
    print(df.to_string())

# * Run the fee-concession pipeline when this cell/script is executed directly
if __name__ == "__main__":
    main()

✅ Login successful!

✅ Data extracted successfully! Cleaning data...

✅ Data cleaned successfully!

✅ Columns are:
 Index(['id', 'date', 'student_number', 'student_name', 'discount_given'], dtype='object')
✅ Data saved to 'D:\\GITHUB\\kotak-school-dbms\\output_data\\fee_concession_report_2024_25.csv'

✅ All records from 'fee_concession_2024_25' table deleted.

✅ Data successfully inserted into 'fee_concession_2024_25'.

✅ 158 records entered into the database
      id        date student_number                                       student_name  discount_given
0      1  2024-07-27          15660                        POTHIRENDI YAASHVAN (V - A)           500.0
1      2  2024-07-27          16070                     DONGA YASMITH JAYA SAI (V - B)           500.0
2      3  2024-07-27          16105                           JAGARANA GEETIKA (V - A)           500.0
3      4  2024-07-27          16165                            SUMUEL NAGARAPU (X - B)           500.0
4      5  2024-07-27 

<h2 align="center"><b>FB NO's EXTRACTION 2024-25</b></h2>

In [102]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import re
from sqlalchemy import create_engine, text

# * *Define Credentials*
import os  # imported here so this cell works on its own

login_url = "https://app.myskoolcom.tech/kotak_vizag/login"
data_url = "https://app.myskoolcom.tech/kotak_vizag/office_fee/fee_reports_day_wise_receipt_wise_print"

# * NOTE(review): secrets should not be stored in the notebook. Each secret can
# * now be supplied through an environment variable; the literals are kept only
# * as fallbacks so existing runs behave exactly as before.
credentials = {
    "uname": os.environ.get("MYSKOOL_USERNAME", "harikiran"),
    "psw": os.environ.get("MYSKOOL_PASSWORD", "812551")
}

MYSQL_CREDENTIALS = {
    "username": "root",
    "password": os.environ.get("MYSQL_PASSWORD", "Hari@123"),
    "host": "localhost",
    "port": "3306",
    "database": "schooldb",
}

TABLE_NAME = "daywise_fees_collection_2024_25"

# * *Login to Website*
session = requests.Session()
login_response = session.post(login_url, data=credentials)

# * Only the HTTP status is checked here; the page content is not inspected
if login_response.status_code != 200:
    print("❌ Login failed! Check credentials.\n")
    # * raise SystemExit instead of calling exit(): in a script both end the
    # * run, but inside a Jupyter kernel exit() asks the kernel to shut down,
    # * whereas SystemExit only stops this cell
    raise SystemExit
else:
    print("✅ Login successful!\n")

# * *Fetch Fee Report Page*
response = session.get(data_url)
soup = BeautifulSoup(response.text, "html.parser")
# * Only the first <table> on the page is used
table = soup.find("table")

if not table:
    print("❌ Table not found! The page structure might have changed.")
    raise SystemExit

print("✅ Table found! Extracting data...\n")

# * *Extract Data from Table*
# * Collect the stripped <td> texts of every row; rows without any <td>
# * (e.g. header rows made of <th>) are skipped
rows = []
for tr in table.find_all("tr"):
    cols = [td.text.strip() for td in tr.find_all("td")]
    if cols:
        rows.append(cols)

# * *Define Header*
# * Column names are assigned by position (11 columns); they are not read from the page
header_row = [
    "SNo", "RecieptNo", "Class", "AdmissionNo", "StudentName", 
    "Date", "-", "Abacus / Vediic Maths", "TERM FEE", 
    "ReceivedAmount", "FB_No"
]

df = pd.DataFrame(rows, columns=header_row)

# * *Clean Data*
# * Dates are parsed as day-month-year; anything else becomes NaT
df['Date'] = pd.to_datetime(df['Date'], format='%d-%m-%Y', errors='coerce')
df['AdmissionNo'] = df['AdmissionNo'].astype(str)
df["Class"] = df["Class"].str.replace("/", " - ")

# * *Filter out rows after TERM entries*
# * Find rows whose SNo cell contains "TERM" and keep only the rows that come
# * before the first of them
term_index = df[df["SNo"].str.contains("TERM", na=False)].index

if not term_index.empty:
    df = df.iloc[:term_index[0]]

# * Drop the columns not needed for FB numbers, then discard rows whose FB_No
# * is blank/whitespace-only and keep just the three columns of interest
df = df.drop(columns=["-", "Abacus / Vediic Maths", "TERM FEE"], errors="ignore")
df.columns = df.columns.str.strip()
df["FB_No"] = df["FB_No"].replace(r"^\s*$", pd.NA, regex=True)
df = df.dropna(subset=["FB_No"]).reset_index(drop=True)
df = df[['AdmissionNo', 'StudentName', 'FB_No']]

# * *Data Cleaning - FB_No Column*
df["FB_No"] = (
    df["FB_No"]
    .str.replace(r"Fine:\s*100/-", "", regex=True)  # * Remove "Fine: 100/-"
    .str.replace(r",\s*$", "", regex=True)  # * Remove trailing commas and spaces
)

df = df[df["FB_No"].str.contains(r'\d$', na=False)].copy()  # * Keep rows where FB_No ends with a digit

# * *Remove occurrences like "Dec-2024, Sep-2024"*
# * i.e. runs of three letters, a dash and four digits, optionally comma-separated
df["FB_No"] = (
    df["FB_No"]
    .str.replace(r'(?:[A-Za-z]{3}-\d{4},?\s*)+', '', regex=True)
    .str.replace(r',\s*$', '', regex=True)
    .str.strip()
)

# * *Remove leading special characters*
# * (anything at the start that is neither a word character nor whitespace)
df["FB_No"] = df["FB_No"].replace(r'^[^\w\s]+', '', regex=True)

# * *Custom Function to Extract Final FB_No*
def clean_FB_No(text):
    """Return the digits at the very end of ``text`` after dropping a numeric prefix.

    A run of digits at the start of the string (plus any whitespace after it)
    is removed first, so a value made only of digits yields ``''``.

    Args:
        text: Remark string from the FB_No column.

    Returns:
        The trailing digit run as a string, or ``''`` when nothing is left
        that ends in a digit.
    """
    # * Drop the numeric prefix
    without_prefix = re.sub(r'^\d+\s*', '', text)
    # * Whatever digits close the remaining text are the FB number
    trailing_digits = re.search(r'\d+$', without_prefix)
    if trailing_digits is None:
        return ''
    return trailing_digits.group()

# * Reduce every remark to its trailing FB number ('' when none can be extracted)
df["FB_No"] = df["FB_No"].apply(clean_FB_No)

# * (The former rename of "FB_No" to "FB_No" was a no-op and has been removed.)

print(df.to_string())
print(f"✅ {len(df)} Records Processed")



# * # * *Insert into MySQL*
# * engine = create_engine(
# *     f"mysql+pymysql://{MYSQL_CREDENTIALS['username']}:{MYSQL_CREDENTIALS['password']}@"
# *     f"{MYSQL_CREDENTIALS['host']}:{MYSQL_CREDENTIALS['port']}/{MYSQL_CREDENTIALS['database']}"
# * )

# * with engine.connect() as connection:
# *     df.to_sql(TABLE_NAME, con=connection, if_exists="replace", index=False)

# * print(f"✅ Data inserted into `{TABLE_NAME}`")


✅ Login successful!

✅ Table found! Extracting data...

     AdmissionNo                                        StudentName  FB_No
0          17025                                     SIDDIQUA FATMA       
1          16517                             KARTHIKA SRI POLIPILLI       
2          16246                                      SETTY PUNEETH       
3          16866                               VADDADI BHANU CHARAN       
4          16941                                 NITHYA DEVI SREE C    542
5          15655                               BOOTHI PRINIYASMITHA    639
6          15825                           KURRU JHANSI SRI LAKSHMI       
7          16184                                   DALLI HETHANSHRI       
8          16732                                       SETTY BAVIKA       
9          16220                          YADAGIRI BHAGATH SRINIVAS    705
10         17041                           TATTA T R L V S NIHARIKA    685
11         16070                            

In [103]:
# * Blank FB numbers are treated as 0 so the column can hold plain integers
fb_filled = df["FB_No"].replace('', pd.NA).fillna(0)

# * Anything that still is not a number is coerced to 0 as well
df["FB_No"] = pd.to_numeric(fb_filled, errors='coerce').fillna(0).astype(int)

# * Largest FB numbers first
df = df.sort_values(by='FB_No', ascending=False)

df


Unnamed: 0,AdmissionNo,StudentName,FB_No
1852,16774,SHEIK SHANAYA,17804
534,17025,SIDDIQUA FATMA,7825
4560,15554,ARJA NAYNEKKA,2700
4544,16893,PILLA VISHWA TEJA,2025
4543,16877,DRUSHYA VADLAMOORI,2025
...,...,...,...
2,16246,SETTY PUNEETH,0
1,16517,KARTHIKA SRI POLIPILLI,0
0,17025,SIDDIQUA FATMA,0
4872,17218,BHUSARAPU NAINIKA SAI,0


In [104]:
# * Make sure FB_No is an integer column before comparing
df["FB_No"] = pd.to_numeric(df["FB_No"], errors="coerce").fillna(0).astype(int)

# * Keep FB numbers that are at most 2000 and are not the 0 placeholder
within_limit = df["FB_No"] <= 2000
is_placeholder = df["FB_No"].isin([0])
df = df[within_limit & ~is_placeholder]

df


Unnamed: 0,AdmissionNo,StudentName,FB_No
3164,15922,ALAMANDA SAI SAKETH,1929
4709,15922,ALAMANDA SAI SAKETH,1929
4845,15811,DANDA RASHMITHA SAI,1927
4512,15811,DANDA RASHMITHA SAI,1927
4846,15811,DANDA RASHMITHA SAI,1927
...,...,...,...
3333,16698,MADHUPADA NAVNEETH,2
2155,16698,MADHUPADA NAVNEETH,2
727,17006,CHEKKA SATYA BHARGAV,1
728,17006,CHEKKA SATYA BHARGAV,1


In [105]:
# * Distinct FB numbers recorded for each admission number
fb_per_student = df.groupby("AdmissionNo")["FB_No"].unique()

# * Students with more than one distinct FB number get them joined as "a, b"
has_several = fb_per_student.apply(len) > 1
df_multiple_fb = (
    fb_per_student[has_several]
    .apply(lambda numbers: ", ".join(map(str, numbers)))
    .reset_index()
)

# * *Merge with the original DataFrame & Update FB_No*
# * The joined text replaces FB_No where it exists; other rows keep their number
df = df.merge(df_multiple_fb, on="AdmissionNo", how="left", suffixes=("", "_multiple"))
df["FB_No"] = df["FB_No_multiple"].fillna(df["FB_No"])
df = df.drop(columns=["FB_No_multiple"])

df


Unnamed: 0,AdmissionNo,StudentName,FB_No
0,15922,ALAMANDA SAI SAKETH,"1929, 1039"
1,15922,ALAMANDA SAI SAKETH,"1929, 1039"
2,15811,DANDA RASHMITHA SAI,1927
3,15811,DANDA RASHMITHA SAI,1927
4,15811,DANDA RASHMITHA SAI,1927
...,...,...,...
4661,16698,MADHUPADA NAVNEETH,"542, 2"
4662,16698,MADHUPADA NAVNEETH,"542, 2"
4663,17006,CHEKKA SATYA BHARGAV,1
4664,17006,CHEKKA SATYA BHARGAV,1


In [106]:
# * One row per admission number: the first occurrence is kept and the index renumbered
df = df.drop_duplicates(subset=["AdmissionNo"], ignore_index=True)

df

Unnamed: 0,AdmissionNo,StudentName,FB_No
0,15922,ALAMANDA SAI SAKETH,"1929, 1039"
1,15811,DANDA RASHMITHA SAI,1927
2,16633,KINTHADA TEJASWIN,1921
3,16370,GARIKINA YOGITHA,1917
4,16518,MOSES HENDRICKS TURUK,1915
...,...,...,...
1526,17114,KARNENA MONISHASRI,6
1527,17111,P V SHAANVI BOLLINENI,5
1528,17109,BURLA KARTHIKEYA,4
1529,16729,KAGITHALA GEETHANANDAN REDDY,3


In [107]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import re
from sqlalchemy import create_engine

def _table_to_rows(table):
    """Return the stripped ``<td>`` texts of each ``<tr>`` in ``table``; rows without ``<td>`` cells are skipped."""
    rows = []
    for tr in table.find_all("tr"):
        cells = [td.text.strip() for td in tr.find_all("td")]
        if cells:
            rows.append(cells)
    return rows


def _extract_final_fb_no(text):
    """Return the trailing run of digits of ``text`` after a leading number is removed.

    A leading run of digits (and the whitespace after it) is stripped first, so a
    value consisting of digits only is consumed entirely and yields ``''``.
    ``''`` is also returned when what remains does not end in digits.
    """
    remainder = re.sub(r'^\d+\s*', '', text)
    match = re.search(r'\d+$', remainder)
    return match.group() if match else ''


def _clean_fee_book_numbers(df, max_fb_no=2000):
    """Reduce the scraped report to AdmissionNo / StudentName / integer FB_No rows.

    Returns the rows sorted by FB_No descending, keeping only 0 < FB_No <= max_fb_no.
    """
    # * *Filter out rows after TERM entries* (everything from the first "TERM" row on is dropped)
    term_index = df[df["SNo"].str.contains("TERM", na=False)].index
    if not term_index.empty:
        df = df.iloc[:term_index[0]]

    # * Only these three columns are used from here on
    df = df[["AdmissionNo", "StudentName", "FB_No"]].copy()
    df["AdmissionNo"] = df["AdmissionNo"].astype(str)

    # * Blank FB_No cells count as missing and are removed
    df["FB_No"] = df["FB_No"].replace(r"^\s*$", pd.NA, regex=True)
    df = df.dropna(subset=["FB_No"]).reset_index(drop=True)

    # * *Data Cleaning - FB_No Column*
    df["FB_No"] = (
        df["FB_No"]
        .str.replace(r"Fine:\s*100/-", "", regex=True)
        .str.replace(r",\s*$", "", regex=True)
    )
    # * Rows whose remark does not end in a digit carry no fee-book number
    df = df[df["FB_No"].str.contains(r'\d$', na=False)].copy()

    # * *Remove occurrences like "Dec-2024, Sep-2024"*, then leading punctuation, then keep the final number
    df["FB_No"] = (
        df["FB_No"]
        .str.replace(r'(?:[A-Za-z]{3}-\d{4},?\s*)+', '', regex=True)
        .str.replace(r',\s*$', '', regex=True)
        .str.strip()
        .replace(r'^[^\w\s]+', '', regex=True)
        .apply(_extract_final_fb_no)
    )

    # * '' (no number found) is coerced to NaN and then to 0, which is filtered out below
    df["FB_No"] = pd.to_numeric(df["FB_No"], errors='coerce').fillna(0).astype(int)
    df = df.sort_values(by='FB_No', ascending=False)
    return df[(df["FB_No"] <= max_fb_no) & (df["FB_No"] != 0)]


def _collapse_multiple_fb_nos(df):
    """Return one row per AdmissionNo; students with several distinct FB_No get them joined as "a, b"."""
    df_multiple_fb = df.groupby("AdmissionNo")["FB_No"].unique()
    df_multiple_fb = df_multiple_fb[df_multiple_fb.apply(len) > 1]
    df_multiple_fb = df_multiple_fb.apply(lambda x: ", ".join(map(str, x)))
    df_multiple_fb = df_multiple_fb.reset_index()

    df = df.merge(df_multiple_fb, on="AdmissionNo", how="left", suffixes=("", "_multiple"))
    # * Joined string where several numbers exist, the original integer otherwise
    df["FB_No"] = df["FB_No_multiple"].fillna(df["FB_No"])
    df = df.drop(columns=["FB_No_multiple"])
    return df.drop_duplicates(subset=["AdmissionNo"]).reset_index(drop=True)


def fetch_and_clean_fee_data(
    output_csv=r"D:\GITHUB\kotak-school-dbms\output_data\fee_book_nos_report_2024_25.csv",
    max_fb_no=2000,
    timeout=30,
):
    """Scrape the day-wise fee receipt report and extract each student's fee-book number(s).

    Logs in to the school portal, downloads the receipt-wise report page, parses
    its first ``<table>``, cleans the ``FB_No`` column down to a number, collapses
    the result to one row per ``AdmissionNo`` and writes it to ``output_csv``.

    The login name and password are read from the environment variables
    ``MYSKOOL_USERNAME`` and ``MYSKOOL_PASSWORD`` when they are set; otherwise the
    values that were previously hard-coded here are used.

    Args:
        output_csv: Path of the CSV file to write (written without the index).
        max_fb_no: Rows whose extracted FB_No is larger than this are discarded.
        timeout: Seconds to wait on each HTTP request before requests gives up.

    Returns:
        A DataFrame with columns ``AdmissionNo``, ``StudentName`` and ``FB_No``
        (an int, or a comma-joined string when a student has several numbers),
        or ``None`` when the login response is not HTTP 200 or the page has no table.

    Raises:
        Exceptions raised by ``requests`` (connection failure, timeout) are not caught.
    """
    import os  # * Local import: the import lines of this cell do not bring in os

    # * *Define Credentials*
    login_url = "https://app.myskoolcom.tech/kotak_vizag/login"
    data_url = "https://app.myskoolcom.tech/kotak_vizag/office_fee/fee_reports_day_wise_receipt_wise_print"

    # * TODO: drop the literal fallbacks once the environment variables are configured
    credentials = {
        "uname": os.environ.get("MYSKOOL_USERNAME", "harikiran"),
        "psw": os.environ.get("MYSKOOL_PASSWORD", "812551"),
    }

    # * *Login to Website and fetch the report page* (the session is closed when the block exits)
    with requests.Session() as session:
        login_response = session.post(login_url, data=credentials, timeout=timeout)
        # * NOTE(review): only the HTTP status is checked; confirm the site does not answer 200 on bad credentials
        if login_response.status_code != 200:
            print("❌ Login failed! Check credentials.\n")
            return None

        print("✅ Login successful!\n")

        # * *Fetch Fee Report Page*
        response = session.get(data_url, timeout=timeout)

    soup = BeautifulSoup(response.text, "html.parser")
    table = soup.find("table")
    if not table:
        print("❌ Table not found! The page structure might have changed.")
        return None

    print("✅ Table found! Extracting data...\n")

    # * *Extract Data from Table*
    rows = _table_to_rows(table)

    # * *Define Header*
    header_row = [
        "SNo", "RecieptNo", "Class", "AdmissionNo", "StudentName",
        "Date", "-", "Abacus / Vediic Maths", "TERM FEE",
        "ReceivedAmount", "FB_No"
    ]
    df = pd.DataFrame(rows, columns=header_row)

    # * *Clean Data and Handle Multiple FB_No per AdmissionNo*
    df = _clean_fee_book_numbers(df, max_fb_no=max_fb_no)
    df = _collapse_multiple_fb_nos(df)

    df.to_csv(output_csv, index=False)

    print(f"✅ {len(df)} Records Processed")
    return df

# * *Run the scraper* (the function hands back None when the login or the table lookup fails)
df_result = fetch_and_clean_fee_data()
if df_result is not None:
    # * NOTE(review): this renders every row as plain text; a .head() would keep the output short
    full_listing = df_result.to_string()
    print(full_listing)


✅ Login successful!

✅ Table found! Extracting data...

✅ 1531 Records Processed
     AdmissionNo                                        StudentName       FB_No
0          15922                                ALAMANDA SAI SAKETH  1929, 1039
1          15811                                DANDA RASHMITHA SAI        1927
2          16633                                  KINTHADA TEJASWIN        1921
3          16370                                   GARIKINA YOGITHA        1917
4          16518                              MOSES HENDRICKS TURUK        1915
5          16392                                MYLIPILLI SANVISREE        1913
6          16538                                    ANIRUDH BUDDALA        1912
7          16336                                   KADALI SAMRUDDHI        1911
8          16542                              THUMPALA VINAY RAKESH        1910
9          16498                                 NEMALIPURI KUSHIKA        1909
10         16693                       

In [108]:
# * Preview students that have more than one fee-book number (comma-joined FB_No).
# * FB_No can hold ints as well as strings, so cast to str first: the .str accessor
# * raises AttributeError on a column without string values. The comma is matched literally.
df_result[df_result["FB_No"].astype(str).str.contains(",", regex=False)].head()

Unnamed: 0,AdmissionNo,StudentName,FB_No
0,15922,ALAMANDA SAI SAKETH,"1929, 1039"
17,15487,VASUPALLI SUDHARSAN,"1735, 1674"
18,14905,YUVANA CHANDRA AKULA,"1735, 1065"
39,17126,PONNADA BHAGATSINGH,"1691, 11"
82,14153,JAYANTH VASUPALLI,"1644, 1642"


In [109]:
# * NOTE(review): self-assignment; rebinding df_result to itself changes nothing, so this cell looks like a leftover
df_result = df_result

In [110]:
# from selenium import webdriver
# from selenium.webdriver.common.by import By
# from selenium.webdriver.common.keys import Keys
# from selenium.webdriver.chrome.service import Service
# from selenium.webdriver.support.ui import WebDriverWait
# from selenium.webdriver.support import expected_conditions as EC
# import os
# import time
# from datetime import datetime

# # 🔹 Constants
# login_url = "https://app.myskoolcom.tech/kotak_vizag/login"
# attendance_url = "https://app.myskoolcom.tech/kotak_vizag/admin/attedance_grid"

# credentials = {
#     "uname": "harikiran",
#     "psw": "812551"
# }

# # 🔹 Set ChromeDriver path
# chromedriver_path = r"G:\chromedriver-win64\chromedriver-win64\chromedriver.exe"

# # 🔹 Set the download folder
# download_folder = r"D:\GITHUB\kotak-school-dbms\source_data\Attendance Reports"

# # 🔹 Define attendance report periods
# report_periods = [
#     {"start_date": "2024-07-17", "end_date": "2024-09-30", "filename": "AttendanceReportUptoSeptember_2024_25.csv"},
#     {"start_date": "2024-10-01", "end_date": "2024-12-31", "filename": "AttendanceOctoberToDecember_2024_25.csv"},
#     {"start_date": "2025-01-01", "end_date": datetime.today().strftime("%Y-%m-%d"), "filename": "AttendanceUptoMarch_2024_25.csv"},
# ]

# # 🔹 Chrome options
# chrome_options = webdriver.ChromeOptions()
# prefs = {"download.default_directory": download_folder}  # ✅ Set download directory
# chrome_options.add_experimental_option("prefs", prefs)

# # 🔹 Start WebDriver
# service = Service(chromedriver_path)
# driver = webdriver.Chrome(service=service, options=chrome_options)
# wait = WebDriverWait(driver, 10)

# # **Step 1: Login to the website**
# driver.get(login_url)
# wait.until(EC.presence_of_element_located((By.NAME, "uname"))).send_keys(credentials["uname"])
# driver.find_element(By.NAME, "psw").send_keys(credentials["psw"])
# driver.find_element(By.NAME, "psw").send_keys(Keys.RETURN)
# print("✅ Logged in successfully!")

# # **Step 2: Go to Attendance Page**
# time.sleep(5)  # Wait for login
# driver.get(attendance_url)

# # **Step 3: Download attendance reports for each period**
# for period in report_periods:
#     start_date = period["start_date"]
#     end_date = period["end_date"]
#     final_filename = period["filename"]
#     final_filepath = os.path.join(download_folder, final_filename)

#     print(f"📌 Processing: {final_filename}")

#     # **Find and set "From Date"**
#     from_date_input = wait.until(EC.presence_of_element_located((By.ID, "from_attendance_date")))
#     driver.execute_script("arguments[0].removeAttribute('readonly')", from_date_input)
#     from_date_input.clear()
#     from_date_input.send_keys(start_date)

#     # **Find and set "To Date"**
#     to_date_input = wait.until(EC.presence_of_element_located((By.ID, "to_attendance_date")))
#     driver.execute_script("arguments[0].removeAttribute('readonly')", to_date_input)
#     to_date_input.clear()
#     to_date_input.send_keys(end_date)

#     print(f"✅ Date range set: {start_date} to {end_date}")

#     # **Step 4: Click "Download CSV"**
#     try:
#         download_button = wait.until(EC.element_to_be_clickable((By.ID, "smaplecsv")))  # ✅ Use correct button ID
#         download_button.click()
#         print("✅ Download initiated!")
#     except:
#         print("❌ Error: Download button not found!")

#     # **Step 5: Wait for the file to download**
#     time.sleep(10)  # Allow time for download

#     # **Step 6: Delete existing file if it already exists**
#     try:
#         if os.path.exists(final_filepath):
#             os.remove(final_filepath)
#             print(f"🗑️ Deleted existing file: {final_filename}")
#     except Exception as e:
#         print(f"❌ Error deleting existing file: {e}")

#     # **Step 7: Rename the latest downloaded file**
#     try:
#         # Find the most recently downloaded file
#         downloaded_files = sorted(
#             [f for f in os.listdir(download_folder) if f.endswith(".csv")],
#             key=lambda x: os.path.getctime(os.path.join(download_folder, x)),
#             reverse=True,
#         )

#         if downloaded_files:
#             latest_file = os.path.join(download_folder, downloaded_files[0])
#             os.rename(latest_file, final_filepath)
#             print(f"✅ File renamed to: {final_filename}")
#         else:
#             print("❌ No downloaded CSV file found!")
#     except Exception as e:
#         print(f"❌ Error renaming file: {e}")

#     # **Step 8: Wait between downloads**
#     time.sleep(5)  # Pause before next download

# # **Step 9: Close browser**
# driver.quit()
# print("✅ All reports downloaded successfully!")
