In [22]:
import pandas as pd
import sqlite3
from datetime import datetime

In [35]:
# Load every legacy CSV export; each table keeps its own module-level name
# because the later cells refer to them individually.
(
    mst_jobs,
    tbl_learnlicdet,
    tbl_permanentlicdet,
    tbl_stufees,
    yash_students,
) = [
    pd.read_csv(csv_path)
    for csv_path in (
        "csv_data/mst_job.csv",
        "csv_data/tbl_learnlicdet.csv",
        "csv_data/tbl_permanentlicdet.csv",
        "csv_data/tbl_stufees.csv",
        "csv_data/yash_students.csv",
    )
]

# Print a heading followed by the first rows of each table as a quick sanity check.
for heading, frame in (
    ("MST Jobs:", mst_jobs),
    ("Learn License Details:", tbl_learnlicdet),
    ("Permanent License Details:", tbl_permanentlicdet),
    ("Student Fees:", tbl_stufees),
    ("Yash Students:", yash_students),
):
    print(heading)
    print(frame.head(), "\n")

MST Jobs:
   JobID                JobName  \
0      1     4 Wheeler Learning   
1      2   Practicing 4 Wheeler   
2      3    Issuing New License   
3      4        License Renewal   
4      5  Convert To Smart Card   

                                             JobDesc JobType  
0  Learning four wheeler with temporary license &...     EXE  
1            Practicing four wheeler without license     EXE  
2       Application for new license without training     RTO  
3    Renewing expired license (all types of license)     RTO  
4  Application for duplicate license with old lic...     RTO   

Learn License Details:
   StuID                       LicNo             DtEnroll  \
0     19         LL/38989/13,   ,     2013-05-02 00:00:00   
1     48             LL/43349/13, ,   2013-05-16 00:00:00   
2     49             LL/43415/13, ,   2013-05-16 00:00:00   
3     51           LL/43629/13,  ,    2013-05-17 00:00:00   
4     46  LL/43351/13, LL/43350/13,   2013-05-16 00:00:00   

         

In [36]:
# Lowercase only string values, not column names
def lowercase_df(df):
    """Return a copy of `df` with string values lowercased and stripped.

    Only columns selected by ``select_dtypes(include='object')`` are
    considered, and within those columns only elements that are actual
    ``str`` instances are transformed (``.lower()`` then ``.strip()``).
    Non-string elements (numbers, ``None``/NaN, ...) are kept unchanged;
    the plain ``.str`` accessor would turn them into NaN. Column names are
    never modified.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to normalise. It is not mutated.

    Returns
    -------
    pandas.DataFrame
        A new frame with the same columns and index as `df`.
    """
    # Work on a copy so re-running a cell never sees half-normalised input.
    result = df.copy()
    for col in result.select_dtypes(include='object').columns:
        values = result[col]
        is_text = values.map(lambda v: isinstance(v, str)).astype(bool)
        # Skip columns without any string: `.str` raises AttributeError on
        # object columns holding e.g. only integers.
        if not is_text.any():
            continue
        normalised = values.str.lower().str.strip()
        # Keep the original element wherever it is not a string.
        result[col] = values.where(~is_text, normalised)
    return result

# Normalise the text values of every loaded table and rebind each name to
# the frame returned by lowercase_df.
(
    mst_jobs,
    tbl_learnlicdet,
    tbl_permanentlicdet,
    tbl_stufees,
    yash_students,
) = [
    lowercase_df(table)
    for table in (
        mst_jobs,
        tbl_learnlicdet,
        tbl_permanentlicdet,
        tbl_stufees,
        yash_students,
    )
]

In [45]:
# Migrate the legacy `yash_students` table into the `customers` table of the
# SQLite database.
# NOTE: the insert uses if_exists='append', so running this cell twice will
# try to insert the same rows again.


def _split_first_comma(series):
    """Split `series` on its first comma into exactly two columns (0 and 1).

    `str.split(..., expand=True)` returns a single column when no value
    contains a comma; reindexing pads the missing second column with NaN so
    a two-column assignment never fails with a length mismatch.
    """
    return series.str.split(pat=',', n=1, expand=True).reindex(columns=[0, 1])


def _drop_repeated_values(frame, column):
    """Drop rows whose non-null `column` value already appeared in an earlier row.

    Rows with a missing value are always kept: `drop_duplicates` alone would
    treat all NaN as equal and keep only the first such row.
    """
    repeated = frame[column].notna() & frame.duplicated(subset=[column], keep='first')
    return frame[~repeated]


# Split LL numbers and classes if there are multiple
yash_students[['ll_no_1', 'll_no_2']] = _split_first_comma(yash_students['learn_lic_no'])
yash_students[['ll_class_1', 'll_class_2']] = _split_first_comma(yash_students['learn_lic_type'])

# Map columns to database table
yash_students = yash_students.rename(columns={
    "StuID": "id",
    "StuPhone": "mobile_number",
    "StuName": "customer_name",
    "StuDOB": "customer_dob",
    "StuPerAdd": "address",
    "CarID": "vehicle_id",
    "InstID": "instructor_id",
    "learn_dt_issue": "ll_issued_date",
    "learn_dt_expiry": "ll_validity_date",
    "perm_lic_no": "mdl_no",
    "perm_lic_type": "mdl_class",
    "perm_dt_issue": "mdl_issued_date",
    "perm_dt_expiry": "mdl_validity_date",
    "Endorse": "endorsement",
    "EndorseDate": "endorsement_date",
    "StuSDWOf": "relation_name",
    "LastUpdatedDate": "created_on"
})

# Add extra columns
now = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
yash_students["updated_on"] = now
yash_students["customer_image"] = None
yash_students["customer_signature"] = None
yash_students["endorsement_validity_date"] = None
yash_students["customer_vehicle_no"] = None

# Convert dates to standard format; unparseable values become missing (errors='coerce')
date_cols = ['customer_dob', 'll_issued_date', 'll_validity_date', 'mdl_issued_date', 'mdl_validity_date', 'endorsement_date', 'created_on']
for col in date_cols:
    if col in yash_students.columns:
        yash_students[col] = pd.to_datetime(yash_students[col], errors='coerce').dt.strftime("%Y-%m-%d %H:%M:%S")

conn = sqlite3.connect("guide-mts-data.sqlite3")
try:
    # Get actual columns from the customers table in SQLite
    cursor = conn.execute("PRAGMA table_info(customers)")
    table_columns = [row[1] for row in cursor.fetchall()]
    if not table_columns:
        # PRAGMA table_info returns no rows for a table that does not exist.
        raise RuntimeError("Table 'customers' not found in guide-mts-data.sqlite3")

    # Remove 'll_no_2' from columns if it exists to avoid UNIQUE constraint error
    columns_to_insert = [col for col in table_columns if col in yash_students.columns and col != 'll_no_2']
    yash_students_to_insert = yash_students[columns_to_insert]

    # Drop rows where mdl_no is NaN (mdl_no is treated as required). This is
    # done BEFORE de-duplicating so that a row about to be discarded can never
    # win a "first occurrence" over a valid row.
    if 'mdl_no' in yash_students_to_insert.columns:
        yash_students_to_insert = yash_students_to_insert[yash_students_to_insert['mdl_no'].notna()]

    # Drop duplicate values (keep first occurrence) in the columns treated as
    # unique; rows with a missing value in such a column are kept.
    for unique_col in ('mdl_no', 'mobile_number', 'll_no_1'):
        if unique_col in yash_students_to_insert.columns:
            yash_students_to_insert = _drop_repeated_values(yash_students_to_insert, unique_col)

    # Insert into SQLite
    yash_students_to_insert.to_sql('customers', conn, if_exists='append', index=False)
finally:
    # Always release the database handle, including when the insert fails.
    conn.close()

print("Data inserted successfully!")

Data inserted successfully!
