In [1]:
import pandas as pd
import sqlite3
from datetime import datetime

In [2]:
# Step 1: Define file paths directly
csv_file = "./29092025_Latest_DB/csv/mst_instructor.csv"   # adjust path to your CSV
sqlite_file = "./29092025_Latest_DB/guide-mts-database.sqlite3"   # adjust path to your SQLite DB

print(f"CSV selected: {csv_file}")
print(f"SQLite selected: {sqlite_file}")

CSV selected: ./29092025_Latest_DB/csv/mst_instructor.csv
SQLite selected: ./29092025_Latest_DB/guide-mts-database.sqlite3


In [3]:
# Step 2: Load CSV into pandas
df = pd.read_csv(csv_file)
print("\nOriginal CSV Data:")
print(df.head(20))


Original CSV Data:
    InstID               InstName  \
0        1                  Dummy   
1        2       PURANSINGH.DHAMI   
2        3     NOOR MOHAMMAD KHAN   
3        4    MOHD JAFFER  SHAIKH   
4        5              AMIN KHAN   
5        6          RAJJAK SHAIKH   
6        7  MILIND KAMALAKAR KENY   
7        8                ASHWINI   
8        9                MUKHTAR   
9       10                BRIJESH   
10      11          BAHADUR SINGH   
11      12          TAREEK ANSARI   
12      13                RAVIDRA   
13      14               RAVINDRA   
14      15                MANGESH   
15      16                  AJAY    

                                         InstAdd   InstSts  \
0                                          Dummy  Inactive   
1              RAWALI CAMP SION KOLIWADA MUM-37.  Inactive   
2                SOCIAL NAGAR DHARAVI\r\nMUM-17.  Inactive   
3               SHIVAJI NAGAR\r\nGOVANDI MUM-43.  Inactive   
4       GULISTAN BUILDING,KURLA, MUMBAI 

In [4]:
# Step 2.1: Clean headers (strip spaces/quotes) and lowercase
df.columns = df.columns.str.strip().str.replace('"', '').str.lower()

# Step 2.2: Convert all string values in the DataFrame to lowercase
df = df.applymap(lambda x: x.lower() if isinstance(x, str) else x)

print(df.head())

   instid             instname                                   instadd  \
0       1                dummy                                     dummy   
1       2     puransingh.dhami         rawali camp sion koliwada mum-37.   
2       3   noor mohammad khan           social nagar dharavi\r\nmum-17.   
3       4  mohd jaffer  shaikh          shivaji nagar\r\ngovandi mum-43.   
4       5            amin khan  gulistan building,kurla, mumbai - 400070   

    inststs      lastupdateddate  
0  inactive  2017-07-08 00:00:00  
1  inactive  2018-05-12 00:00:00  
2  inactive  2015-10-02 00:00:00  
3  inactive  2016-10-13 00:00:00  
4  inactive  2013-06-08 00:00:00  


  df = df.applymap(lambda x: x.lower() if isinstance(x, str) else x)


In [5]:
# Step 3: Data Cleaning & Transformation
# Map Active/Inactive to boolean
df["inststs"] = df["inststs"].map({"active": "true", "inactive": "false"})

In [6]:
# Rename columns to match instructors table schema
df.rename(columns={
    "instid": "id",
    "instname": "instructor_name",
    "instadd": "instructor_license_no", # temporary placeholder
    "inststs": "is_active",
    "lastupdateddate": "created_on"
}, inplace=True)

In [7]:
# Add columns required by schema
df["license_expiration_date"] = "null" # No values available → set as NULL

In [8]:
# Overwrite placeholder: instructor_license_no should be NULL instead of address
df["instructor_license_no"] = "null"

In [9]:
# Add updated_on = current local datetime
df["updated_on"] = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

In [10]:
# Reorder columns to match table
df = df[["id", "instructor_name", "instructor_license_no", "license_expiration_date", "is_active", "created_on", "updated_on"]]

print("\nTransformed Data:")
print(df.tail())


Transformed Data:
    id instructor_name instructor_license_no license_expiration_date  \
11  12   tareek ansari                  null                    null   
12  13         ravidra                  null                    null   
13  14        ravindra                  null                    null   
14  15         mangesh                  null                    null   
15  16           ajay                   null                    null   

   is_active           created_on           updated_on  
11      true  2021-08-28 00:00:00  2025-10-01 12:32:52  
12     false  2018-05-12 00:00:00  2025-10-01 12:32:52  
13     false  2018-11-02 00:00:00  2025-10-01 12:32:52  
14     false  2021-08-28 00:00:00  2025-10-01 12:32:52  
15      true  2020-03-09 00:00:00  2025-10-01 12:32:52  


In [11]:
# Step 4: Insert into SQLite
conn = sqlite3.connect(sqlite_file)
cursor = conn.cursor()

In [12]:
# Insert data into table
df.to_sql("instructors", conn, if_exists="append", index=False)


conn.commit()
conn.close()


print("\nData inserted into instructors table successfully.")


Data inserted into instructors table successfully.
