In [4]:
import mysql.connector
import pandas as pd
from sqlalchemy import create_engine
import os
from dotenv import load_dotenv

In [5]:
def setConnection():
    """Open a MySQL connection and return ``(cursor, connection)``.

    Connection details are read from the ``DB_HOST``, ``DB_USER`` and
    ``DB_PASSWORD`` environment variables (after ``load_dotenv()`` has been
    called). When a variable is unset, the value that used to be hard-coded
    here ('localhost', 'root', 'user') is used instead.

    The connection is opened with autocommit enabled and the session
    isolation level is set to READ COMMITTED.

    Returns:
        A ``(cursor, connection)`` tuple, or ``(None, None)`` if a
        ``mysql.connector.Error`` occurred (the error is printed).
    """
    connection = None
    try:
        load_dotenv()

        # Retrieve MySQL connection details from environment variables,
        # falling back to the former literals so existing setups keep working.
        host = os.getenv('DB_HOST', 'localhost')
        user = os.getenv('DB_USER', 'root')
        password = os.getenv('DB_PASSWORD', 'user')
        connection = mysql.connector.connect(
            host=host,
            user=user,
            password=password,
            autocommit=True  # Set autocommit to True
        )
        cursor = connection.cursor()
        cursor.execute("SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED")
        return cursor, connection
    except mysql.connector.Error as e:
        print(e)
        # Do not leak a connection that was opened before the failure.
        if connection is not None:
            try:
                connection.close()
            except mysql.connector.Error as close_error:
                print(close_error)
        return None, None

In [6]:
def createDatabase(cursor, name):
    """Create database ``name`` if it does not exist and switch to it.

    The name is emitted as a backtick-quoted identifier (embedded backticks
    are doubled), so names containing hyphens, spaces or reserved words work
    and cannot terminate the statement early.

    Args:
        cursor: Object exposing ``execute(sql)``.
        name: Database name, unquoted.

    Any ``mysql.connector.Error`` is printed rather than propagated.
    """
    # Identifiers cannot be bound as query parameters, so quote explicitly.
    quoted = "`" + str(name).replace("`", "``") + "`"
    try:
        cursor.execute(f"CREATE DATABASE IF NOT EXISTS {quoted}")
        cursor.execute(f"USE {quoted}")
    except mysql.connector.Error as e:
        print(e)

In [7]:
def createTable(cursor, tablename, dataframe, databaseName):
    """Write ``dataframe`` to table ``tablename`` and print the table schema.

    The DataFrame is written through a temporary SQLAlchemy engine with
    ``if_exists='replace'`` and without the index. Afterwards ``DESCRIBE`` is
    run on ``cursor`` and each returned row is printed.

    Credentials come from ``DB_USER``, ``DB_PASSWORD`` and ``DB_HOST``; when a
    variable is unset, the previously hard-coded value ('root', 'user',
    'localhost') is used.

    Args:
        cursor: Cursor used for the ``DESCRIBE`` statement.
        tablename: Target table name, unquoted.
        dataframe: Object exposing ``to_sql`` (a pandas DataFrame).
        databaseName: Database named in the engine URL.

    All exceptions are printed rather than propagated.
    """
    from urllib.parse import quote_plus  # local import keeps this block self-contained

    load_dotenv()
    # URL-escape the credentials so characters such as '@' or ':' cannot
    # corrupt the connection URL.
    user = quote_plus(os.getenv('DB_USER', 'root'))
    password = quote_plus(os.getenv('DB_PASSWORD', 'user'))
    host = os.getenv('DB_HOST', 'localhost')

    engine = None
    try:
        engine = create_engine(
            f"mysql+mysqlconnector://{user}:{password}@{host}/{databaseName}"
        )
        dataframe.to_sql(name=tablename, con=engine, if_exists='replace', index=False)
        print(f"Table '{tablename}' successfully installed in the database")
        # Identifiers cannot be bound as parameters; quote with backticks.
        quoted_table = "`" + str(tablename).replace("`", "``") + "`"
        cursor.execute(f"DESCRIBE {quoted_table}")
        print("Table schema:")
        for column in cursor.fetchall():
            print(column)
    except mysql.connector.Error as mysql_error:
        print(f"MySQL Connector Error: {mysql_error}")
    except Exception as sqlalchemy_error:
        print(f"SQLAlchemy Error: {sqlalchemy_error}")
    finally:
        # Release the engine's pooled connections; a new engine is built on
        # every call, so leaving it undisposed leaks connections.
        if engine is not None:
            engine.dispose()

In [8]:
def query(cursor, queryString):
    """Execute ``queryString`` on ``cursor`` and print each fetched row.

    Rows are printed one per line in the order ``fetchall()`` returns them.
    A ``mysql.connector.Error`` is printed rather than propagated.
    """
    try:
        cursor.execute(queryString)
        for record in cursor.fetchall():
            print(record)
    except mysql.connector.Error as err:
        print(err)
        

In [9]:
def closeConnection(cursor, connection):
    """Close ``cursor`` and then ``connection``, tolerating missing handles.

    Either argument may be ``None`` (for example the ``(None, None)`` pair
    returned when opening the connection failed); ``None`` handles are
    skipped. Each handle is closed independently, so a failure while closing
    the cursor no longer prevents the connection from being closed.

    A ``mysql.connector.Error`` raised by ``close()`` is printed rather than
    propagated.
    """
    for handle in (cursor, connection):
        if handle is None:
            continue
        try:
            handle.close()
        except mysql.connector.Error as e:
            print(e)

Loading Datasets


In [None]:
def loadDatasetcsv(name, unwanted_columns, frame=None):
    """Return a DataFrame with ``unwanted_columns`` removed.

    When ``frame`` is ``None`` the data is read from ``../data/<name>.csv``;
    otherwise a copy of ``frame`` is used so the caller's object is left
    untouched. Columns are dropped one at a time, so a label that is not
    present raises whatever ``DataFrame.drop`` raises for it.
    """
    table = pd.read_csv(f'../data/{name}.csv') if frame is None else frame.copy()

    # Remove each unwanted column in turn.
    for label in unwanted_columns:
        table = table.drop(columns=[label])

    # Coerce to a DataFrame if the input was some other pandas-like object.
    return table if isinstance(table, pd.DataFrame) else pd.DataFrame(table)

# Function to process salary string
def process_salary(salary):
    """Convert a salary string such as '₹3,00,000/yr' to a yearly integer.

    - A value containing '/yr' has '₹', ',' and '/yr' removed and is
      converted to ``int``.
    - A value containing '/mo' is converted the same way and multiplied by
      12 to give a yearly amount.
    - Any other string is returned unchanged.
    - Non-string input (e.g. ``None`` or the NaN pandas uses for missing
      cells) is returned unchanged instead of raising ``TypeError``.

    Raises:
        ValueError: If the remaining text is not a valid integer.
    """
    if not isinstance(salary, str):
        return salary

    # '/yr' is checked first, matching the original precedence.
    for suffix, periods_per_year in (('/yr', 1), ('/mo', 12)):
        if suffix in salary:
            digits = salary.replace('₹', '').replace(',', '').replace(suffix, '')
            return int(digits) * periods_per_year
    return salary

In [None]:
def loadDatasetxl(name, unwanted_columns, frame=None):
    """Return a DataFrame with ``unwanted_columns`` removed.

    When ``frame`` is ``None`` the data is read from ``./data/<name>.xls``;
    otherwise a copy of ``frame`` is used so the caller's object is left
    untouched. Columns are dropped one at a time, so a label that is not
    present raises whatever ``DataFrame.drop`` raises for it.
    """
    if frame is not None:
        table = frame.copy()
    else:
        # NOTE(review): the file has an .xls extension but is parsed with
        # read_csv, and the directory is './data/' whereas the CSV loader
        # uses '../data/' -- confirm both are intentional.
        table = pd.read_csv(f'./data/{name}.xls')

    # Remove each unwanted column in turn.
    for label in unwanted_columns:
        table = table.drop(columns=[label])

    # Coerce to a DataFrame if the input was some other pandas-like object.
    if isinstance(table, pd.DataFrame):
        return table
    return pd.DataFrame(table)