In [None]:
import os
import sys
import pandas as pd

from sklearn.model_selection import train_test_split
from dataclasses import dataclass

from lib.exceptions import CustomException
from lib.logger import logging
from src.preprocessing import DataTransformation
from src.preprocessing import DataTransformationConfig

from src.components.model_trainer import ModelTrainerConfig
from src.components.model_trainer import ModelTrainer

### Create Database connection

In [None]:
import mysql.connector

import getpass

# Connection settings are read from the environment so that no credentials are
# stored in the notebook. Set MYSQL_HOST / MYSQL_USER / MYSQL_PASSWORD /
# MYSQL_DATABASE before running; the password is prompted for when it is unset.
db_config = {
    'host': os.environ.get('MYSQL_HOST', 'localhost'),
    'user': os.environ.get('MYSQL_USER', 'jogesh'),
    'password': os.environ.get('MYSQL_PASSWORD') or getpass.getpass('MySQL password: '),
    'database': os.environ.get('MYSQL_DATABASE', 'coordinates_data'),
}

# Pre-bind the handles so the `finally` clause can tell what was actually opened.
connection = None
cursor = None
try:
    # Establish a connection to the MySQL server
    connection = mysql.connector.connect(**db_config)

    # Opening a cursor confirms the session is usable. Nothing is executed,
    # so there is nothing to commit.
    cursor = connection.cursor()

    print("Connection successful!")

except mysql.connector.Error as err:
    print(f"Error connecting to MySQL: {err}")

finally:
    # Release the cursor and connection on every path, including failures
    # that happen after the connection has been opened.
    if cursor is not None:
        cursor.close()
    if connection is not None:
        connection.close()

### Ingest and preprocess the data

In [None]:
#pip install mysql-connector-python
# Run the command above if the MySQL connector is not installed
from sklearn.model_selection import train_test_split
import mysql.connector
import pandas as pd
import os

class DataIngestion:
    """Export one MySQL table to CSV files, together with a train/test split.

    Attributes:
        folder_path: Directory the CSV files are written to.
        host: MySQL server host name.
        user: MySQL user name.
        password: Password for ``user``.
        database: Name of the database to connect to.
        table: Name of the table whose rows are exported.
    """

    def __init__(self, folder_path, host, user, password, database, table):
        """Store the output folder and the MySQL connection settings."""
        self.folder_path = folder_path
        self.host = host
        self.user = user
        self.password = password
        self.database = database
        self.table = table

    def ingest_data(self):
        """Fetch every row of ``self.table`` and write it out as CSV.

        Three files are written inside ``self.folder_path`` (created when
        missing): ``data.csv`` with all rows, and ``train.csv`` / ``test.csv``
        holding an 80/20 split produced by ``train_test_split`` with
        ``random_state=42``.

        Errors are reported with ``print`` instead of being raised. The cursor
        and the connection are closed on every path.

        Returns:
            None.
        """
        # Pre-bind the handles so the cleanup step knows what was opened.
        connection = None
        cursor = None
        try:
            # Connect to the MySQL database
            connection = mysql.connector.connect(
                host=self.host,
                user=self.user,
                password=self.password,
                database=self.database
            )
            cursor = connection.cursor()

            # NOTE(review): the table name is interpolated into the SQL text
            # (identifiers cannot be bound as query parameters). Only pass
            # trusted table names.
            query = f"SELECT * FROM {self.table}"
            cursor.execute(query)

            # Fetch all the rows
            data = cursor.fetchall()

            # Get column names from the cursor description
            column_names = [desc[0] for desc in cursor.description]

            # Create a DataFrame from the fetched data
            df = pd.DataFrame(data, columns=column_names)
            train_set, test_set = train_test_split(df, test_size=0.2, random_state=42)

            # Store the DataFrame as CSV in the specified folder
            os.makedirs(self.folder_path, exist_ok=True)
            file_path = os.path.join(self.folder_path, 'data.csv')
            train_path = os.path.join(self.folder_path, 'train.csv')
            test_path = os.path.join(self.folder_path, 'test.csv')
            df.to_csv(file_path, index=False)
            train_set.to_csv(train_path, index=False)
            test_set.to_csv(test_path, index=False)

            print("Data ingestion completed successfully.")

        except mysql.connector.Error as e:
            print(f"Error connecting to MySQL: {e}")

        except Exception as e:
            print(f"An error occurred: {e}")

        finally:
            # Close the cursor first, then the connection. Failures while
            # closing are reported in the same print-only style as above so
            # this method keeps its "never raises" behaviour.
            for resource in (cursor, connection):
                if resource is None:
                    continue
                try:
                    resource.close()
                except Exception as e:
                    print(f"Error closing MySQL resource: {e}")

if __name__ == "__main__":
    import getpass

    # Build the path with os.path.join. The previous literal '.\data\raw_data'
    # contained '\r', which Python reads as a carriage return, so the folder
    # name was silently corrupted.
    folder_path = os.path.join('.', 'data', 'raw_data')

    # Connection settings are read from the environment so that no credentials
    # are stored in the notebook; the password is prompted for when it is unset.
    host = os.environ.get('MYSQL_HOST', 'localhost')
    user = os.environ.get('MYSQL_USER', 'jogesh')
    password = os.environ.get('MYSQL_PASSWORD') or getpass.getpass('MySQL password: ')
    database = os.environ.get('MYSQL_DATABASE', 'coordinates_data')
    table = 'coordinates_table'

    data_ingestion_instance = DataIngestion(folder_path, host, user, password, database, table)
    data_ingestion_instance.ingest_data()




### Preprocessing class

In [None]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from lib.utils import save_pkl_object
import os
import pickle
import sys

class CustomException(Exception):
    """Notebook-local exception type that adds nothing to ``Exception``.

    NOTE(review): defining this class rebinds the ``CustomException`` name
    imported from ``lib.exceptions`` in the first cell; confirm which of the
    two is meant to be used from here on.
    """

class Preprocessing:
    """Scale feature matrices with a scaler fitted on the training split.

    Attributes:
        scaler: Object exposing ``fit_transform`` and ``transform``.
    """

    def __init__(self, scaler=None):
        """Create the preprocessor.

        Args:
            scaler: Scaler to use. When omitted, a new ``StandardScaler`` is
                created for this instance. (A ``StandardScaler()`` default in
                the signature would be built once and shared, and refitted,
                by every instance constructed without an argument.)
        """
        self.scaler = StandardScaler() if scaler is None else scaler

    def preprocess(self, X_train, y_train, X_test, y_test):
        """Fit the scaler on ``X_train`` and apply it to both feature sets.

        Args:
            X_train: Training features; used to fit the scaler.
            y_train: Training targets; returned unchanged.
            X_test: Test features; transformed with the already-fitted scaler.
            y_test: Test targets; returned unchanged.

        Returns:
            Tuple ``(scaled_X_train, y_train, scaled_X_test, y_test)``.
        """
        # Fit on the training split only, then reuse that fit for the test
        # split.
        scaled_X_train = self.scaler.fit_transform(X_train)
        scaled_X_test = self.scaler.transform(X_test)

        return scaled_X_train, y_train, scaled_X_test, y_test


# Example usage:
# X_train, y_train, X_test, y_test must already be defined as DataFrames.
# TODO(review): no visible cell defines them, so this cell raises NameError on
# a fresh kernel until an earlier cell loads the train/test data.

preprocessor = Preprocessing()

scaled_X_train, y_train, scaled_X_test, y_test = preprocessor.preprocess(X_train, y_train, X_test, y_test)

# Build the path with os.path.join. The previous literal
# '.\models\preprocessor.pkl' relied on invalid escape sequences and only
# named a sub-folder on Windows.
preprocessor_path = os.path.join('.', 'models', 'preprocessor.pkl')

# Preprocessing has no save_pkl_object method, so the earlier
# preprocessor.save_pkl_object(...) call raised AttributeError. The signature
# of lib.utils.save_pkl_object is not visible here, so the fitted preprocessor
# is pickled directly.
os.makedirs(os.path.dirname(preprocessor_path), exist_ok=True)
with open(preprocessor_path, 'wb') as preprocessor_file:
    pickle.dump(preprocessor, preprocessor_file)
