In [None]:
import pandas as pd
import mysql.connector
from mysql.connector import Error
from dotenv import load_dotenv
import os
from datetime import datetime
 
# Load environment variables from .env file so the os.getenv() lookups
# below can see them.
load_dotenv()

# Connection details from environment variables.
# os.getenv() returns None for any variable that is not set.
FILESS_MYSQL_HOST = os.getenv("FILESS_MYSQL_HOST")
# Fall back to 3306 (the standard MySQL port) when the variable is unset or
# empty; int(None) / int("") would otherwise raise before any connection
# attempt is made.
FILESS_MYSQL_PORT = int(os.getenv("FILESS_MYSQL_PORT") or 3306)
FILESS_MYSQL_USER = os.getenv("FILESS_MYSQL_USER")
FILESS_MYSQL_PASSWORD = os.getenv("FILESS_MYSQL_PASSWORD")
FILESS_MYSQL_DATABASE = os.getenv("FILESS_MYSQL_DATABASE")
FILESS_MYSQL_TABLE = os.getenv("FILESS_MYSQL_TABLE")

# CSV file path, relative to the current working directory
# (correct the file path if needed).
csv_file_path = "Weather_data.csv"

# Table name where the data will be uploaded, taken from the
# FILESS_MYSQL_TABLE environment variable.
table_name = FILESS_MYSQL_TABLE
 
# Refuse to run without a table name: the f-strings below would otherwise
# interpolate the text "None" into the DROP/CREATE/INSERT statements.
if not table_name:
    raise ValueError("FILESS_MYSQL_TABLE is not set; cannot determine the target table.")

# Column order shared by the INSERT statement and each per-row value tuple.
columns = (
    'name', 'datetime', 'tempmax', 'tempmin', 'temp', 'feelslikemax', 'feelslikemin',
    'feelslike', 'dew', 'humidity', 'precip', 'precipprob', 'precipcover', 'preciptype',
    'snow', 'snowdepth', 'windgust', 'windspeed', 'winddir', 'sealevelpressure',
    'cloudcover', 'visibility', 'solarradiation', 'solarenergy', 'uvindex', 'severerisk',
    'sunrise', 'sunset', 'moonphase', 'conditions', 'description', 'icon', 'stations',
)


def extract_time_from_datetime(datetime_str):
    """Return the time-of-day part of a 'YYYY-MM-DDTHH:MM:SS' string.

    Returns None when the value is missing (NaN/None) or does not parse
    with that exact format.
    """
    if pd.isna(datetime_str):
        return None
    try:
        return datetime.strptime(datetime_str, "%Y-%m-%dT%H:%M:%S").time()
    except (ValueError, TypeError):
        # ValueError: wrong format; TypeError: the cell is not a string.
        return None


# Step 1: Load the CSV data into a pandas DataFrame *before* touching the
# database, so an unreadable/missing CSV cannot leave the table dropped.
data = pd.read_csv(csv_file_path)
print("CSV data loaded into pandas DataFrame.")

# Pre-bind both handles so the `finally` clause can tell whether they were
# ever created (connect() itself may raise).
connection = None
cursor = None

try:
    # Step 2: Establish a connection to MySQL server
    connection = mysql.connector.connect(
        host=FILESS_MYSQL_HOST,
        database=FILESS_MYSQL_DATABASE,
        user=FILESS_MYSQL_USER,
        password=FILESS_MYSQL_PASSWORD,
        port=FILESS_MYSQL_PORT
    )

    if connection.is_connected():
        print("Connected to MySQL Server successfully!")

        # Step 3: Create a cursor to execute SQL queries
        cursor = connection.cursor()

        # Step 4: Drop table if it already exists (for clean insertion)
        cursor.execute(f"DROP TABLE IF EXISTS {table_name};")
        print(f"Table `{table_name}` dropped if it existed.")

        # Step 5: Create a table structure to match the new schema
        create_table_query = f"""
        CREATE TABLE {table_name} (
            name VARCHAR(50),
            datetime DATE,
            tempmax FLOAT,
            tempmin FLOAT,
            temp FLOAT,
            feelslikemax FLOAT,
            feelslikemin FLOAT,
            feelslike FLOAT,
            dew FLOAT,
            humidity FLOAT,
            precip FLOAT,
            precipprob FLOAT,
            precipcover FLOAT,
            preciptype VARCHAR(50),
            snow FLOAT,
            snowdepth FLOAT,
            windgust FLOAT,
            windspeed FLOAT,
            winddir FLOAT,
            sealevelpressure FLOAT,
            cloudcover FLOAT,
            visibility FLOAT,
            solarradiation FLOAT,
            solarenergy FLOAT,
            uvindex FLOAT,
            severerisk FLOAT,
            sunrise TIME,
            sunset TIME,
            moonphase FLOAT,
            conditions VARCHAR(255),
            description VARCHAR(255),
            icon VARCHAR(50),
            stations VARCHAR(100)
        );
        """
        cursor.execute(create_table_query)
        print(f"Table `{table_name}` created successfully!")

        # Step 6: Insert data one record at a time
        total_records = len(data)  # Get total records in the DataFrame
        print(f"Starting data insertion into `{table_name}` for {total_records} records.")

        # The statement is identical for every row, so build it once.
        insert_query = f"""
            INSERT INTO {table_name} (
                name, datetime, tempmax, tempmin, temp, feelslikemax, feelslikemin, feelslike,
                dew, humidity, precip, precipprob, precipcover, preciptype, snow, snowdepth,
                windgust, windspeed, winddir, sealevelpressure, cloudcover, visibility,
                solarradiation, solarenergy, uvindex, severerisk, sunrise, sunset, moonphase,
                conditions, description, icon, stations
            )
            VALUES ({', '.join(['%s'] * len(columns))});
            """

        # enumerate() supplies the progress counter so it does not depend on
        # the DataFrame index being 0-based integers.
        for position, (_, row) in enumerate(data.iterrows(), start=1):
            # Ensure 'datetime' is formatted correctly as 'YYYY-MM-DD';
            # anything that does not parse (including NaN) becomes NULL.
            try:
                formatted_datetime = datetime.strptime(str(row['datetime']), '%Y-%m-%d').date()
            except ValueError:
                formatted_datetime = None

            # Columns whose values are parsed rather than copied verbatim.
            parsed = {
                'datetime': formatted_datetime,
                'sunrise': extract_time_from_datetime(row['sunrise']),
                'sunset': extract_time_from_datetime(row['sunset']),
            }

            # Prepare data for insert (replace NaN with None so MySQL gets NULL)
            record = tuple(
                parsed[col] if col in parsed
                else (row[col] if pd.notna(row[col]) else None)
                for col in columns
            )

            # Execute the insertion query for the current row
            cursor.execute(insert_query, record)
            print(f"Inserted record {position} of {total_records} successfully.")

        # Commit once for the whole load: one round trip instead of one per
        # row, and a failed run leaves no partially-filled table behind
        # (uncommitted rows are discarded when the connection closes).
        connection.commit()
        print(f"All {total_records} records inserted successfully into `{table_name}`.")

except Error as e:
    # Step 7: Handle any errors raised by the MySQL connector
    print("Error while connecting to MySQL or inserting data:", e)

finally:
    # Step 8: Close the cursor and connection (only those that were created)
    if connection is not None and connection.is_connected():
        if cursor is not None:
            cursor.close()
        connection.close()
        print("MySQL connection is closed.")