<a href="https://colab.research.google.com/github/alvinfranklyndavis/Project2023_v3/blob/main/Data_Prep_GPT_4_Bard_Colab.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [10]:
# CELL 1.1: Package Installation and Library Import

# Upgrade pip and install required packages
!pip install -U --upgrade-strategy eager pip
!pip install -U --upgrade-strategy eager pandas numpy

# Import required libraries
import pandas as pd
import numpy as np
import logging
import os

# Set up logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


[0m

In [11]:
# Cell 1.2: Data Loading from Google Drive and preprocessing Training / Testing dataset

import pandas as pd
import logging
import os
from google.colab import drive

# Set up logging
logger = logging.getLogger(__name__)

# Mount Google Drive
drive.mount('/content/drive')

# Define the directory for datasets in Google Drive
drive_dataset_directory = '/content/drive/My Drive/Predictive_Modeling_Four_Draws/Morning_Draw_Model_Docs/Initial_Data_Prep/'

# Define the paths to the CSV files
csv_filename_train_test = 'A_Initial_Train_Test_Data.csv'
csv_filename_unseen = 'B_Initial_Unseen_Data.csv'

drive_csv_path_train_test = os.path.join(drive_dataset_directory, csv_filename_train_test)
drive_csv_path_unseen = os.path.join(drive_dataset_directory, csv_filename_unseen)

# Check and load the datasets
def load_dataset(file_path):
    if os.path.isfile(file_path):
        print("File found. Proceeding to load the dataset.")
        return pd.read_csv(file_path)
    else:
        print("File not found. Check the file path or the Google Drive mount.")
        return None

train_test_data = load_dataset(drive_csv_path_train_test)
unseen_data = load_dataset(drive_csv_path_unseen)


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
File found. Proceeding to load the dataset.
File found. Proceeding to load the dataset.


In [12]:
# Cell 1.3: Preprocessing Training/Testing Data

import shutil

# Function to preprocess training/testing data
def preprocess_train_test_data(data):
    print("Initial data columns:", data.columns)

    # Remove rows where 'Draw1' is NaN
    data = data.dropna(subset=['Draw1'])

    # Convert 'Date' to datetime with the correct format
    if 'Date' in data.columns:
        print("Converting 'Date' to datetime...")
        data['Date'] = pd.to_datetime(data['Date'], format='%d-%m-%y')  # Specify the correct format here
        data['Year'] = data['Date'].dt.year
        data['Month'] = data['Date'].dt.month
        data['Day'] = data['Date'].dt.day
        print("After extracting Year, Month, Day:", data.columns)
        data.drop(columns=['Date'], inplace=True)
        print("After dropping 'Date':", data.columns)
    else:
        print("Date column not found in the given dataset.")

    # Initialize TARGET VARIABLE 'Prediction1' column with 'Draw1' values
    data['Prediction1'] = data['Draw1']

    # Create shifted columns for previous day's data
    data['Prev_Morning'] = data['Draw1'].shift(1)
    data['Prev_Afternoon'] = data['Draw2'].shift(1)
    data['Prev_Evening'] = data['Draw3'].shift(1)
    data['Prev_Night'] = data['Draw4'].shift(1)

    # Handle NaN values
    data['Prev_Morning'].fillna(13, inplace=True)
    data['Prev_Afternoon'].fillna(34, inplace=True)
    data['Prev_Evening'].fillna(32, inplace=True)
    data['Prev_Night'].fillna(23, inplace=True)

    # Select relevant columns, including 'Prediction1'
    selected_columns = ['Row Number', 'Data_Type', 'Draw1', 'DR1_Prev_Week', 'DR1_2Weeks',
    'DR1_Prev_Entry', 'DR1_Prev_Entry-2', 'DR1_Mov_Avg', 'DR1_Vert_Avg', 'Prev_Morning', 'Prev_Afternoon', 'Prev_Evening', 'Prev_Night', 'Prediction1', 'Year', 'Month', 'Day']
    data = data[selected_columns]

    return data

# Apply preprocessing to the training/testing dataset
train_test_data = preprocess_train_test_data(train_test_data)

# Save the preprocessed training/testing data to a temporary location
temp_save_path = '/content/C_Preprocessed_Train_Test_Data.csv'
train_test_data.to_csv(temp_save_path, index=False)

# Move the saved CSV to your Google Drive folder
save_path = '/content/drive/My Drive/Predictive_Modeling_Four_Draws/Morning_Draw_Model_Docs/Initial_Data_Prep/'
shutil.move(temp_save_path, os.path.join(save_path, 'C_Preprocessed_Train_Test_Data.csv'))

# Display the first few rows of the preprocessed data for verification
print("First few rows of preprocessed training/testing data:")
print(train_test_data.head())


Initial data columns: Index(['Date', 'Row Number', 'Data_Type', 'Draw1', 'DR1_Prev_Week',
       'DR1_2Weeks', 'DR1_Prev_Entry', 'DR1_Prev_Entry-2', 'DR1_Mov_Avg',
       'DR1_Vert_Avg', 'Draw2', 'DR2_Prev_Week', 'DR2_2Weeks',
       'DR2_Prev_Entry', 'DR2_Prev_Entry-2', 'DR2_Mov_Avg', 'DR2_Vert_Avg',
       'Draw3', 'DR3_Prev_Week', 'DR3_2Weeks', 'DR3_Prev_Entry',
       'DR3_Prev_Entry-2', 'DR3_Mov_Avg', 'DR3_Vert_Avg', 'Draw4',
       'DR4_Prev_Week', 'DR4_2Weeks', 'DR4_Prev_Entry', 'DR4_Prev_Entry-2',
       'DR4_Mov_Avg', 'DR4_Vert_Avg'],
      dtype='object')
Converting 'Date' to datetime...
After extracting Year, Month, Day: Index(['Date', 'Row Number', 'Data_Type', 'Draw1', 'DR1_Prev_Week',
       'DR1_2Weeks', 'DR1_Prev_Entry', 'DR1_Prev_Entry-2', 'DR1_Mov_Avg',
       'DR1_Vert_Avg', 'Draw2', 'DR2_Prev_Week', 'DR2_2Weeks',
       'DR2_Prev_Entry', 'DR2_Prev_Entry-2', 'DR2_Mov_Avg', 'DR2_Vert_Avg',
       'Draw3', 'DR3_Prev_Week', 'DR3_2Weeks', 'DR3_Prev_Entry',
       'DR3_Pr

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['Date'] = pd.to_datetime(data['Date'], format='%d-%m-%y')  # Specify the correct format here
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['Year'] = data['Date'].dt.year
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['Month'] = data['Date'].dt.month
A value is trying to be set on a 

In [14]:
# Cell 1.4: Preprocessing Model Unseen Data

# Define the path to the CSV file for model unseen data
csv_filename_unseen = 'B_Initial_Unseen_Data.csv'
drive_csv_path_unseen = os.path.join(drive_dataset_directory, csv_filename_unseen)

# Load the model unseen data
model_unseen_data = load_dataset(drive_csv_path_unseen)

# Function to preprocess unseen data
def preprocess_unseen_data(data):
    print("Initial data columns:", data.columns)

    # Remove rows where 'Draw1' is NaN
    data = data.dropna(subset=['Draw1'])

    # Define possible date formats
    date_formats = ['%d-%m-%y', '%d/%m/%Y']

    # Try to convert 'Date' to datetime with different formats
    for date_format in date_formats:
        try:
            if 'Date' in data.columns:
                print("Converting 'Date' to datetime...")
                data['Date'] = pd.to_datetime(data['Date'], format=date_format)
                data['Year'] = data['Date'].dt.year
                data['Month'] = data['Date'].dt.month
                data['Day'] = data['Date'].dt.day
                print("After extracting Year, Month, Day:", data.columns)
                data.drop(columns=['Date'], inplace=True)
                print("After dropping 'Date':", data.columns)
            else:
                print("Date column not found in the given dataset.")

            # Initialize TARGET VARIABLE 'Prediction1' column with NaN values
            data['Prediction1'] = np.nan

            # Create shifted columns for previous day's data
            data['Prev_Morning'] = data['Draw1'].shift(1)
            data['Prev_Afternoon'] = data['Draw2'].shift(1)
            data['Prev_Evening'] = data['Draw3'].shift(1)
            data['Prev_Night'] = data['Draw4'].shift(1)

            # Handle NaN values
            data['Prev_Morning'].fillna(25, inplace=True)
            data['Prev_Afternoon'].fillna(9, inplace=True)
            data['Prev_Evening'].fillna(7, inplace=True)
            data['Prev_Night'].fillna(5, inplace=True)

            # Select relevant columns, including 'Prediction1'
            selected_columns = ['Row Number', 'Data_Type', 'Draw1', 'DR1_Prev_Week', 'DR1_2Weeks',
            'DR1_Prev_Entry', 'DR1_Prev_Entry-2', 'DR1_Mov_Avg', 'DR1_Vert_Avg', 'Prev_Morning', 'Prev_Afternoon', 'Prev_Evening', 'Prev_Night', 'Prediction1', 'Year', 'Month', 'Day']
            data = data[selected_columns]

            # Save the preprocessed model unseen data to a temporary location
            temp_save_path = '/content/D_Preprocessed_Unseen_Data.csv'
            data.to_csv(temp_save_path, index=False)

            # Move the saved CSV to your Google Drive folder
            save_path = '/content/drive/My Drive/Predictive_Modeling_Four_Draws/Morning_Draw_Model_Docs/Initial_Data_Prep/'
            shutil.move(temp_save_path, os.path.join(save_path, 'D_Preprocessed_Unseen_Data.csv'))

            # Display the first few rows of the preprocessed data for verification
            print("First few rows of preprocessed model unseen data:")
            print(data.head())

            break  # Break the loop if successful date conversion

        except ValueError:
            print("Failed to convert 'Date' with format:", date_format)

# Apply preprocessing to the model unseen data
preprocess_unseen_data(model_unseen_data)


File found. Proceeding to load the dataset.
Initial data columns: Index(['Date', 'Row Number', 'Data_Type', 'Draw1', 'DR1_Prev_Week',
       'DR1_2Weeks', 'DR1_Prev_Entry', 'DR1_Prev_Entry-2', 'DR1_Mov_Avg',
       'DR1_Vert_Avg', 'Draw2', 'DR2_Prev_Week', 'DR2_2Weeks',
       'DR2_Prev_Entry', 'DR2_Prev_Entry-2', 'DR2_Mov_Avg', 'DR2_Vert_Avg',
       'Draw3', 'DR3_Prev_Week', 'DR3_2Weeks', 'DR3_Prev_Entry',
       'DR3_Prev_Entry-2', 'DR3_Mov_Avg', 'DR3_Vert_Avg', 'Draw4',
       'DR4_Prev_Week', 'DR4_2Weeks', 'DR4_Prev_Entry', 'DR4_Prev_Entry-2',
       'DR4_Mov_Avg', 'DR4_Vert_Avg'],
      dtype='object')
Converting 'Date' to datetime...
Failed to convert 'Date' with format: %d-%m-%y
Converting 'Date' to datetime...
After extracting Year, Month, Day: Index(['Date', 'Row Number', 'Data_Type', 'Draw1', 'DR1_Prev_Week',
       'DR1_2Weeks', 'DR1_Prev_Entry', 'DR1_Prev_Entry-2', 'DR1_Mov_Avg',
       'DR1_Vert_Avg', 'Draw2', 'DR2_Prev_Week', 'DR2_2Weeks',
       'DR2_Prev_Entry', 'DR2_P

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['Date'] = pd.to_datetime(data['Date'], format=date_format)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['Year'] = data['Date'].dt.year
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['Month'] = data['Date'].dt.month
A value is trying to be set on a copy of a slice from a DataFrame.


In [24]:
# Cell 2.1: Creating "LINES" feature for Training/Testing and Unseen Datasets

import pandas as pd
import os
import shutil

# Define the path to the CSV file for preprocessed training/testing data
preprocessed_train_test_data_path = '/content/drive/My Drive/Predictive_Modeling_Four_Draws/Morning_Draw_Model_Docs/Initial_Data_Prep/C_Preprocessed_Train_Test_Data.csv'

# Define the path to the CSV file for preprocessed model unseen data
preprocessed_unseen_data_path = '/content/drive/My Drive/Predictive_Modeling_Four_Draws/Morning_Draw_Model_Docs/Initial_Data_Prep/D_Preprocessed_Unseen_Data.csv'

# Load preprocessed training/testing data
preprocessed_train_test_data = pd.read_csv(preprocessed_train_test_data_path)

# Load preprocessed unseen data
preprocessed_unseen_data = pd.read_csv(preprocessed_unseen_data_path)

# List of columns to convert to integers
int_columns = ['Draw1', 'DR1_Prev_Week', 'DR1_Prev_Entry', 'DR1_Prev_Entry-2', 'DR1_Mov_Avg', 'DR1_Vert_Avg',
               'Prev_Morning', 'Prev_Afternoon', 'Prev_Evening', 'Prev_Night']

# Convert specified columns to integers for both datasets
preprocessed_train_test_data[int_columns] = preprocessed_train_test_data[int_columns].astype(int)
preprocessed_unseen_data[int_columns] = preprocessed_unseen_data[int_columns].astype(int)

# Function to assign "Lines" based on the sum of digits
def assign_lines(data, column_name):
    def get_lines(x):
        try:
            # Calculate the sum of digits
            sum_of_digits = sum(map(int, str(x)))
            # Ensure the sum is between 1 and 9
            while sum_of_digits > 9:
                sum_of_digits = sum(map(int, str(sum_of_digits)))
            return sum_of_digits
        except (ValueError, TypeError):
            return None  # Handle non-convertible values by returning None

    data[f'Lines_{column_name}'] = data[column_name].apply(get_lines)

# Handle NaN values in the 'Prediction1' column for unseen data by filling them with 0
preprocessed_unseen_data['Prediction1'].fillna(0, inplace=True)

# Assign "Lines" for specified columns in both datasets
columns_to_assign_lines = ['Draw1', 'DR1_Prev_Week', 'DR1_Prev_Entry']
for column in columns_to_assign_lines:
    assign_lines(preprocessed_train_test_data, column)
    assign_lines(preprocessed_unseen_data, column)

# Define file paths for the new CSVs with "Lines"
lines_train_test_data_path = '/content/drive/My Drive/Predictive_Modeling_Four_Draws/Morning_Draw_Model_Docs/Initial_Data_Prep/E_Lines_Train_Test_Data.csv'
lines_unseen_data_path = '/content/drive/My Drive/Predictive_Modeling_Four_Draws/Morning_Draw_Model_Docs/Initial_Data_Prep/F_Lines_Unseen_Data.csv'

# Save the datasets with "Lines" to new CSVs
preprocessed_train_test_data.to_csv(lines_train_test_data_path, index=False)
preprocessed_unseen_data.to_csv(lines_unseen_data_path, index=False)

# Display a sample of the processed data for verification
print("Sample of preprocessed training/testing data with 'Lines_Draw1':")
print(preprocessed_train_test_data[['Row Number', 'Draw1', 'Lines_Draw1']].head())

print("\nSample of preprocessed unseen data with 'Lines_Draw1':")
print(preprocessed_unseen_data[['Row Number', 'Draw1', 'Lines_Draw1']].head())


Sample of preprocessed training/testing data with 'Lines_Draw1':
   Row Number  Draw1  Lines_Draw1
0           1     19            1
1           2     31            4
2           3     15            6
3           4     31            4
4           6     31            4

Sample of preprocessed unseen data with 'Lines_Draw1':
   Row Number  Draw1  Lines_Draw1
0        1410     13            4
1        1411     21            3
2        1412     15            6
3        1413     13            4
4        1414     12            3


In [None]:
# Cell 1.3: Data Loading from Google Drive and Preprocessing Unseen Dataset

import pandas as pd
import logging
import os
from google.colab import drive

# Set up logging
logger = logging.getLogger(__name__)

# Mount Google Drive
drive.mount('/content/drive')

# Define the directory for datasets in Google Drive
drive_dataset_directory = '/content/drive/My Drive/Predictive_Modeling_Four_Draws/Morning_Draw_Model_Docs/'

# Define the path to the CSV file for unseen data
csv_filename_unseen = 'Model_Unseen_Data.csv'
drive_csv_path_unseen = os.path.join(drive_dataset_directory, csv_filename_unseen)

# Check and load the dataset
def load_dataset(file_path):
    if os.path.isfile(file_path):
        print("File found. Proceeding to load the dataset.")
        return pd.read_csv(file_path)
    else:
        print("File not found. Check the file path or the Google Drive mount.")
        return None

unseen_data = load_dataset(drive_csv_path_unseen)

# Function to preprocess unseen data
def preprocess_unseen_data(data):
    print("Initial data columns:", data.columns)

    # Convert 'Date' to datetime and extract 'Year', 'Month', and 'Day'
    if 'Date' in data.columns:
        print("Converting 'Date' to datetime...")
        data['Date'] = pd.to_datetime(data['Date'])
        data['Year'] = data['Date'].dt.year
        data['Month'] = data['Date'].dt.month
        data['Day'] = data['Date'].dt.day
        print("After extracting Year, Month, Day:", data.columns)
        data.drop(columns=['Date'], inplace=True)
        print("After dropping 'Date':", data.columns)
    else:
        print("Date column not found in the given dataset.")

    # Initialize 'Prediction1' column with NaNs for unseen data
    data['Prediction1'] = np.nan

    # Create shifted columns for previous day's data
    data['Prev_Morning'] = data['Morning'].shift(1)
    data['Prev_Afternoon'] = data['Afternoon'].shift(1)
    data['Prev_Evening'] = data['Evening'].shift(1)

    # Calculate moving averages excluding current row
    initial_window_size = 3  # Increased by 1 to exclude the current row
    columns_to_average = ['Morning', 'Afternoon', 'Evening', 'Night']
    target_columns = ['Mov_Avg_Mor', 'Mov_Avg_Aft', 'Mov_Avg_Eve', 'Mov_Avg_Nig']

    for col, target_col in zip(columns_to_average, target_columns):
    # Roll over an additional row and then shift to exclude the current row
        data[target_col] = data[col].rolling(window=initial_window_size, min_periods=1).mean().shift(1)
    # Manually set the value for the first row
    unseen_data.at[0, 'Mov_Avg_Mor'] = 6
    unseen_data.at[1, 'Mov_Avg_Mor'] = 22
    unseen_data.at[2, 'Mov_Avg_Mor'] = 17.5
    unseen_data.at[3, 'Mov_Avg_Mor'] = 2
    unseen_data.at[4, 'Mov_Avg_Mor'] = 17

    # Calculate vertical averages excluding current row
    vertical_target_columns = ['Vert_Avg_Mor', 'Vert_Avg_Aft', 'Vert_Avg_Eve', 'Vert_Avg_Nig']
    for col, target_col in zip(columns_to_average, vertical_target_columns):
        data[target_col] = data[col].rolling(window=3, min_periods=1).mean().shift(1)

    # Handle NaN values
    data['Prev_Morning'].fillna(25, inplace=True)
    data['Prev_Afternoon'].fillna(9, inplace=True)
    data['Prev_Evening'].fillna(7, inplace=True)

    # Select relevant columns, including 'Prediction1'
    selected_columns = ['Row Number', 'Data_Type', 'Year', 'Month', 'Day', 'Prev_Week', 'Prev_Entry', 'Mov_Avg_Mor', 'Vert_Avg_Mor', 'Prev_Morning', 'Prev_Afternoon', 'Prev_Evening', 'Prediction1']
    data[selected_columns]

    return data

# Apply preprocessing to the unseen dataset
unseen_data = preprocess_unseen_data(unseen_data)

# Display the preprocessed unseen data
print("First few rows of preprocessed unseen data:")
print(unseen_data.head())

In [None]:
# Cell 1.4: # Save the preprocessed training/testing dataset
preprocessed_train_test_path = os.path.join(drive_dataset_directory, '1_preprocessed_train_test_data.csv')
train_test_data.to_csv(preprocessed_train_test_path, index=False)
print("Preprocessed training/testing data saved to Google Drive.")

# Display the first few rows of the preprocessed training/testing data
print("First few rows of preprocessed training/testing data:")
print(train_test_data.head())

# Check for NaN values in the entire dataset
nan_counts = train_test_data.isnull().sum()
print("Count of NaN values in training/testing data:")
print(nan_counts)

# Save the preprocessed unseen dataset
preprocessed_unseen_path = os.path.join(drive_dataset_directory, '2_preprocessed_unseen_data.csv')
unseen_data.to_csv(preprocessed_unseen_path, index=False)
print("Preprocessed unseen data saved to Google Drive.")

# Display the first few rows of the preprocessed unseen data
print("First few rows of preprocessed unseen data:")
print(unseen_data.head())

# Check for NaN values in the entire dataset
nan_counts = unseen_data.isnull().sum()
print("Count of NaN values in unseen data:")
print(nan_counts)


In [None]:
import pandas as pd
import logging
import os

# Set up logging
logger = logging.getLogger(__name__)

# Define the directory for datasets in Google Drive
drive_dataset_directory = '/content/drive/My Drive/Predictive_Modeling_Four_Draws/Morning_Draw_Model_Docs/'

# Define the path to the preprocessed unseen data
preprocessed_unseen_path = os.path.join(drive_dataset_directory, '2_preprocessed_unseen_data.csv')

# Load the preprocessed unseen data
unseen_data = pd.read_csv(preprocessed_unseen_path)
logger.info("Preprocessed unseen data loaded successfully.")

# Define the provided data for imputation
provided_data = [
    {
        'Row Number': 1410,
        'Morning': 13,
        'Prev_Week': 27,
        '2WeeksM': 25,
        'Prev_Entry': 5,
        'Prev_Entry-2': 7,
        'Mov_Avg_Mor': 6,
        'Vert_Avg_Mor': 26,
        'Afternoon': 20,
        'Prev_Week': 7,
        '2WeeksA': 34,
        'Prev_Entry': 13,
        'Prev_Entry-2': 5,
        'Mov_Avg_Aft': 9,
        'Vert_Avg_Aft': 20.5,
        'Evening': 26,
        'Prev_Week': 26,
        '2WeeksE': 24,
        'Prev_Entry': 20,
        'Prev_Entry-2': 13,
        'Mov_Avg_Eve': 16.5,
        'Vert_Avg_Eve': 25,
        'Night': 18,
        'Prev_Week': 26,
        '2WeeksN': 3,
        'Prev_Entry': 26,
        'Prev_Entry-2': 20,
        'Mov_Avg_Nig': 23,
        'Vert_Avg_Nig': 14.5
    },
    {
        'Row Number': 1411,
        'Morning': 21,
        'Prev_Week': 33,
        '2WeeksM': 12,
        'Prev_Entry': 18,
        'Prev_Entry-2': 26,
        'Mov_Avg_Mor': 22,
        'Vert_Avg_Mor': 22.5,
        'Afternoon': 31,
        'Prev_Week': 18,
        '2WeeksA': 36,
        'Prev_Entry': 21,
        'Prev_Entry-2': 18,
        'Mov_Avg_Aft': 19.5,
        'Vert_Avg_Aft': 27,
        'Evening': 7,
        'Prev_Week': 9,
        '2WeeksE': 3,
        'Prev_Entry': 31,
        'Prev_Entry-2': 21,
        'Mov_Avg_Eve': 26,
        'Vert_Avg_Eve': 6,
        'Night': 28,
        'Prev_Week': 8,
        '2WeeksN': 5,
        'Prev_Entry': 7,
        'Prev_Entry-2': 31,
        'Mov_Avg_Nig': 19,
        'Vert_Avg_Nig': 6.5
    },
    {
        'Row Number': 1412,
        'Morning': 15,
        'Prev_Week': 27,
        '2WeeksM': 3,
        'Prev_Entry': 28,
        'Prev_Entry-2': 7,
        'Mov_Avg_Mor': 17.5,
        'Vert_Avg_Mor': 15,
        'Afternoon': 5,
        'Prev_Week': 22,
        '2WeeksA': 10,
        'Prev_Entry': 15,
        'Prev_Entry-2': 28,
        'Mov_Avg_Aft': 21.5,
        'Vert_Avg_Aft': 16,
        'Evening': 2,
        'Prev_Week': 32,
        '2WeeksE': 4,
        'Prev_Entry': 5,
        'Prev_Entry-2': 15,
        'Mov_Avg_Eve': 10,
        'Vert_Avg_Eve': 18,
        'Night': 2,
        'Prev_Week': 30,
        '2WeeksN': 6,
        'Prev_Entry': 2,
        'Prev_Entry-2': 5,
        'Mov_Avg_Nig': 3.5,
        'Vert_Avg_Nig': 18
    },
    {
        'Row Number': 1413,
        'Morning': 13,
        'Prev_Week': 20,
        '2WeeksM': 11,
        'Prev_Entry': 2,
        'Prev_Entry-2': 2,
        'Mov_Avg_Mor': 2,
        'Vert_Avg_Mor': 15.5,
        'Row Number': 1413,
        'Afternoon': 28,
        'Prev_Week': 29,
        '2WeeksA': 19,
        'Prev_Entry': 13,
        'Prev_Entry-2': 2,
        'Mov_Avg_Aft': 7.5,
        'Vert_Avg_Aft': 24,
        'Evening': 22,
        'Prev_Week': 23,
        '2WeeksE': 29,
        'Prev_Entry': 28,
        'Prev_Entry-2': 13,
        'Mov_Avg_Eve': 20.5,
        'Vert_Avg_Eve': 26,
        'Night': 12,
        'Prev_Week': 2,
        '2WeeksN': 7,
        'Prev_Entry': 22,
        'Prev_Entry-2': 28,
        'Mov_Avg_Nig': 25,
        'Vert_Avg_Nig': 4.5
    },
    {
        'Row Number': 1414,
        'Morning': 12,
        'Prev_Week': 29,
        '2WeeksM': 14,
        'Prev_Entry': 12,
        'Prev_Entry-2': 22,
        'Mov_Avg_Mor': 17,
        'Vert_Avg_Mor': 21.5,
        'Row Number': 1414,
        'Afternoon': 35,
        'Prev_Week': 7,
        '2WeeksA': 31,
        'Prev_Entry': 12,
        'Prev_Entry-2': 12,
        'Mov_Avg_Aft': 12,
        'Vert_Avg_Aft': 19,
        'Evening': 31,
        'Prev_Week': 5,
        '2WeeksE': 32,
        'Prev_Entry': 35,
        'Prev_Entry-2': 12,
        'Mov_Avg_Eve': 23.5,
        'Vert_Avg_Eve': 18.5,
        'Night': 11,
        'Prev_Week': 3,
        '2WeeksN': 18,
        'Prev_Entry': 31,
        'Prev_Entry-2': 35,
        'Mov_Avg_Nig': 33,
        'Vert_Avg_Nig': 10.5
    },
    {
        'Row Number': 1415,
        'Morning': 14,
        'Prev_Week': 25,
        '2WeeksM': 5,
        'Prev_Entry': 11,
        'Prev_Entry-2': 31,
        'Mov_Avg_Mor': 21,
        'Vert_Avg_Mor': 15,
        'Row Number': 1415,
        'Afternoon': 2,
        'Prev_Week': 9,
        '2WeeksA': 14,
        'Prev_Entry': 14,
        'Prev_Entry-2': 11,
        'Mov_Avg_Aft': 12.5,
        'Vert_Avg_Aft': 11.5,
        'Evening': 23,
        'Prev_Week': 7,
        '2WeeksE': 30,
        'Prev_Entry': 2,
        'Prev_Entry-2': 14,
        'Mov_Avg_Eve': 8,
        'Vert_Avg_Eve': 18.5,
        'Row Number': 1415,
        'Night': 25,
        'Prev_Week': 5,
        '2WeeksN': 22,
        'Prev_Entry': 23,
        'Prev_Entry-2': 2,
        'Mov_Avg_Nig': 12.5,
        'Vert_Avg_Nig': 13.5
    }
]

# Iterate through the provided data to update the corresponding columns in the DataFrame
for data in provided_data:
    row_number = data['Row Number']
    for column in data.keys():
        if column != 'Row Number':
            unseen_data.loc[unseen_data['Row Number'] == row_number, column] = data[column]

# Display the dataset after NaN handling
print("First few rows of unseen data after NaN handling:")
print(unseen_data.head())

# Check for NaN values in the entire dataset
nan_counts = unseen_data.isnull().sum()
print("Count of NaN values in unseen data:")
print(nan_counts)

# Save the updated unseen data
updated_unseen_path = os.path.join(drive_dataset_directory, '2_preprocessed_unseen_data.csv')
unseen_data.to_csv(updated_unseen_path, index=False)
logger.info("Updated unseen data saved successfully.")
