**IMPORTED LIBRARIES**

In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
import xgboost as xgb
import lightgbm as lgb

**LOADING DATA FOR USE**

In [None]:
# Input locations on the mounted Google Drive; edit these to point at your own copies.
train_file = '/content/drive/MyDrive/YTSP/Train.csv'  # Replace with your actual file path
test_file = '/content/drive/MyDrive/YTSP/Test.csv'
graph_file = '/content/drive/MyDrive/YTSP/Graph.csv'
sample_submission_file = '/content/drive/MyDrive/YTSP/SampleSubmission.csv'

# Load datasets
train_data = pd.read_csv(train_file)
test_data = pd.read_csv(test_file)
graph_data = pd.read_csv(graph_file)
sample_submission = pd.read_csv(sample_submission_file)

# Check the shape and basic info of the train_data after loading
print(f"Shape of train_data: {train_data.shape}")
# DataFrame.info() writes its summary straight to stdout and returns None, so it
# must be called on its own line; embedding it in an f-string prints the summary
# *before* the label and then renders a literal "None".
print("Info about train_data:")
train_data.info()

# Check the first few rows to ensure it's not empty
print("First few rows of train_data after loading:")
print(train_data.head())


**EXAMINING AND CLEANING DATA**

In [None]:
# Normalize column names first (remove stray leading/trailing spaces) so that every
# later lookup by name -- including the 'target' subset below -- sees the clean labels.
# Doing this after dropna would raise KeyError if the CSV header were e.g. ' target'.
train_data.columns = train_data.columns.str.strip()

# Check for missing values in the training data
print("Missing values in train_data:")
print(train_data.isnull().sum())

# Drop rows where 'target' is missing; a row without a label cannot be used for training
train_data = train_data.dropna(subset=['target'])

# Convert 'day' to the day of the week (Monday=0 ... Sunday=6); unparseable values
# become NaT under errors='coerce' and therefore NaN after .dt.dayofweek.
# NOTE(review): this cell overwrites 'day' in place, so re-running it without
# reloading the data re-parses the already-converted integers -- restart from the
# loading cell before re-running.
train_data['day'] = pd.to_datetime(train_data['day'], errors='coerce').dt.dayofweek

# Convert 'prediction_type' to integer category codes (missing values become -1)
train_data['prediction_type'] = train_data['prediction_type'].astype('category').cat.codes

# Check the types of the columns
print("Data types after conversion:")
print(train_data.dtypes)

# Print columns of train_data
print(f"Columns in train_data after cleaning: {train_data.columns}")


**TRAINING MODEL AND PREDICTION**

In [None]:
# Drop the label and the identifier/time columns so only feature columns remain
columns_to_drop = ['target', 'ID', 'timestamp', '15_min_interval']
X_train = train_data.drop(columns=columns_to_drop, errors='ignore')

# Check the shape of X_train after dropping
print(f"Shape of X_train after dropping columns: {X_train.shape}")
print(f"Columns in X_train after dropping: {X_train.columns}")

# Extract target (None when the column is absent)
y_train = train_data['target'] if 'target' in train_data.columns else None

# Only inspect y_train when it exists; calling .isnull() on None would raise
# AttributeError before the validity check below could report the problem.
if y_train is not None:
    print(f"Missing values in y_train: {y_train.isnull().sum()}")

# Evaluate the validity condition once and reuse it for both the report and the guard
data_ready = not X_train.empty and y_train is not None and not y_train.empty
if data_ready:
    print("Data appears ready for model training.")
else:
    print("Error: X_train or y_train is empty. Verify earlier steps.")


# Model Training (Only proceed if data is valid)
if data_ready:
    # Example of training an XGBoost model
    model = xgb.XGBRegressor(objective='reg:squarederror', n_estimators=100, max_depth=6)

    # Fit the model to the training data
    model.fit(X_train, y_train)

    # Build the test feature frame on a copy so test_data itself is left untouched.
    # Column names are stripped the same way the training columns were, so that the
    # name-based drop and the feature alignment below compare like with like.
    X_test = test_data.copy()
    X_test.columns = X_test.columns.str.strip()
    X_test = X_test.drop(columns=['ID', 'timestamp', '15_min_interval'], errors='ignore')

    # Perform the same transformation on the test set as on X_train
    X_test['day'] = pd.to_datetime(X_test['day'], errors='coerce').dt.dayofweek
    # NOTE(review): these category codes are derived from the test set alone, so they
    # only match the training codes if both sets contain the same prediction_type
    # values -- confirm, or share one category list between train and test.
    X_test['prediction_type'] = X_test['prediction_type'].astype('category').cat.codes

    # The model was fitted on a DataFrame, so prediction input must carry the same
    # feature names in the same order. Fail with a clear message if a training
    # feature is absent, then select/reorder to match X_train (extra test-only
    # columns are dropped by the selection).
    missing_features = [col for col in X_train.columns if col not in X_test.columns]
    if missing_features:
        raise ValueError(f"Test data is missing features used for training: {missing_features}")
    X_test = X_test[X_train.columns]

    predictions = model.predict(X_test)

    # The predictions are written positionally, so the submission template must have
    # exactly one row per test row.
    if len(predictions) != len(sample_submission):
        raise ValueError(
            f"Got {len(predictions)} predictions for {len(sample_submission)} submission rows."
        )

    # Save predictions to the sample submission file
    sample_submission['target'] = predictions
    sample_submission.to_csv('/content/drive/MyDrive/YTSP/Submissions.csv', index=False)

    print("Model training and submission completed successfully.")
else:
    print("Error: Model training skipped due to empty training data.")