In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split

# Load the housing data from the current working directory.
# A missing file is reported by raising, not by print() + exit(): exit() is an
# interactive-shell helper and, when run inside a notebook, can shut the
# kernel down instead of simply stopping this cell. Raising halts execution
# here, keeps the session alive, and still shows the actionable message.
try:
    dataset = pd.read_csv('USA_Housing.csv')
except FileNotFoundError as err:
    raise FileNotFoundError(
        "Error: 'USA_Housing.csv' not found. Please upload the dataset to your Colab session."
    ) from err

# Preview the loaded frame, followed by a visual divider.
print(
    "Dataset successfully loaded. Here are the first 5 rows:",
    dataset.head(),
    "\n" + "="*50 + "\n",
    sep="\n",
)

# Target is the 'Price' column; features are every remaining column
# except 'Address'.
y = dataset['Price']
X = dataset.drop(columns=['Price', 'Address'])

print(f"Features (X) shape: {X.shape}")
print(f"Target (y) shape: {y.shape}\n")

print("--- 1. Performing a 2-Way Split (80% Train, 20% Test) ---")

# Hold out 20% of the rows for testing; the fixed random_state makes the
# partition repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, test_size=0.2)

# Report the shape of each piece, then a divider before the next section.
print(
    f"X_train shape: {X_train.shape}",
    f"X_test shape: {X_test.shape}",
    f"y_train shape: {y_train.shape}",
    f"y_test shape: {y_test.shape}",
    "\n" + "="*50 + "\n",
    sep="\n",
)

print("--- 2. Performing a 3-Way Split (70% Train, 15% Validation, 15% Test) ---")

# Stage 1: set aside 15% of all rows as the test set.
X_train_val, X_test_3way, y_train_val, y_test_3way = train_test_split(
    X, y, random_state=42, test_size=0.15
)

# Stage 2: split the remaining 85% into train and validation. The validation
# fraction is expressed relative to that remainder, so 0.15 / 0.85 of it
# corresponds to 15% of the original rows.
val_size_relative = 0.15 / 0.85
X_train_3way, X_val, y_train_3way, y_val = train_test_split(
    X_train_val, y_train_val, random_state=42, test_size=val_size_relative
)

# Summarise each partition as a row count and as a share of the full dataset.
print(f"Total original samples: {len(X)}")
print(f"Training set size: {len(X_train_3way)} ({len(X_train_3way)/len(X):.0%})")
print(f"Validation set size: {len(X_val)} ({len(X_val)/len(X):.0%})")
print(f"Test set size: {len(X_test_3way)} ({len(X_test_3way)/len(X):.0%})")
print(
    "\nFinal Shapes:",
    f"X_train shape: {X_train_3way.shape}",
    f"X_val shape:   {X_val.shape}",
    f"X_test shape:  {X_test_3way.shape}",
    sep="\n",
)

Dataset successfully loaded. Here are the first 5 rows:
   Avg. Area Income  Avg. Area House Age  Avg. Area Number of Rooms  \
0      79545.458574             5.682861                   7.009188   
1      79248.642455             6.002900                   6.730821   
2      61287.067179             5.865890                   8.512727   
3      63345.240046             7.188236                   5.586729   
4      59982.197226             5.040555                   7.839388   

   Avg. Area Number of Bedrooms  Area Population         Price  \
0                          4.09     23086.800503  1.059034e+06   
1                          3.09     40173.072174  1.505891e+06   
2                          5.13     36882.159400  1.058988e+06   
3                          3.26     34310.242831  1.260617e+06   
4                          4.23     26354.109472  6.309435e+05   

                                             Address  
0  208 Michael Ferry Apt. 674\nLaurabury, NE 3701...  
1  188 Joh