In [20]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler

# Step 1: Load the data
# The raw table is read from 'Data.csv' in the current working directory.
# low_memory=False makes pandas infer each column's dtype from the whole file
# instead of chunk by chunk, which avoids mixed-dtype columns.
# NOTE(review): 'Column_Name' looks like a leftover placeholder - confirm which
# column (if any) really has to be forced to str.
data = pd.read_csv('Data.csv', dtype={'Column_Name': str}, low_memory=False)

# Drop columns that do not contain a single value
data = data.dropna(axis=1, how='all')

# Parse the count start date day-first; unparseable values become NaT
data['Count Start Date'] = pd.to_datetime(data['Count Start Date'], dayfirst=True, errors='coerce')
# print (data.info())

# Keep only the counts that started on or after this date. Rows whose date is
# NaT compare as False and are therefore left out.
start_date_filter = '2018-02-09'
# .copy() gives filtered_data its own storage, so later column assignments on
# it do not raise pandas' SettingWithCopyWarning and can never touch `data`.
filtered_data = data[data['Count Start Date'] >= start_date_filter].copy()

# Display the first few rows of the dataset
# print(filtered_data.head())


# # Step 2: Select and normalize traffic volume features
# features = [
#     '5 Day ADT', '7 Day ADT', 'Saturday Volume', 'Sunday Volume', 
#     'AM Peak Volume', 'Mid Peak Volume', 'PM Peak Volume', 
#     'Car', 'LCV', 'MCV', 'HCV-I', 'HCV-II', 'HCV Total'
# ]

# Traffic volume columns that get standardised below
features = [
    '5 Day ADT',
    '7 Day ADT',
    'Saturday Volume',
    'Sunday Volume',
    'AM Peak Volume',
    'Mid Peak Volume',
    'PM Peak Volume',
]

# Disabled: conversion of percent strings (e.g. '12%') to fractions
# for feature in features:
#     filtered_data[feature] = pd.to_numeric(filtered_data[feature].astype(str).str.replace('%', ''), errors='coerce') / 100

# Requested columns that are absent from the loaded table, in list order
missing_features = [column for column in features if column not in data.columns]

if not missing_features:
    # NOTE(review): the scaler is fitted on the full `data` table, not on the
    # date-filtered `filtered_data` built earlier - confirm this is intended.
    traffic_data = data.loc[:, features]

    # Standardise every column (StandardScaler: remove the mean, scale to unit
    # variance); fit and transform are run on the same rows.
    scaler = StandardScaler()
    normalized_data = scaler.fit(traffic_data).transform(traffic_data)

    # Wrap the resulting array in a DataFrame carrying the feature names.
    # The new frame gets a fresh 0..n-1 index.
    normalized_df = pd.DataFrame(data=normalized_data, columns=features)

    # Preview the first five normalised rows
    print(normalized_df.head(5))
else:
    # Nothing is normalised when a column is missing; only report which ones
    print(f"Missing features: {missing_features}")


   5 Day ADT  7 Day ADT  Saturday Volume  Sunday Volume  AM Peak Volume  \
0  -0.837671  -0.837809        -0.832228      -0.812276       -0.897382   
1   0.966970   0.876442         0.635303       0.518040        1.321818   
2  -0.423956  -0.395374        -0.242755      -0.379961       -0.478217   
3  -0.330040  -0.323424        -0.247448      -0.364408       -0.213637   
4   0.935627   0.973758         1.321183       0.750128        1.302494   

   Mid Peak Volume  PM Peak Volume  
0        -0.879372       -0.881611  
1         0.652300        1.083606  
2        -0.235736       -0.344498  
3        -0.222074       -0.278758  
4         1.264058        1.104587  
