In [28]:
import pandas as pd
from sklearn.preprocessing import OneHotEncoder

def town_encoding(df):
    """One-hot encode the ``town`` column of ``df``.

    A fresh ``OneHotEncoder`` is fitted on the frame passed in, so the set of
    generated columns depends on the town values present in that frame.

    Parameters
    ----------
    df : pandas.DataFrame
        Input frame; may or may not contain a ``town`` column.

    Returns
    -------
    pandas.DataFrame
        ``df`` itself (same object) when it has no ``town`` column. Otherwise
        a new frame in which ``town`` is replaced by the encoder's indicator
        columns, appended after the remaining original columns.
    """
    if "town" not in df.columns:
        return df

    # drop='first' leaves out the first category's column to avoid redundancy.
    town_encoder = OneHotEncoder(sparse_output=False, drop='first')
    indicator_values = town_encoder.fit_transform(df[['town']])

    # Reuse df's index so the concat below lines up row for row.
    indicators = pd.DataFrame(
        indicator_values,
        columns=town_encoder.get_feature_names_out(['town']),
        index=df.index,
    )

    remaining = df.drop(columns=['town'])
    return pd.concat([remaining, indicators], axis=1)

In [29]:
def flat_encoding(df):
    """One-hot encode the ``flat_type`` column of ``df``.

    A fresh ``OneHotEncoder`` is fitted on the frame passed in, so the set of
    generated columns depends on the flat types present in that frame.

    Parameters
    ----------
    df : pandas.DataFrame
        Input frame; may or may not contain a ``flat_type`` column.

    Returns
    -------
    pandas.DataFrame
        ``df`` itself (same object) when it has no ``flat_type`` column.
        Otherwise a new frame in which ``flat_type`` is replaced by the
        encoder's indicator columns, appended after the remaining original
        columns.
    """
    if "flat_type" not in df.columns:
        return df

    # drop='first' leaves out the first category's column.
    flat_encoder = OneHotEncoder(sparse_output=False, drop='first')
    indicator_values = flat_encoder.fit_transform(df[['flat_type']])

    # Reuse df's index so the concat below lines up row for row.
    indicators = pd.DataFrame(
        indicator_values,
        columns=flat_encoder.get_feature_names_out(['flat_type']),
        index=df.index,
    )

    remaining = df.drop(columns=['flat_type'])
    return pd.concat([remaining, indicators], axis=1)

In [30]:
def load_and_process(file_path, output_path):
    """Read a CSV, encode its categorical columns, and write the result.

    Steps: read ``file_path``, drop the ``date`` column if present, apply
    ``town_encoding`` then ``flat_encoding``, and save to ``output_path``
    without the index.

    Parameters
    ----------
    file_path : path-like
        Location of the input CSV.
    output_path : path-like
        Location the processed CSV is written to.

    Returns
    -------
    pandas.DataFrame or None
        The processed frame, or ``None`` if a ``FileNotFoundError`` was
        raised (an error message is printed in that case).
    """
    try:
        # 'date' is optional in the input; errors='ignore' skips it if absent.
        frame = pd.read_csv(file_path).drop(columns=['date'], errors='ignore')
        frame = flat_encoding(town_encoding(frame))
        frame.to_csv(output_path, index=False)
    except FileNotFoundError:
        # NOTE: any FileNotFoundError raised in the steps above is reported
        # as the input file being missing.
        print(f"Error: File '{file_path}' not found.")
        return None

    print(f"Processed data saved to: {output_path}")
    return frame

In [31]:
if __name__ == "__main__":
    # Input and output CSV locations (relative paths).
    input_file = "../Dataset/Resaleflatprices.csv"  # Adjust path if needed
    output_file = "../Dataset/Updated_Resaleflatprices.csv"  # Output file

    processed_df = load_and_process(input_file, output_file)

    # load_and_process returns None when the input file could not be found;
    # only preview the data when processing succeeded.
    if processed_df is not None:
        print("Processed Data Sample:", processed_df.head(), sep="\n")

Processed data saved to: ../Dataset/Updated_Resaleflatprices.csv
Processed Data Sample:
     month block        street_name storey_range  floor_area_sqm  \
0  2017-01   406  ANG MO KIO AVE 10     10 TO 12            44.0   
1  2017-01   108   ANG MO KIO AVE 4     01 TO 03            67.0   
2  2017-01   602   ANG MO KIO AVE 5     01 TO 03            67.0   
3  2017-01   465  ANG MO KIO AVE 10     04 TO 06            68.0   
4  2017-01   601   ANG MO KIO AVE 5     01 TO 03            67.0   

       flat_model  lease_commence_date     remaining_lease  resale_price  \
0        Improved                 1979  61 years 04 months      232000.0   
1  New Generation                 1978  60 years 07 months      250000.0   
2  New Generation                 1980  62 years 05 months      262000.0   
3  New Generation                 1980   62 years 01 month      265000.0   
4  New Generation                 1980  62 years 05 months      265000.0   

   town_BEDOK  ...  town_TAMPINES  town_TOA PA