In [2]:
import pandas as pd

In [5]:
def transform_csv(input_file, output_file):
    """
    Transform CSV file by renaming, reordering and selecting specific columns.
    
    Parameters:
    input_file (str): Path to input CSV file
    output_file (str): Path to output CSV file
    """
    # Read the CSV file
    df = pd.read_csv(input_file)

    # Create vue_panoramique column based on beach_view and mountain_view
    df['vue_panoramique'] = ((df['beach_view'] == 1) | (df['mountain_view'] == 1)).astype(int)
    
    # Create column mapping dictionary
    column_mapping = {
        'Area': 'surface',
        'room': 'rooms',
        'bathroom': 'bathrooms',
        'garage': 'parking',
        'garden': 'jardin',
        'air_conditioning': 'climatisation',
        'central_heating': 'chauffage_central',
        'elevator': 'ascenseur',
        'price_tnd': 'price'
    }
    
    # Rename the columns
    df = df.rename(columns=column_mapping)
    
    # Select and reorder columns
    selected_columns = [
        'surface',
        'city',
        'rooms',
        'bathrooms',
        'parking',
        'pool',
        'vue_panoramique',
        'jardin',
        'climatisation',
        'chauffage_central',
        'ascenseur',
        'price'
    ]
    
    # Create new dataframe with selected columns
    df_transformed = df[selected_columns]
    
    # Save to new CSV file using semicolon as separator
    df_transformed.to_csv(output_file, sep=',', index=False)
    
    return df_transformed

In [7]:
transform_csv("../data/raw/dataset_clean.csv","../data/raw/dataset_clean.csv")

Unnamed: 0,surface,city,rooms,bathrooms,parking,pool,vue_panoramique,jardin,climatisation,chauffage_central,ascenseur,price
0,550.0,14,5.0,2.0,1.0,0.0,0,0.0,1.0,1.0,0.0,1400000.0
1,446.0,12,4.0,2.0,0.0,0.0,0,0.0,1.0,1.0,0.0,550000.0
2,200.0,25,4.0,2.0,1.0,0.0,0,0.0,1.0,0.0,0.0,330000.0
3,446.0,12,4.0,2.0,1.0,0.0,0,0.0,1.0,1.0,0.0,550000.0
4,206.0,12,5.0,3.0,0.0,0.0,0,0.0,1.0,0.0,0.0,330000.0
...,...,...,...,...,...,...,...,...,...,...,...,...
1343,52.0,12,1.0,1.0,0.0,0.0,0,0.0,1.0,1.0,0.0,270000.0
1344,50.0,33,1.0,1.0,1.0,0.0,0,0.0,0.0,0.0,1.0,60000.0
1345,49.0,41,1.0,1.0,1.0,0.0,0,0.0,1.0,1.0,1.0,150000.0
1346,45.0,59,1.0,1.0,0.0,0.0,0,0.0,0.0,0.0,0.0,50000.0
