In [None]:
import pandas as pd
from sklearn.preprocessing import StandardScaler

Load Data

In [None]:
def load_data(file_path, encoding='latin1'):
    """Read a CSV file into a pandas DataFrame.

    Args:
        file_path: Location of the CSV file; handed straight to
            ``pd.read_csv``.
        encoding: Text encoding used to decode the file. Defaults to
            ``'latin1'``.

    Returns:
        The DataFrame produced by ``pd.read_csv``.
    """
    frame = pd.read_csv(file_path, encoding=encoding)
    return frame

Drop Unnecessary Columns

In [None]:
def drop_columns(df, columns_to_drop):
    """Return a new DataFrame without the given columns.

    The input frame is not modified: ``DataFrame.drop`` is called without
    ``inplace=True``, so the caller's object keeps all of its columns and
    re-running the calling cell gives the same result.

    Args:
        df: DataFrame to remove columns from.
        columns_to_drop: Column label or list of labels to remove.

    Returns:
        A new DataFrame containing every column of ``df`` except those in
        ``columns_to_drop``.

    Raises:
        KeyError: If any requested label is not a column of ``df``
            (the default ``errors='raise'`` behaviour of ``DataFrame.drop``).
    """
    return df.drop(columns=columns_to_drop)

Prepare Base Model

In [None]:
def prepare_base_model(df):
    """Select the base-model columns from ``df`` as an independent copy.

    The selected columns, in order, are: the fixed leading columns listed in
    ``leading_columns``, then ``'Sector <X> Ratio'`` for X in A..T, then
    ``'sector_<X>_density'`` for X in A..T, then ``'urbano'`` and
    ``'competitors_500m'``.

    Args:
        df: DataFrame that contains at least all of the columns above.

    Returns:
        A new DataFrame holding only those columns. It is an explicit copy,
        so assigning into it later neither touches ``df`` nor raises
        pandas' ``SettingWithCopyWarning``.

    Raises:
        KeyError: If any of the expected columns is missing from ``df``.
    """
    sector_letters = 'ABCDEFGHIJKLMNOPQRST'

    leading_columns = ['Ano', 'Mes', 'capital_log', 'duration', 'failure',
                       'socio_unico', 'longitude', 'latitude', 'Poblacion_log']
    ratio_columns = [f'Sector {letter} Ratio' for letter in sector_letters]
    density_columns = [f'sector_{letter}_density' for letter in sector_letters]
    trailing_columns = ['urbano', 'competitors_500m']

    selected = leading_columns + ratio_columns + density_columns + trailing_columns

    # .copy() detaches the result from ``df`` so downstream column
    # assignment operates on its own data.
    base_model = df[selected].copy()
    return base_model

Standardize Features

In [None]:
def standardize_features(df, features_to_scale):
    """Return a copy of ``df`` with the given columns standardized.

    The listed columns are replaced by the output of
    ``StandardScaler().fit_transform`` on those columns; all other columns
    are carried over unchanged. The input frame itself is not modified.

    Args:
        df: DataFrame containing the columns to scale.
        features_to_scale: List of column labels to standardize. An empty
            list is accepted and yields an unmodified copy of ``df``.

    Returns:
        A new DataFrame with the same columns as ``df``, where the columns
        in ``features_to_scale`` hold the scaler's output.
    """
    # Work on a copy so the caller's frame is never overwritten and the
    # assignment below never targets a view of another frame.
    scaled = df.copy()

    # Nothing to scale: skip the scaler, which rejects zero-feature input.
    if len(features_to_scale) == 0:
        return scaled

    scaler = StandardScaler()
    scaled[features_to_scale] = scaler.fit_transform(scaled[features_to_scale])
    return scaled

Save Data

In [None]:
def save_data(df, file_path, encoding='latin1'):
    """Write ``df`` to ``file_path`` as CSV, omitting the index column.

    Args:
        df: DataFrame to serialize.
        file_path: Destination handed to ``DataFrame.to_csv``.
        encoding: Text encoding of the output file. Defaults to ``'latin1'``.

    Returns:
        None.
    """
    csv_options = {'index': False, 'encoding': encoding}
    df.to_csv(file_path, **csv_options)

Main Function

In [None]:
def process_model_data(input_file_path, output_file_path):
    """Run the preparation pipeline: load, trim, select, scale, save.

    Steps, in order:
        1. Load the CSV at ``input_file_path`` with ``load_data``.
        2. Remove the ``'company_name'`` column with ``drop_columns``.
        3. Keep only the base-model columns via ``prepare_base_model``.
        4. Standardize every selected column except ``'duration'``,
           ``'failure'``, ``'socio_unico'`` and ``'urbano'``.
        5. Write the result to ``output_file_path`` with ``save_data``.

    Args:
        input_file_path: Path of the CSV file to read.
        output_file_path: Path of the CSV file to write.

    Returns:
        None.
    """
    # Columns that are passed through without standardization.
    unscaled_columns = ('duration', 'failure', 'socio_unico', 'urbano')

    raw_frame = load_data(input_file_path)
    trimmed_frame = drop_columns(raw_frame, ['company_name'])
    model_frame = prepare_base_model(trimmed_frame)

    # Everything that is not explicitly excluded gets standardized.
    scale_targets = []
    for column in model_frame.columns:
        if column not in unscaled_columns:
            scale_targets.append(column)

    model_frame = standardize_features(model_frame, scale_targets)
    save_data(model_frame, output_file_path)

In [None]:
# CSV read by the pipeline.
input_file_path = '/mnt/c/Users/clayt/Data Science/UCM/TFM/Datos/Processed/valencia_data_feature_engineer.csv'
# CSV written by the pipeline.
output_file_path = '/mnt/c/Users/clayt/Data Science/UCM/TFM/Datos/Processed/valencia_model_survival.csv'

# Run the full preparation pipeline on the two paths above.
process_model_data(
    input_file_path=input_file_path,
    output_file_path=output_file_path,
)