In [37]:
import pandas as pd
import os

def load(dataset_name='dataset_01'):
    """this function loads a csv in a precise folder and returns a well indexed dataframe

    Args:
        dataset_name (str, optional): _name of the folder in the data/raw _. Defaults to 'dataset_01'.

    Returns:
        _type_: _description_
    """
    script_directory = os.getcwd()
    data_directory = os.path.abspath(os.path.join(script_directory, os.pardir, os.pardir, "data", "raw", dataset_name))
    data_path = os.path.join(data_directory, 'data.csv')
    df=pd.read_csv(data_path)
    

    df['Offer'] = df.apply(lambda row: 'rent' if row['To rent'] else 'sale', axis=1)
    df.drop(['To rent', 'To sell','Surface area of the plot of land'], axis = 1, inplace = True)
    df = df.set_index(['Offer','Id'])
    columns_tuple=[]
    for f in df.columns:
        if f=='Price':
            columns_tuple.append(('Target',f))
        elif f in ['Number of rooms', 'Living Area','State of the building']:
            columns_tuple.append(('Group 1',f))
        elif f == 'zipcode':
            columns_tuple.append(('Spatial',f))
        else:
            columns_tuple.append(('Group 2', f))

    df.columns=pd.MultiIndex.from_tuples(columns_tuple)
    df.reindex(columns = ['Target','Group 1','Group 2','Spatial'], level=0)
    
    return df



In [38]:
df=load()

In [39]:
df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Target,Group 1,Group 1,Group 2,Group 2,Group 2,Group 2,Group 2,Group 2,Group 2,Group 2,Group 2,Group 2,Group 1,Spatial,Group 2
Unnamed: 0_level_1,Unnamed: 1_level_1,Price,Number of rooms,Living Area,Fully equipped kitchen,Furnished,Open fire,Terrace,Area of the terrace,Garden,Area of the garden,Surface of the land,Number of facades,Swimming pool,State of the building,zipcode,type
Offer,Id,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2
rent,1610665,485.0,1.0,40.0,True,False,False,False,0.0,False,0.0,,2.0,False,\nExcellentétat\n,4020,Studio
rent,2718948,790.0,2.0,140.0,True,False,False,False,0.0,False,0.0,,2.0,False,\nBon\n,3200,Appartement
rent,3229637,900.0,3.0,103.0,,False,False,False,0.0,True,330.0,400.0,3.0,False,\nBon\n,7020,Maison
rent,3860205,520.0,2.0,50.0,True,False,False,False,0.0,False,0.0,,2.0,False,\nBon\n,6900,Appartement
rent,4141598,760.0,1.0,55.0,True,,False,True,30.0,True,10000.0,,2.0,False,,1470,Appartement


In [40]:
def f0(word):
    state_dic={'\nExcellentétat\n':2,'\nFraîchementrénové\n':1,'\nBon\n':0,
            '\nÀrafraîchir\n':-1,'\nÀrénover\n':-2,'\nÀrestaurer\n':-3}
    if word in state_dic.keys():
        return state_dic[word]
    else:
        return word

In [41]:
df.loc[:,('Group 1','State of the building')]=df['Group 1']['State of the building'].apply(f0)

  df.loc[:,('Group 1','State of the building')]=df['Group 1']['State of the building'].apply(f0)


In [42]:
df['Group 1'].head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Number of rooms,Living Area,State of the building
Offer,Id,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
rent,1610665,1.0,40.0,2.0
rent,2718948,2.0,140.0,0.0
rent,3229637,3.0,103.0,0.0
rent,3860205,2.0,50.0,0.0
rent,4141598,1.0,55.0,


In [43]:
df['Group 2'].head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Fully equipped kitchen,Furnished,Open fire,Terrace,Area of the terrace,Garden,Area of the garden,Surface of the land,Number of facades,Swimming pool,type
Offer,Id,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
rent,1610665,True,False,False,False,0.0,False,0.0,,2.0,False,Studio
rent,2718948,True,False,False,False,0.0,False,0.0,,2.0,False,Appartement
rent,3229637,,False,False,False,0.0,True,330.0,400.0,3.0,False,Maison
rent,3860205,True,False,False,False,0.0,False,0.0,,2.0,False,Appartement
rent,4141598,True,,False,True,30.0,True,10000.0,,2.0,False,Appartement


In [52]:
df['Group 1'].head()


Unnamed: 0_level_0,Unnamed: 1_level_0,Number of rooms,Living Area,State of the building
Offer,Id,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
rent,1610665,1.0,40.0,2.0
rent,2718948,2.0,140.0,0.0
rent,3229637,3.0,103.0,0.0
rent,3860205,2.0,50.0,0.0
rent,4141598,1.0,55.0,
