In [53]:
import pandas as pd
import sklearn
import matplotlib as plt
from pycaret.regression import *

In [54]:
# Load the grid of cells and use the first (unnamed) CSV column, which holds
# the 'R<row>C<column>' cell keys, as the index.
df = pd.read_csv('1. complete_df.csv').set_index('Unnamed: 0')
df

Unnamed: 0_level_0,row,column,lat_min,lat_max,lon_min,lon_max,nature,entartainment,transports,art,sport,tourism,security,accessibility
Unnamed: 0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
R1C1,1.0,1.0,45.000004,45.005004,11.507500,11.500001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
R1C2,1.0,2.0,45.000004,45.005004,11.500001,11.507500,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
R1C3,1.0,3.0,45.000004,45.005004,11.507500,11.515000,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0
R1C4,1.0,4.0,45.000004,45.005004,11.515000,11.522500,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
R1C5,1.0,5.0,45.000004,45.005004,11.522500,11.530000,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
R200C196,200.0,196.0,45.994998,45.999998,12.955000,12.962500,3.0,1.0,5.0,0.0,0.0,0.0,0.0,1.0
R200C197,200.0,197.0,45.994998,45.999998,12.962500,12.970000,4.0,0.0,11.0,1.0,3.0,0.0,0.0,0.0
R200C198,200.0,198.0,45.994998,45.999998,12.970000,12.977500,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
R200C199,200.0,199.0,45.994998,45.999998,12.977500,12.985000,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0


In [55]:
# Direction codes accepted by add_cardinal_features (one step along one grid axis).
cardinal_directions = "N S E W".split()
# Direction codes accepted by add_diagonal_features (one step along both grid axes).
diagonal_directions = "NW NE SW SE".split()

In [56]:
# Names of the per-cell feature columns that get copied from neighbouring cells.
# The strings must match the DataFrame column names exactly (including the
# 'entartainment' spelling used in the data).
categories = [
    'nature',
    'entartainment',
    'transports',
    'art',
    'sport',
    'tourism',
    'security',
    'accessibility',
]

In [57]:
def add_cardinal_features(source_df, categories_names, direction: str):
    """Return the category columns re-keyed to the adjacent cell in one cardinal direction.

    Each kept cell is re-labelled with the ``'R<row>C<column>'`` key of the
    cell one step away along a single grid axis, so the result can be
    concatenated column-wise (``axis=1``) onto a frame indexed by those keys.

    Shifts applied to the index key:

    * ``'N'``: cells with ``row > 1`` are re-keyed to ``row - 1``
    * ``'S'``: cells with ``row < max(row)`` are re-keyed to ``row + 1``
    * ``'W'``: cells with ``column > 1`` are re-keyed to ``column - 1``
    * ``'E'``: cells with ``column < max(column)`` are re-keyed to ``column + 1``

    NOTE(review): the suffix names the direction of the index shift, e.g. the
    ``*_west`` values stored for column ``c`` come from column ``c + 1``.
    Confirm this matches the intended geographic orientation of the grid.

    Parameters
    ----------
    source_df : pandas.DataFrame
        Must contain ``row`` and ``column`` columns (castable to ``int``) and
        every column listed in ``categories_names``. It is not modified.
    categories_names : list of str
        Columns to copy from the neighbouring cell.
    direction : str
        One of ``'N'``, ``'S'``, ``'E'``, ``'W'``.

    Returns
    -------
    pandas.DataFrame
        Indexed by the shifted ``'R<row>C<column>'`` keys, with one column per
        category renamed to ``<category>_north|_south|_west|_east``. Cells
        whose shifted key would fall off the grid edge are omitted.

    Raises
    ------
    ValueError
        If ``direction`` is not one of the four supported codes.
    """
    # direction code -> (column suffix, grid axis that moves, step along that axis)
    shifts = {
        'N': ('_north', 'row', -1),
        'S': ('_south', 'row', 1),
        'W': ('_west', 'column', -1),
        'E': ('_east', 'column', 1),
    }
    if direction not in shifts:
        raise ValueError(
            "direction must be one of 'N', 'S', 'E', 'W'; got " + repr(direction)
        )
    suffix, axis, step = shifts[direction]

    # Always use the caller-supplied list (not a notebook-level global).
    categories_names = list(categories_names)

    # Integer grid coordinates, kept apart from source_df so it is never mutated.
    position = source_df[['row', 'column']].astype(int)

    # Drop the cells whose shifted key would leave the grid.
    if step < 0:
        keep = position[axis] > 1
    else:
        keep = position[axis] < position[axis].max()

    shifted = position.loc[keep].copy()
    shifted[axis] = shifted[axis] + step

    # Explicit copy: the frame is relabelled below and must not alias source_df.
    neighbour = source_df.loc[keep, categories_names].copy()
    neighbour.columns = [name + suffix for name in categories_names]
    neighbour.index = 'R' + shifted['row'].astype(str) + 'C' + shifted['column'].astype(str)

    return neighbour

In [58]:
def add_diagonal_features(source_df, categories_names, direction: str):
    """Return the category columns re-keyed to the adjacent cell in one diagonal direction.

    Each kept cell is re-labelled with the ``'R<row>C<column>'`` key of the
    cell one step away along both grid axes, so the result can be
    concatenated column-wise (``axis=1``) onto a frame indexed by those keys.

    Shifts applied to the index key:

    * ``'NW'``: ``row - 1``, ``column - 1`` (cells with ``row > 1`` and ``column > 1``)
    * ``'SW'``: ``row + 1``, ``column - 1`` (cells with ``row < max`` and ``column > 1``)
    * ``'NE'``: ``row - 1``, ``column + 1`` (cells with ``row > 1`` and ``column < max``)
    * ``'SE'``: ``row + 1``, ``column + 1`` (cells with ``row < max`` and ``column < max``)

    NOTE(review): the suffix names the direction of the index shift, e.g. the
    ``*_north_west`` values stored for cell ``(r, c)`` come from cell
    ``(r + 1, c + 1)``. Confirm this matches the intended geographic
    orientation of the grid.

    Parameters
    ----------
    source_df : pandas.DataFrame
        Must contain ``row`` and ``column`` columns (castable to ``int``) and
        every column listed in ``categories_names``. It is not modified.
    categories_names : list of str
        Columns to copy from the neighbouring cell.
    direction : str
        One of ``'NW'``, ``'NE'``, ``'SW'``, ``'SE'``.

    Returns
    -------
    pandas.DataFrame
        Indexed by the shifted ``'R<row>C<column>'`` keys, with one column per
        category renamed to
        ``<category>_north_west|_south_west|_north_east|_south_east``. Cells
        whose shifted key would fall off the grid edge are omitted.

    Raises
    ------
    ValueError
        If ``direction`` is not one of the four supported codes.
    """
    # direction code -> (column suffix, row step, column step)
    shifts = {
        'NW': ('_north_west', -1, -1),
        'SW': ('_south_west', 1, -1),
        'NE': ('_north_east', -1, 1),
        'SE': ('_south_east', 1, 1),
    }
    if direction not in shifts:
        raise ValueError(
            "direction must be one of 'NW', 'NE', 'SW', 'SE'; got " + repr(direction)
        )
    suffix, row_step, column_step = shifts[direction]

    # Always use the caller-supplied list (not a notebook-level global).
    categories_names = list(categories_names)

    # Integer grid coordinates, kept apart from source_df so it is never mutated.
    position = source_df[['row', 'column']].astype(int)
    rows = position['row']
    columns = position['column']

    # Drop the cells whose shifted key would leave the grid on either axis.
    row_ok = rows > 1 if row_step < 0 else rows < rows.max()
    column_ok = columns > 1 if column_step < 0 else columns < columns.max()
    keep = row_ok & column_ok

    shifted_rows = rows[keep] + row_step
    shifted_columns = columns[keep] + column_step

    # Explicit copy: the frame is relabelled below and must not alias source_df.
    neighbour = source_df.loc[keep, categories_names].copy()
    neighbour.columns = [name + suffix for name in categories_names]
    neighbour.index = 'R' + shifted_rows.astype(str) + 'C' + shifted_columns.astype(str)

    return neighbour

In [59]:
# Start from a copy of the loaded grid so `df` stays untouched and this cell
# can be re-run from scratch.
extended_df = df.copy()

# Append the neighbour features for each cardinal direction exactly once.
# The previous version requested 'W' twice and never 'N', which duplicated the
# *_west columns and left the *_north columns out of the extended frame.
for direction in ('N', 'E', 'S', 'W'):
    new_df = add_cardinal_features(df, categories, direction = direction)
    # Column-wise concat aligns on the 'R<row>C<column>' index; cells without a
    # neighbour in that direction get NaN here.
    extended_df = pd.concat([extended_df, new_df], axis = 1)

extended_df

Unnamed: 0,row,column,lat_min,lat_max,lon_min,lon_max,nature,entartainment,transports,art,...,security_south,accessibility_south,nature_west,entartainment_west,transports_west,art_west,sport_west,tourism_west,security_west,accessibility_west
R1C1,1.0,1.0,45.000004,45.005004,11.507500,11.500001,0.0,0.0,0.0,0.0,...,,,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
R1C2,1.0,2.0,45.000004,45.005004,11.500001,11.507500,0.0,0.0,1.0,0.0,...,,,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0
R1C3,1.0,3.0,45.000004,45.005004,11.507500,11.515000,0.0,0.0,2.0,0.0,...,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
R1C4,1.0,4.0,45.000004,45.005004,11.515000,11.522500,0.0,0.0,0.0,0.0,...,,,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
R1C5,1.0,5.0,45.000004,45.005004,11.522500,11.530000,0.0,0.0,1.0,0.0,...,,,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
R200C196,200.0,196.0,45.994998,45.999998,12.955000,12.962500,3.0,1.0,5.0,0.0,...,0.0,0.0,4.0,0.0,11.0,1.0,3.0,0.0,0.0,0.0
R200C197,200.0,197.0,45.994998,45.999998,12.962500,12.970000,4.0,0.0,11.0,1.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
R200C198,200.0,198.0,45.994998,45.999998,12.970000,12.977500,0.0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
R200C199,200.0,199.0,45.994998,45.999998,12.977500,12.985000,0.0,0.0,1.0,0.0,...,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0


In [61]:
# Append the diagonal neighbour features, one direction at a time, in the
# order NE, SE, SW, NW. This cell extends the existing `extended_df`, so
# running it a second time without re-running the cardinal cell appends the
# same columns again.
for direction in ('NE', 'SE', 'SW', 'NW'):
    new_df = add_diagonal_features(df, categories, direction = direction)
    # Column-wise concat aligns on the 'R<row>C<column>' index; cells without a
    # neighbour in that direction get NaN here.
    extended_df = pd.concat([extended_df, new_df], axis = 1)

extended_df

Unnamed: 0,row,column,lat_min,lat_max,lon_min,lon_max,nature,entartainment,transports,art,...,security_south_west,accessibility_south_west,nature_north_west,entartainment_north_west,transports_north_west,art_north_west,sport_north_west,tourism_north_west,security_north_west,accessibility_north_west
R1C1,1.0,1.0,45.000004,45.005004,11.507500,11.500001,0.0,0.0,0.0,0.0,...,,,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
R1C2,1.0,2.0,45.000004,45.005004,11.500001,11.507500,0.0,0.0,1.0,0.0,...,,,0.0,0.0,10.0,0.0,0.0,0.0,0.0,0.0
R1C3,1.0,3.0,45.000004,45.005004,11.507500,11.515000,0.0,0.0,2.0,0.0,...,,,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0
R1C4,1.0,4.0,45.000004,45.005004,11.515000,11.522500,0.0,0.0,0.0,0.0,...,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
R1C5,1.0,5.0,45.000004,45.005004,11.522500,11.530000,0.0,0.0,1.0,0.0,...,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
R200C196,200.0,196.0,45.994998,45.999998,12.955000,12.962500,3.0,1.0,5.0,0.0,...,0.0,0.0,,,,,,,,
R200C197,200.0,197.0,45.994998,45.999998,12.962500,12.970000,4.0,0.0,11.0,1.0,...,0.0,0.0,,,,,,,,
R200C198,200.0,198.0,45.994998,45.999998,12.970000,12.977500,0.0,0.0,1.0,0.0,...,0.0,0.0,,,,,,,,
R200C199,200.0,199.0,45.994998,45.999998,12.977500,12.985000,0.0,0.0,1.0,0.0,...,0.0,0.0,,,,,,,,


In [62]:
# Cells on the grid edge have no neighbour in some directions, which leaves
# NaN after the concats above; replace every NaN with 0.
extended_df = extended_df.fillna(0)
extended_df

Unnamed: 0,row,column,lat_min,lat_max,lon_min,lon_max,nature,entartainment,transports,art,...,security_south_west,accessibility_south_west,nature_north_west,entartainment_north_west,transports_north_west,art_north_west,sport_north_west,tourism_north_west,security_north_west,accessibility_north_west
R1C1,1.0,1.0,45.000004,45.005004,11.507500,11.500001,0.0,0.0,0.0,0.0,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
R1C2,1.0,2.0,45.000004,45.005004,11.500001,11.507500,0.0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,10.0,0.0,0.0,0.0,0.0,0.0
R1C3,1.0,3.0,45.000004,45.005004,11.507500,11.515000,0.0,0.0,2.0,0.0,...,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0
R1C4,1.0,4.0,45.000004,45.005004,11.515000,11.522500,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
R1C5,1.0,5.0,45.000004,45.005004,11.522500,11.530000,0.0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
R200C196,200.0,196.0,45.994998,45.999998,12.955000,12.962500,3.0,1.0,5.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
R200C197,200.0,197.0,45.994998,45.999998,12.962500,12.970000,4.0,0.0,11.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
R200C198,200.0,198.0,45.994998,45.999998,12.970000,12.977500,0.0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
R200C199,200.0,199.0,45.994998,45.999998,12.977500,12.985000,0.0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [63]:
# Persist the extended grid (index included) for later use.
extended_df.to_csv(path_or_buf='5.extended_df.csv')

In [64]:
# Display the list of base category column names.
categories

['nature',
 'entartainment',
 'transports',
 'art',
 'sport',
 'tourism',
 'security',
 'accessibility']