In [1]:
import pandas as pd
import sklearn
import matplotlib as plt
from pycaret.regression import *

In [2]:
# Read the grid table and promote the CSV's unnamed first column (the
# 'R<row>C<column>' cell labels) to the index in a single chained expression.
df = pd.read_csv('1.complete_df.csv').set_index('Unnamed: 0')
df

Unnamed: 0_level_0,row,column,lat_min,lat_max,lon_min,lon_max,nature,entertainment,transports,art,tourism,security,accessibility,sport
Unnamed: 0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
R1C1,1.0,1.0,41.133683,41.137514,1.407529,1.400029,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
R1C2,1.0,2.0,41.133683,41.137514,1.400029,1.407529,0.0,6.0,22.0,0.0,2.0,0.0,0.0,0.0
R1C3,1.0,3.0,41.133683,41.137514,1.407529,1.415029,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
R1C4,1.0,4.0,41.133683,41.137514,1.415029,1.422528,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
R1C5,1.0,5.0,41.133683,41.137514,1.422528,1.430028,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
R200C196,200.0,196.0,41.896166,41.899998,2.854991,2.862491,14.0,1.0,14.0,0.0,0.0,0.0,0.0,0.0
R200C197,200.0,197.0,41.896166,41.899998,2.862491,2.869990,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
R200C198,200.0,198.0,41.896166,41.899998,2.869990,2.877490,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0
R200C199,200.0,199.0,41.896166,41.899998,2.877490,2.884990,0.0,2.0,3.0,0.0,0.0,0.0,0.0,0.0


In [3]:
# Direction codes understood by add_cardinal_features.
cardinal_directions = [
    "N",
    "S",
    "E",
    "W",
]

# Direction codes understood by add_diagonal_features.
diagonal_directions = [
    "NW",
    "NE",
    "SW",
    "SE",
]

In [4]:
# Per-cell count columns that get replicated as neighbour features.
categories = [
    'nature',
    'entertainment',
    'transports',
    'art',
    'sport',
    'tourism',
    'security',
    'accessibility',
]

In [5]:
def add_cardinal_features(source_df, categories_names, direction: str):
    """Build neighbour-feature columns by shifting the grid one cell along one axis.

    Every row of ``source_df`` is relabelled with the ``'R<row>C<column>'``
    label of the adjacent cell selected by ``direction``, so that concatenating
    the result to the original frame (``axis=1``) attaches to each cell the
    category values of its neighbour.

    Parameters
    ----------
    source_df : pandas.DataFrame
        Frame with numeric ``row`` and ``column`` columns (1-based grid
        coordinates) plus every column listed in ``categories_names``.
        It is not modified.
    categories_names : list of str
        Columns to copy from the neighbouring cell.
    direction : str
        One of ``'N'``, ``'S'``, ``'W'``, ``'E'``:

        * ``'N'`` -> suffix ``_north``, row shifted by -1
        * ``'S'`` -> suffix ``_south``, row shifted by +1
        * ``'W'`` -> suffix ``_west``,  column shifted by -1
        * ``'E'`` -> suffix ``_east``,  column shifted by +1

    Returns
    -------
    pandas.DataFrame
        Indexed by ``'R<row>C<column>'`` labels of the shifted coordinates,
        holding only the suffixed category columns. Cells on the border that
        would be shifted off the grid are dropped, so the corresponding
        labels are absent from the result.

    Raises
    ------
    ValueError
        If ``direction`` is not one of the four supported codes.

    Notes
    -----
    NOTE(review): whether each shift matches the compass direction in its
    suffix depends on how rows/columns are numbered geographically, which this
    function cannot know - verify against the grid's lat/lon layout.
    """
    # direction code -> (column suffix, axis that is shifted, shift amount)
    shifts = {
        'N': ('_north', 'row', -1),
        'S': ('_south', 'row', 1),
        'W': ('_west', 'column', -1),
        'E': ('_east', 'column', 1),
    }
    if direction not in shifts:
        raise ValueError(
            "direction must be one of %s, got %r" % (sorted(shifts), direction)
        )
    suffix, axis, step = shifts[direction]

    # Always work with the caller-supplied names (never a notebook global).
    categories_names = list(categories_names)
    new_categories_names = [name + suffix for name in categories_names]

    # astype returns a new frame, so the caller's data is left untouched.
    df = source_df.astype({'row': int, 'column': int})

    # Drop the border line whose shifted coordinate would leave the grid.
    if step < 0:
        keep = df[axis] > 1
    else:
        keep = df[axis] < df[axis].max()

    shifted = df.loc[keep, ['row', 'column'] + categories_names].copy()
    shifted[axis] = shifted[axis] + step

    # Label of the cell that receives these values as neighbour features.
    labels = 'R' + shifted['row'].astype(str) + 'C' + shifted['column'].astype(str)

    features = shifted[categories_names].copy()
    features.columns = new_categories_names
    features.index = labels

    return features

In [6]:
def add_diagonal_features(source_df, categories_names, direction: str):
    """Build neighbour-feature columns by shifting the grid one cell diagonally.

    Every row of ``source_df`` is relabelled with the ``'R<row>C<column>'``
    label of the diagonally adjacent cell selected by ``direction``, so that
    concatenating the result to the original frame (``axis=1``) attaches to
    each cell the category values of that neighbour.

    Parameters
    ----------
    source_df : pandas.DataFrame
        Frame with numeric ``row`` and ``column`` columns (1-based grid
        coordinates) plus every column listed in ``categories_names``.
        It is not modified.
    categories_names : list of str
        Columns to copy from the neighbouring cell.
    direction : str
        One of ``'NW'``, ``'SW'``, ``'NE'``, ``'SE'``:

        * ``'NW'`` -> suffix ``_north_west``, row -1, column -1
        * ``'SW'`` -> suffix ``_south_west``, row +1, column -1
        * ``'NE'`` -> suffix ``_north_east``, row -1, column +1
        * ``'SE'`` -> suffix ``_south_east``, row +1, column +1

    Returns
    -------
    pandas.DataFrame
        Indexed by ``'R<row>C<column>'`` labels of the shifted coordinates,
        holding only the suffixed category columns. Cells that would be
        shifted off the grid on either axis are dropped.

    Raises
    ------
    ValueError
        If ``direction`` is not one of the four supported codes.

    Notes
    -----
    NOTE(review): whether each shift matches the compass direction in its
    suffix depends on how rows/columns are numbered geographically, which this
    function cannot know - verify against the grid's lat/lon layout.
    """
    # direction code -> (column suffix, row shift, column shift)
    shifts = {
        'NW': ('_north_west', -1, -1),
        'SW': ('_south_west', 1, -1),
        'NE': ('_north_east', -1, 1),
        'SE': ('_south_east', 1, 1),
    }
    if direction not in shifts:
        raise ValueError(
            "direction must be one of %s, got %r" % (sorted(shifts), direction)
        )
    suffix, row_step, column_step = shifts[direction]

    # Always work with the caller-supplied names (never a notebook global).
    categories_names = list(categories_names)
    new_categories_names = [name + suffix for name in categories_names]

    # astype returns a new frame, so the caller's data is left untouched.
    df = source_df.astype({'row': int, 'column': int})

    # Keep only cells whose shifted coordinates stay inside the grid.
    if row_step < 0:
        keep_rows = df['row'] > 1
    else:
        keep_rows = df['row'] < df['row'].max()
    if column_step < 0:
        keep_columns = df['column'] > 1
    else:
        keep_columns = df['column'] < df['column'].max()

    shifted = df.loc[keep_rows & keep_columns, ['row', 'column'] + categories_names].copy()
    shifted['row'] = shifted['row'] + row_step
    shifted['column'] = shifted['column'] + column_step

    # Label of the cell that receives these values as neighbour features.
    labels = 'R' + shifted['row'].astype(str) + 'C' + shifted['column'].astype(str)

    features = shifted[categories_names].copy()
    features.columns = new_categories_names
    features.index = labels

    return features

In [7]:
# Start from the original grid and append one block of neighbour features per
# cardinal direction. Each direction is requested exactly once: the previous
# version asked for 'W' twice and never for 'N', which produced duplicate
# *_west columns and no *_north columns.
extended_df = df.copy()

for direction in ('N', 'E', 'S', 'W'):
    new_df = add_cardinal_features(df, categories, direction = direction)
    # concat aligns on the 'R<row>C<column>' index; cells without a neighbour
    # in this direction get NaN in the new columns.
    extended_df = pd.concat([extended_df, new_df], axis = 1)

extended_df

Unnamed: 0,row,column,lat_min,lat_max,lon_min,lon_max,nature,entertainment,transports,art,...,security_south,accessibility_south,nature_west,entertainment_west,transports_west,art_west,sport_west,tourism_west,security_west,accessibility_west
R1C1,1.0,1.0,41.133683,41.137514,1.407529,1.400029,0.0,0.0,0.0,0.0,...,,,0.0,6.0,22.0,0.0,0.0,2.0,0.0,0.0
R1C2,1.0,2.0,41.133683,41.137514,1.400029,1.407529,0.0,6.0,22.0,0.0,...,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
R1C3,1.0,3.0,41.133683,41.137514,1.407529,1.415029,0.0,0.0,0.0,0.0,...,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
R1C4,1.0,4.0,41.133683,41.137514,1.415029,1.422528,0.0,0.0,0.0,0.0,...,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
R1C5,1.0,5.0,41.133683,41.137514,1.422528,1.430028,0.0,0.0,0.0,0.0,...,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
R200C196,200.0,196.0,41.896166,41.899998,2.854991,2.862491,14.0,1.0,14.0,0.0,...,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
R200C197,200.0,197.0,41.896166,41.899998,2.862491,2.869990,1.0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0
R200C198,200.0,198.0,41.896166,41.899998,2.869990,2.877490,0.0,0.0,4.0,0.0,...,0.0,0.0,0.0,2.0,3.0,0.0,0.0,0.0,0.0,0.0
R200C199,200.0,199.0,41.896166,41.899998,2.877490,2.884990,0.0,2.0,3.0,0.0,...,0.0,0.0,2.0,3.0,2.0,0.0,0.0,0.0,0.0,0.0


In [8]:
# Append the diagonal-neighbour feature columns, one direction per iteration,
# in the order NE, SE, SW, NW.
for diagonal in ('NE', 'SE', 'SW', 'NW'):
    new_df = add_diagonal_features(df, categories, direction = diagonal)
    extended_df = pd.concat([extended_df, new_df], axis = 1)

extended_df

Unnamed: 0,row,column,lat_min,lat_max,lon_min,lon_max,nature,entertainment,transports,art,...,security_south_west,accessibility_south_west,nature_north_west,entertainment_north_west,transports_north_west,art_north_west,sport_north_west,tourism_north_west,security_north_west,accessibility_north_west
R1C1,1.0,1.0,41.133683,41.137514,1.407529,1.400029,0.0,0.0,0.0,0.0,...,,,3.0,20.0,39.0,0.0,0.0,2.0,0.0,0.0
R1C2,1.0,2.0,41.133683,41.137514,1.400029,1.407529,0.0,6.0,22.0,0.0,...,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
R1C3,1.0,3.0,41.133683,41.137514,1.407529,1.415029,0.0,0.0,0.0,0.0,...,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
R1C4,1.0,4.0,41.133683,41.137514,1.415029,1.422528,0.0,0.0,0.0,0.0,...,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
R1C5,1.0,5.0,41.133683,41.137514,1.422528,1.430028,0.0,0.0,0.0,0.0,...,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
R200C196,200.0,196.0,41.896166,41.899998,2.854991,2.862491,14.0,1.0,14.0,0.0,...,0.0,0.0,,,,,,,,
R200C197,200.0,197.0,41.896166,41.899998,2.862491,2.869990,1.0,0.0,1.0,0.0,...,0.0,0.0,,,,,,,,
R200C198,200.0,198.0,41.896166,41.899998,2.869990,2.877490,0.0,0.0,4.0,0.0,...,0.0,0.0,,,,,,,,
R200C199,200.0,199.0,41.896166,41.899998,2.877490,2.884990,0.0,2.0,3.0,0.0,...,0.0,0.0,,,,,,,,


In [9]:
# Replace every missing value in the frame with 0 (cells lacking a neighbour
# in some direction come out of the concatenations above as NaN).
extended_df = extended_df.fillna(0)
extended_df

Unnamed: 0,row,column,lat_min,lat_max,lon_min,lon_max,nature,entertainment,transports,art,...,security_south_west,accessibility_south_west,nature_north_west,entertainment_north_west,transports_north_west,art_north_west,sport_north_west,tourism_north_west,security_north_west,accessibility_north_west
R1C1,1.0,1.0,41.133683,41.137514,1.407529,1.400029,0.0,0.0,0.0,0.0,...,0.0,0.0,3.0,20.0,39.0,0.0,0.0,2.0,0.0,0.0
R1C2,1.0,2.0,41.133683,41.137514,1.400029,1.407529,0.0,6.0,22.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
R1C3,1.0,3.0,41.133683,41.137514,1.407529,1.415029,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
R1C4,1.0,4.0,41.133683,41.137514,1.415029,1.422528,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
R1C5,1.0,5.0,41.133683,41.137514,1.422528,1.430028,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
R200C196,200.0,196.0,41.896166,41.899998,2.854991,2.862491,14.0,1.0,14.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
R200C197,200.0,197.0,41.896166,41.899998,2.862491,2.869990,1.0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
R200C198,200.0,198.0,41.896166,41.899998,2.869990,2.877490,0.0,0.0,4.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
R200C199,200.0,199.0,41.896166,41.899998,2.877490,2.884990,0.0,2.0,3.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [10]:
# Persist the extended feature table (index included) next to the notebook.
extended_df.to_csv(path_or_buf = '5.extended_df.csv')

In [11]:
# Display the list of base category columns that were expanded into neighbour features.
categories

['nature',
 'entertainment',
 'transports',
 'art',
 'sport',
 'tourism',
 'security',
 'accessibility']