In [2]:
!pip install pandas_profiling

Collecting pandas_profiling
  Downloading pandas_profiling-3.0.0-py2.py3-none-any.whl (248 kB)
Collecting pydantic>=1.8.1
  Downloading pydantic-1.8.2-cp38-cp38-win_amd64.whl (2.0 MB)
Collecting tangled-up-in-unicode==0.1.0
  Downloading tangled_up_in_unicode-0.1.0-py3-none-any.whl (3.1 MB)
Collecting visions[type_image_path]==0.7.1
  Downloading visions-0.7.1-py3-none-any.whl (102 kB)
Collecting htmlmin>=0.1.12
  Downloading htmlmin-0.1.12.tar.gz (19 kB)
Collecting phik>=0.11.1
  Downloading phik-0.11.2.tar.gz (1.1 MB)
Collecting missingno>=0.4.2
  Downloading missingno-0.4.2-py3-none-any.whl (9.7 kB)
Collecting multimethod==1.4
  Downloading multimethod-1.4-py2.py3-none-any.whl (7.3 kB)
Collecting imagehash; extra == "type_image_path"
  Downloading ImageHash-4.2.0-py2.py3-none-any.whl (295 kB)
Building wheels for collected packages: htmlmin, phik
  Building wheel for htmlmin (setup.py): started
  Building wheel for htmlmin (setup.py): finished with status 'done'
  Created wheel for h

In [2]:
import matplotlib.pyplot as plt
import pandas as pd
import os
from pandas_profiling import ProfileReport

from sklearn.neural_network import MLPRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.model_selection import train_test_split

import pickle

In [3]:
def reduce_range(x):
    # x: int (45, 105)
    # y: int (50, 100)
    # 100/105 = 0.9523
    return int(50 + 0.9523*(x-45))

In [4]:
def get_data(img_path, x=45, y=75):
    img = plt.imread(img_path)
    #img -> 150x150 input image
    output_pixel = img[45][75][0]
    if x==45:
        t = reduce_range(y)
        neighbours = [
            img[50][t-20][0],
            img[50][t-10][0],
            img[50][t][0],
            img[50][t+10][0],
            img[50][t+20][0]
        ]
    elif x==105:
        t = reduce_range(y)
        neighbours = [
            img[100][t-20][0],
            img[100][t-10][0],
            img[100][t][0],
            img[100][t+10][0],
            img[100][t+20][0]
        ]
    elif y==45:
        t = reduce_range(x)
        neighbours = [
            img[t-20][50][0],
            img[t-10][50][0],
            img[t][50][0],
            img[t+10][50][0],
            img[t+20][50][0],
        ]
    else:
        t = reduce_range(x)
        neighbours = [
            img[t-20][100][0],
            img[t-10][100][0],
            img[t][100][0],
            img[t+10][100][0],
            img[t+20][100][0]
        ]
    return neighbours, output_pixel

In [5]:
def get_df(dir, i=45, j=75):
    df = pd.DataFrame(columns=['input_1', 'input_2', 'input_3', 'input_4', 'input_5', 'output'])
    for fl in os.listdir(dir):
        row = get_data(dir+fl, i, j)
        df.loc[len(df.index)] = row[0] +[row[1]]
    return df

In [6]:
df = get_df('./Dataset/seg_test/seg_test/buildings/')
print(df)

    input_1 input_2 input_3 input_4 input_5 output
0       117     129      92     105      97    165
1        90      56      76      73      95    107
2       130     118     199     174     167    201
3       116     122     127     125     124    132
4         5     131     128     235     132     71
..      ...     ...     ...     ...     ...    ...
432     163     171     117     186     130     82
433     104     190     179     190      54     84
434     251     110     111     255     145     73
435     116     120      79     117      97    133
436     205     191     225     125      45    205

[437 rows x 6 columns]


In [7]:
profile = ProfileReport(df, title="Pandas Profiling Report")

In [8]:
profile.to_file("basic_report.html")

HBox(children=(HTML(value='Summarize dataset'), FloatProgress(value=0.0, max=19.0), HTML(value='')))




HBox(children=(HTML(value='Generate report structure'), FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(HTML(value='Render HTML'), FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(HTML(value='Export report to file'), FloatProgress(value=0.0, max=1.0), HTML(value='')))




In [9]:
X = df[df.columns[:5]]
y = df['output']

In [10]:
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)

In [11]:
regr = MLPRegressor(random_state=1, max_iter=500).fit(X_train, y_train)
regr.score(X_test, y_test)



0.3336827084725126

In [12]:
neigh = KNeighborsRegressor(n_neighbors=2)
neigh.fit(X, y)
neigh.score(X_test, y_test)

0.660606269586594

In [13]:
models = []
loc = [
    [45, 45],
    [45, 75],
    [45, 105],
    [75, 45],
    [75, 105],
    [105, 45],
    [105, 75],
    [105, 105]
]

In [14]:
for i,j in loc:
    print(i, j)
    df = get_df('./Dataset/seg_test/seg_test/buildings/', i, j)
    X = df[df.columns[:5]]
    y = df['output']
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)
    models.append(KNeighborsRegressor(n_neighbors=2).fit(X, y))
    print(models[-1].score(X_test, y_test))

45 45
0.6298386158494249
45 75
0.660606269586594
45 105
0.6991415676781325
75 45
0.478104299198709
75 105
0.48800789093433916
105 45
0.5519831015781869
105 75
0.47938514515190755
105 105
0.5495442305166169


In [15]:
with open('knn_expansion_models.pkl','wb') as f:
     pickle.dump(models, f)