In [0]:
%matplotlib inline
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import numpy as np
import pandas as pd

In [0]:
def area(box):
    return (box[2] - box[0]) * (box[3] - box[1])


def intersection_over_union(boxes):
    assert(len(boxes) == 8)
    boxA = boxes[:4].values
    boxB = boxes[4:].values
    
    boxAArea = area(boxA)
    boxBArea = area(boxB)
    
    if (boxAArea == 0 or boxBArea == 0):
        return 0
        
    xA = max(boxA[0], boxB[0])
    yA = max(boxA[1], boxB[1])
    xB = min(boxA[2], boxB[2])
    yB = min(boxA[3], boxB[3])

    interArea = max(0, xB - xA) * max(0, yB - yA)

    
    iou = interArea / float(boxAArea + boxBArea - interArea)
    return iou

In [0]:
from google.colab import drive
drive.mount('drive')

In [0]:
votes = pd.read_csv("drive/My Drive/sna9/train3_data.csv")
data = pd.read_csv("drive/My Drive/sna9/dataiou.csv", index_col=0)
data.head()

Добавлю центры прямоугольников:

In [0]:
votes['Xc'] = votes.Xmin + (votes.Xmax - votes.Xmin) / 2
votes['Yc'] = votes.Ymin + (votes.Ymax - votes.Ymin) / 2
votes['Xc_true'] = votes.Xmin_true + (votes.Xmax_true - votes.Xmin_true) / 2
votes['Yc_true'] = votes.Ymin_true + (votes.Ymax_true - votes.Ymin_true) / 2

## Linear Regression

In [0]:
data['Xc'] = data.Xmin + (data.Xmax - data.Xmin) / 2
data['Yc'] = data.Ymin + (data.Ymax - data.Ymin) / 2
data['Xc_true'] = data.Xmin_true + (data.Xmax_true - data.Xmin_true) / 2
data['Yc_true'] = data.Ymin_true + (data.Ymax_true - data.Ymin_true) / 2

In [0]:
from sklearn.linear_model import LinearRegression, Perceptron, Ridge
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import Pipeline

In [0]:
X = votes[['Xc', 'Yc']]
y = votes[['Xc_true', 'Yc_true']]
reg = Ridge().fit(X, y)
reg.score(X, y) # 0.9000595931984947

In [0]:
# возьму лучших
data1 = votes[votes.iou > 0.475]
X = data1[['Xc', 'Yc']]
y = data1[['Xc_true', 'Yc_true']]
reg = Ridge().fit(X, y)
reg.score(X, y) # 0.948299220496979


In [0]:
# with split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
reg = Ridge().fit(X_train, y_train)
reg.score(X_test, y_test) # 0.9493693263921095

In [0]:
XX = PolynomialFeatures(3).fit_transform(X_train)
YY = PolynomialFeatures(3).fit_transform(y_train)
reg = LinearRegression().fit(XX, YY)
reg.score(PolynomialFeatures(3).fit_transform(X_test), PolynomialFeatures(3).fit_transform(y_test)) # 0.9446583024460893



0.9446583024460893

## Predict 

In [0]:
X = data1[['Xc', 'Yc']]

In [15]:
pred = reg.predict(X)
pred

array([[173.76550576, 727.54090368],
       [172.26019779, 737.09029373],
       [156.6613013 , 733.02414612],
       ...,
       [212.32558619, 652.72833206],
       [170.13144156, 817.53567818],
       [162.12164016, 812.48509173]])

In [16]:
df_pred = pd.DataFrame({'Xpr': pred[:, 0], 'Ypr': pred[:, 1]}, index=data1.index)
df_pred.head()

Unnamed: 0,Xpr,Ypr
0,173.765506,727.540904
2,172.260198,737.090294
4,156.661301,733.024146
5,150.010894,735.016849
6,89.626493,436.172574


In [0]:
# Восстановление координат 
df_pred['Xmin'] = df_pred.Xpr - (data1.Xmax - data1.Xmin) / 2
df_pred['Ymin'] = df_pred.Ypr - (data1.Ymax - data1.Ymin) / 2
df_pred['Xmax'] = df_pred.Xpr + (data1.Xmax - data1.Xmin) / 2
df_pred['Ymax'] = df_pred.Ypr + (data1.Ymax - data1.Ymin) / 2

In [0]:
data1["iou"] = pd.concat([df_pred[['Xmin','Ymin','Xmax','Ymax']], data1[['Xmin_true','Ymin_true','Xmax_true','Ymax_true']]], axis=1).apply(intersection_over_union, axis=1)
data1["iou"].mean() # 0.5772701104919826

In [0]:
def func(df):
    good1 = df.groupby("itemId")['Xmin', 'Ymin'].min()
    good2 = df.groupby("itemId")['Xmax', 'Ymax', 'Xmin_true',	'Ymin_true',	'Xmax_true',	'Ymax_true'].max()
    good = pd.concat([good1, good2], axis=1)
    good['iou'] = good.apply(intersection_over_union, axis=1)
    print(good['iou'].mean())

In [23]:
func(data1) 0.6156633235642854

0.6156633235642854


In [0]:
# Восстановление координат 3 СДВИГ ЦЕНТРА В СЕРЕДИНУ МЕЖДУ РЕАЛЬНЫМ И ПРЕДСКАЗАННЫМ


# TRY IT...

In [0]:
votest = pd.read_csv("drive/My Drive/sna9/test1_data.csv", index_col=0)

In [53]:
tgoods = votest[votest.userId.isin(data1.userId)]
tgoods.shape # (2880, 12)
len(tgoods.itemId.unique()) # 630!!!


630

In [0]:
tgoods['Xc'] = tgoods.Xmin + (tgoods.Xmax - tgoods.Xmin) / 2
tgoods['Yc'] = tgoods.Ymin + (tgoods.Ymax - tgoods.Ymin) / 2

In [0]:
X = tgoods[['Xc', 'Yc']]
pred = reg.predict(X)
pred

In [56]:
df_pred = pd.DataFrame({'Xpr': pred[:, 0], 'Ypr': pred[:, 1]}, index=tgoods.index)
df_pred.head()

(2880, 2)

In [0]:
tgoods = pd.concat([tgoods, df_pred], axis=1)
tgoods.head()

In [0]:
# Восстановление координат 
tgoods['xmin'] = tgoods.Xpr - (tgoods.Xmax - tgoods.Xmin) / 2
tgoods['ymin'] = tgoods.Ypr - (tgoods.Ymax - tgoods.Ymin) / 2
tgoods['xmax'] = tgoods.Xpr + (tgoods.Xmax - tgoods.Xmin) / 2
tgoods['ymax'] = tgoods.Ypr + (tgoods.Ymax - tgoods.Ymin) / 2

In [60]:
tgoods.head(3)

Unnamed: 0,userId,itemId,Xmin,Ymin,Xmax,Ymax,L,H,P,S,L/H,D,Xc,Yc,Xpr,Ypr,xmin,ymin,xmax,ymax
0,1581,34804,86,640,242,743,156,103,518,16068,1.514563,186.935818,164.0,691.5,169.363891,689.314224,91.363891,637.814224,247.363891,740.814224
1,1351,34804,85,655,273,766,188,111,598,20868,1.693694,218.323155,179.0,710.5,183.408842,708.460654,89.408842,652.960654,277.408842,763.960654
2,161,34804,85,648,268,748,183,100,566,18300,1.83,208.540164,176.5,698.0,181.150554,695.883824,89.650554,645.883824,272.650554,745.883824


In [0]:
tgood1 = tgoods.groupby("itemId")['xmin', 'ymin'].min()
tgood2 = tgoods.groupby("itemId")['xmax', 'ymax'].max()
tgood = pd.concat([tgood1, tgood2], axis=1)
tgood.head()


In [0]:
tgood.to_csv("drive/My Drive/sna9/foosol_e3.csv", header=None)