In [1]:
# import numpy as np
import pandas as pd

### creating df

In [3]:
data = [['a', 0, 1, 1],
        ['b', 0, 2, float('nan')],
        ['c', 0, 3, 3],
        ['d', 1, 2, float('nan')],
        ['e', 1, 4, 4],
        ['f', 1, 5, 5],
        ['g', 2, 6, 6],
        ['h', 2, 1, 1],
        ['i', 2, 3, 3],
       ]
  
df = pd.DataFrame(data, columns = ['meta', 'line', 'x1', 'x2'])

df

Unnamed: 0,meta,line,x1,x2
0,a,0,1,1.0
1,b,0,2,
2,c,0,3,3.0
3,d,1,2,
4,e,1,4,4.0
5,f,1,5,5.0
6,g,2,6,6.0
7,h,2,1,1.0
8,i,2,3,3.0


### defining list of features, finding duplicates

In [4]:
# list of feature columns
features = ['x1', 'x2']

# find duplicates based on features
dup_df = df[df.duplicated(features)]

dup_df

Unnamed: 0,meta,line,x1,x2
3,d,1,2,
7,h,2,1,1.0
8,i,2,3,3.0


### df without duplicates

In [5]:
drop_dup_df = df.drop_duplicates(subset=features)
drop_dup_df

Unnamed: 0,meta,line,x1,x2
0,a,0,1,1.0
1,b,0,2,
2,c,0,3,3.0
4,e,1,4,4.0
5,f,1,5,5.0
6,g,2,6,6.0


### simulating projection by adding x and y features to filtered df

In [6]:
drop_dup_df.insert(0, "x", [10,11,12,13,14,15], True)
drop_dup_df.insert(1, "y", [20,21,22,23,24,25], True)
drop_dup_df

Unnamed: 0,x,y,meta,line,x1,x2
0,10,20,a,0,1,1.0
1,11,21,b,0,2,
2,12,22,c,0,3,3.0
4,13,23,e,1,4,4.0
5,14,24,f,1,5,5.0
6,15,25,g,2,6,6.0


### build a dictionary of state->coords, iterating through filtered df

In [29]:
state_to_coords = {}

for i, row in drop_dup_df.iterrows():
    values = row[features]
    k = ''.join([str(x) for x in values.tolist()])
    state_to_coords[k] = [row['x'], row['y']]
    
print(state_to_coords)

{'11.0': [10, 20], '2nan': [11, 21], '33.0': [12, 22], '44.0': [13, 23], '55.0': [14, 24], '66.0': [15, 25]}


### add coords to duplicates df using dict

In [30]:
for i, row in dup_df.iterrows():
    values = row[features]
    k = ''.join([str(x) for x in values.tolist()])
    row['x'], row['y'] = state_to_coords[k]
dup_df

Unnamed: 0,meta,line,x1,x2
3,d,1,2,
7,h,2,1,1.0
8,i,2,3,3.0


### concatenating filtered df with df of duplicates after they have been updated by x,y

In [31]:
conc_df = pd.concat([drop_dup_df, dup_df]).sort_index()
conc_df

Unnamed: 0,x,y,meta,line,x1,x2
0,10.0,20.0,a,0,1,1.0
1,11.0,21.0,b,0,2,
2,12.0,22.0,c,0,3,3.0
3,,,d,1,2,
4,13.0,23.0,e,1,4,4.0
5,14.0,24.0,f,1,5,5.0
6,15.0,25.0,g,2,6,6.0
7,,,h,2,1,1.0
8,,,i,2,3,3.0
