In [118]:
import pandas as pd 
import math 

# Open excel file 
fixes = pd.read_excel('ScriptTest1_Fixes.xlsx')
raw_data = pd.read_csv('test1.csv')
raw_data = raw_data.rename(columns={ raw_data.columns[0]: "id" })

# Splice for useful information
fixes = fixes.loc[:,['Initial ID','Changed ID', 'Time First Seen']].dropna()

# Sort by Time 
fixes = fixes.sort_values(by=['Time First Seen'])

# Define functions 
# Finds and returns a car's initial (root) ID given a changed ID
def FindRootId(id, graph):
    """Follow the chain of fixes starting at ``id`` and return where it ends.

    Args:
        id: ID to resolve.
        graph: dict mapping an ID to a list whose first element is the ID
            it should be replaced by; an empty list means the ID has no
            replacement.

    Returns:
        The last ID reached by repeatedly following the first list element,
        or NaN when ``id`` is not a key of ``graph``.

    Raises:
        ValueError: if the chain loops back onto an ID already visited.
    """
    if id not in graph:
        # float("nan") instead of np.nan: numpy is not imported by this
        # script, so the old expression raised NameError.
        return float("nan")

    current_id = id
    seen = {current_id}
    # An ID with an empty list (or one that is not a key at all) ends the chain.
    while graph.get(current_id):
        current_id = graph[current_id][0]
        if current_id in seen:
            # Without this guard a cyclic chain would never terminate.
            raise ValueError("cycle detected while resolving id " + str(id))
        seen.add(current_id)
    return current_id

# Is id1 connected to id2?
def IsConnected(id1, id2, graph):
    """Report whether ``id2`` can be reached from ``id1`` by following edges.

    Args:
        id1: ID to start the search from.
        id2: ID to look for.
        graph: dict mapping an ID to a list of neighbouring IDs.

    Returns:
        True if a breadth-first walk from ``id1`` reaches ``id2`` (this
        includes ``id1 == id2``); False otherwise, and always False when
        either ID is not a key of ``graph``.
    """
    # Function-scope import so the block does not depend on the file header.
    from collections import deque

    if id1 not in graph or id2 not in graph:
        return False

    # A set gives O(1) membership tests and a deque gives O(1) pops from the
    # left; the list-based versions made the search quadratic.
    visited = {id1}
    queue = deque([id1])
    while queue:
        node = queue.popleft()
        if node == id2:
            return True
        # A neighbour that is not itself a key simply has no outgoing edges.
        for neighbour in graph.get(node, ()):
            if neighbour not in visited:
                visited.add(neighbour)
                queue.append(neighbour)
    return False

# A fix is not valid if it implies IDs were swapped
def IsValidFix(initial_id, changed_id, graph):
    """Decide whether relabelling ``changed_id`` as ``initial_id`` is safe.

    Args:
        initial_id: ID the rows should end up with.
        changed_id: ID the rows currently carry.
        graph: dict of the fixes accepted so far, mapping an ID to a list of
            the IDs it points at.

    Returns:
        False when the two IDs are already linked in either direction,
        True otherwise (including when the IDs are equal or when either one
        is not yet a key of ``graph``).
    """
    if initial_id == changed_id:
        return True
    # No path can exist between two IDs unless both are already in the graph.
    if changed_id not in graph or initial_id not in graph:
        return True
    # changed -> initial already linked: the fix repeats an earlier one.
    # initial -> changed already linked: adding changed -> initial would close
    # a cycle, i.e. the IDs were swapped. The original only tested the first
    # direction, so swaps were accepted.
    already_linked = (
        IsConnected(changed_id, initial_id, graph)
        or IsConnected(initial_id, changed_id, graph)
    )
    return not already_linked

# Relabels the matching rows of raw_data from changed_id to initial_id
def FixId(initial_id: int, changed_id: int, time_first_seen_seconds: float):
    """Overwrite ``changed_id`` with ``initial_id`` in the global ``raw_data``.

    Only rows whose 'time' is at least ``time_first_seen_seconds`` minus a
    tolerance of 1/60 are touched. The affected 'id' values are printed
    before the change, and every row carrying ``initial_id`` within the same
    time window is printed afterwards.

    Args:
        initial_id: value written into the 'id' column.
        changed_id: value of the 'id' column to look for.
        time_first_seen_seconds: reference time for the row window.
    """
    tolerance = 1/60
    earliest_time = time_first_seen_seconds - tolerance
    in_window = raw_data['time'] >= earliest_time

    # Build the selection once and reuse it for the report and the write.
    rows_to_fix = (raw_data['id'] == changed_id) & in_window
    print(raw_data.loc[rows_to_fix, 'id'])
    raw_data.loc[rows_to_fix, 'id'] = initial_id

    # Evaluated after the write so the freshly relabelled rows are included.
    rows_with_initial = (raw_data['id'] == initial_id) & in_window
    print(raw_data.loc[rows_with_initial, 'id'])

    
# Construct graph: each changed ID points at the ID it was relabelled to
graph = {}
for index, row in fixes.iterrows():
    print(row)
    changed_id = int(row['Changed ID'])
    initial_id = int(row['Initial ID'])
    time_first_seen = float(row['Time First Seen'])

    if not IsValidFix(initial_id, changed_id, graph):
        print("Following fix was not valid: ")
        print("Initial ID:" + str(initial_id))
        print("Changed ID:" + str(changed_id))
        print("Time First Seen: "  + str(time_first_seen))
        continue

    # Earlier fixes may already have relabelled either ID, so work with the
    # ID each one currently resolves to.
    initial_id_actual = FindRootId(initial_id, graph) if initial_id in graph else initial_id
    changed_id_actual = FindRootId(changed_id, graph) if changed_id in graph else changed_id

    # Register both IDs as nodes (no outgoing edge yet if they are new).
    graph.setdefault(initial_id_actual, [])
    graph.setdefault(changed_id_actual, [])

    if changed_id_actual == initial_id_actual:
        # Both IDs already resolve to the same one: there is nothing to
        # relabel, and linking a node to itself would make every later
        # FindRootId call on this chain loop forever.
        continue

    FixId(initial_id_actual, changed_id_actual, time_first_seen)

    graph[changed_id_actual] = [initial_id_actual]

raw_data.to_csv('output_01.csv', index=False)


Initial ID         2.0
Changed ID         1.0
Time First Seen    0.0
Name: 0, dtype: float64
0    1
1    1
2    1
3    1
4    1
5    1
6    1
7    1
8    1
9    1
Name: id, dtype: int64
0     2
1     2
2     2
3     2
4     2
5     2
6     2
7     2
8     2
9     2
10    2
11    2
12    2
13    2
14    2
15    2
16    2
17    2
18    2
19    2
Name: id, dtype: int64
Initial ID         41.0
Changed ID          4.0
Time First Seen     1.0
Name: 1, dtype: float64
30    4
31    4
32    4
33    4
34    4
35    4
36    4
37    4
38    4
39    4
Name: id, dtype: int64
30    41
31    41
32    41
33    41
34    41
35    41
36    41
37    41
38    41
39    41
Name: id, dtype: int64
Initial ID         411.000000
Changed ID          41.000000
Time First Seen      1.033333
Name: 2, dtype: float64
31    41
32    41
33    41
34    41
35    41
36    41
37    41
38    41
39    41
Name: id, dtype: int64
31    411
32    411
33    411
34    411
35    411
36    411
37    411
38    411
39    411
Name: id, d