In [1]:
import json
import math
import pandas as pd
import numpy as np
from nltk.corpus import wordnet
from IPython.display import display

In [2]:
# Preload the per-table schemas and category lists.
# final_schema.txt is parsed as one JSON object per line, each carrying
# the keys "filename", "schema" and "categories".
all_schemas = {}
all_categories = {}
with open("final_schema.txt") as ip_file:
    for line in ip_file:
        json_obj = json.loads(line)
        table_name = json_obj["filename"]
        all_schemas[table_name] = json_obj["schema"]
        all_categories[table_name] = json_obj["categories"]

# Preload the candidate keys (a single JSON document).
with open("Candidate_key_dict.txt", 'r') as ip_file:
    cand_key = json.load(ip_file)

# Preload the pairwise column / category similarity values of the tables.
# NOTE: `json_object` stays a module-level name on purpose; later cells read it.
with open("cos_similarity.txt", 'r') as ip_file:
    json_object = json.load(ip_file)
col_sim = json_object["column_similarity"]
cat_sim = json_object["category_similarity"]

In [3]:
# all functions needed to generate ontologies
def get_synonyms(word):
    """Return the related noun terms WordNet offers for ``word``.

    The result contains every lemma name of every noun synset of ``word``,
    the first lemma name of each direct hypernym of those synsets, and
    ``word`` itself. Duplicates are removed; the order of the returned list
    is whatever the underlying set yields.
    """
    noun_synsets = wordnet.synsets(word, pos=wordnet.NOUN)
    meanings = set()
    # Lemma names first, then hypernym heads, then the word itself.
    meanings.update(
        lemma.name() for synset in noun_synsets for lemma in synset.lemmas()
    )
    meanings.update(
        hypernym.lemma_names()[0]
        for synset in noun_synsets
        for hypernym in synset.hypernyms()
    )
    meanings.add(word)
    return list(meanings)

def generate_list_ontology(list1):
    """Map each word in ``list1`` to the list returned by ``get_synonyms``."""
    return {word: get_synonyms(word) for word in list1}

def generate_schema_ontology(input_schema):
    """Map each column name in ``input_schema`` to its ``get_synonyms`` list.

    Iterating ``input_schema`` yields its keys when it is a dict, so only the
    column names (not their datatypes) are looked up.
    """
    return {col: get_synonyms(col) for col in input_schema}

In [4]:
# Names of the transformation functions recognised in input column names.
transform_funct_list = [
    'average',
    'sum',
    'maximum',
    'minimum',
    'range',
    'median',
    'variance',
    'standard deviation',
    'mode',
    'frequency',
]
# Ontology of the transformation names, extended with the manual alias "avg".
tf_onto = generate_list_ontology(transform_funct_list)
tf_onto["average"].append("avg")

In [5]:
# to generate cos similarity between two lists
def cos_sim(list1, list2):
    """Cosine similarity of two term lists treated as binary term vectors.

    Each distinct term is one dimension with weight 1, so the dot product is
    the number of distinct shared terms and each magnitude is the square root
    of the number of distinct terms in that list. Repeated entries therefore
    do not change the score (previously the raw list lengths were used for
    the magnitudes, which deflated the score whenever a list had duplicates).

    Parameters:
        list1, list2: sized iterables of hashable terms.

    Returns:
        float in [0.0, 1.0]; 0.0 when either input is empty.
    """
    terms1 = set(list1)
    terms2 = set(list2)
    if not terms1 or not terms2:
        return 0.0
    shared = len(terms1 & terms2)
    # One sqrt over the product keeps exact results for perfect squares.
    return shared / math.sqrt(len(terms1) * len(terms2))

In [6]:
# Used when the input has only a schema (columns and their datatypes)
def col_only_list(input_schema,input_sch_onto):
    """Find, per known table, the input columns that the table can supply.

    Parameters:
        input_schema: dict mapping input column name -> datatype.
        input_sch_onto: dict mapping input column name -> list of related terms.

    Returns:
        dict mapping table file name -> list of matched input column names.
        A table column matches directly when its name and datatype both equal
        an input column's; otherwise it matches every input column whose
        related-term list contains the table column name and whose datatype
        equals the table column's. Tables with no match are absent.
    """
    possible_tables = {}
    for file, schema in all_schemas.items():
        for col, d_type in schema.items():
            # Exact name + datatype match.
            if col in input_schema and input_schema[col] == d_type:
                possible_tables.setdefault(file, []).append(col)
                continue
            # Otherwise fall back to the ontology of every input column.
            for in_col, related in input_sch_onto.items():
                if col in related and input_schema[in_col] == d_type:
                    possible_tables.setdefault(file, []).append(in_col)
    return possible_tables

In [7]:
# Used when the input has categories as well as a schema (columns and their datatypes).
# A table is considered only when the cosine similarity between its matched categories and the input categories exceeds 0.75.
def cat_and_col_list(input_categories,input_cat_onto,input_schema,input_sch_onto):
    """Like ``col_only_list`` but restricted to tables with similar categories.

    For every known table, its categories are mapped onto the input
    categories (directly, or through ``input_cat_onto``). Only when the
    cosine similarity between that mapped list and ``input_categories`` is
    strictly greater than 0.75 are the table's columns compared to the input
    schema.

    Returns:
        dict mapping table file name -> list of matched input column names.
    """
    possible_tables = {}
    for file, category in all_categories.items():
        # Translate the table's categories into input-category vocabulary.
        cat_list = []
        for cat in category:
            if cat in input_categories:
                cat_list.append(cat)
                continue
            for in_cat, related in input_cat_onto.items():
                if cat in related:
                    cat_list.append(in_cat)
        cos_val = cos_sim(cat_list, input_categories)
        if not cos_val > 0.75:
            continue
        # Categories are close enough: match the columns.
        for col, d_type in all_schemas[file].items():
            if col in input_schema and input_schema[col] == d_type:
                possible_tables.setdefault(file, []).append(col)
                continue
            for in_col, related in input_sch_onto.items():
                if col in related and input_schema[in_col] == d_type:
                    possible_tables.setdefault(file, []).append(in_col)
    return possible_tables

In [8]:
def merge(fname1,fname2):
    """Find the key columns shared by two tables and merge the tables on them.

    A column of ``fname2`` matches a column of ``fname1`` when the datatypes
    agree and either the names are equal or the ``fname2`` name appears in
    the ontology of the ``fname1`` name (first such ``fname1`` column wins).
    Only pairs where the ``fname1`` column is in ``cand_key[fname1]`` or the
    ``fname2`` column is in ``cand_key[fname2]`` are handed to
    ``merge_tables``.

    Returns:
        whatever ``merge_tables`` returns for the selected column pairs.
    """
    # Schemas of both tables; ontology is built for the first table only.
    schema1 = all_schemas[fname1]
    schema2 = all_schemas[fname2]
    cols1 = list(schema1)
    onto1 = generate_list_ontology(cols1)

    # Maps a column name of table 1 -> the matching column name of table 2.
    matching_columns = {}
    for col, d_type in schema2.items():
        if col in cols1 and d_type == schema1[col]:
            matching_columns[col] = col
            continue
        for col_t1, related in onto1.items():
            if col in related and d_type == schema1[col_t1]:
                matching_columns[col_t1] = col
                break

    # Keep only the pairs that involve a candidate key on either side.
    keys1 = cand_key[fname1]
    keys2 = cand_key[fname2]
    mat_cols = {
        name1: name2
        for name1, name2 in matching_columns.items()
        if name1 in keys1 or name2 in keys2
    }
    return merge_tables(fname1, fname2, mat_cols)

def merge_tables(fname1,fname2,cols):
    """Read two CSV files, align column names and outer-merge them.

    Parameters:
        fname1, fname2: paths of the CSV files to read.
        cols: dict mapping a column name of the first table -> the name of
            the matching column in the second table.

    Both inputs and the merged result are displayed. The merge is called
    without ``on``, so pandas picks the join columns itself.

    Returns:
        the merged DataFrame.
    """
    left = pd.read_csv(fname1)
    right = pd.read_csv(fname2)
    display(left)
    display(right)
    print(fname1+' and '+fname2+' gives : ')
    # Rename one pair at a time so the second table uses the first table's names.
    for name1, name2 in cols.items():
        right = right.rename(columns={name2: name1})
    merged = left.merge(right, how='outer')
    display(merged)
    return merged

In [65]:
def check_possible_matches_transform():
    """Match the input schema against known tables, detecting transformations.

    Reads ``input_tranformations.txt`` (a JSON document with a "schema" dict
    and an optional "categories" list). Each multi-word column name is
    scanned for a transformation word: one listed in ``transform_funct_list``
    or found in any ``tf_onto`` entry (case-insensitive). The first such word
    is stripped from the name; the remaining words, joined by single spaces,
    become the column to look for, and the lower-cased transformation word is
    recorded for that column.

    Fixes over the previous version:
      * the transformation was stored under the builtin ``str`` instead of
        the column name;
      * the word list was modified while being iterated, and a second
        ontology hit tried to remove the same word again (``ValueError``);
      * "categories" was looked up in the module-level ``json_object`` (the
        similarity file) rather than in the input file;
      * multi-word columns with no transformation word were silently dropped;
        they are now kept as plain columns, as ``check_possible_matches`` does.

    Returns:
        (matching_tables, transform): ``matching_tables`` maps table file
        name -> matched input columns for tables whose cosine similarity
        with the input columns is > 0.75; ``transform`` maps base column
        name -> detected transformation word.
    """
    with open("input_tranformations.txt",'r') as ip_file:
        json_object=json.load(ip_file)
    ip_schema=json_object["schema"]

    cols={}
    transform={}
    for name,d_type in ip_schema.items():
        words=name.split()
        if len(words)==1:
            cols[name]=d_type
            continue
        for pos,word in enumerate(words):
            lowered=word.lower()
            is_transform=(
                lowered in transform_funct_list
                or any(lowered in related for related in tf_onto.values())
            )
            if is_transform:
                # Drop only the transformation word; keep the rest as the column.
                base=" ".join(words[:pos]+words[pos+1:])
                cols[base]=d_type
                transform[base]=lowered
                break
        else:
            # No transformation word: an ordinary multi-word column name.
            # (Empty / whitespace-only names are still ignored.)
            if words:
                cols[name]=d_type

    input_schema=cols
    input_sch_onto=generate_schema_ontology(input_schema)
    if "categories" in json_object:
        print('category and schema')
        input_categories=json_object["categories"]
        input_cat_onto=generate_list_ontology(input_categories)
        possible_tables=cat_and_col_list(input_categories,input_cat_onto,input_schema,input_sch_onto)
    else:
        print('only schema')
        possible_tables=col_only_list(input_schema,input_sch_onto)

    wanted=list(input_schema)
    matching_tables={}
    for table,matched in possible_tables.items():
        if cos_sim(matched,wanted)>0.75:
            matching_tables[table]=matched
    return (matching_tables,transform)

In [81]:
def get_transform_matches():
    """Report and merge tables that match the input schema with transformations.

    Appends a "Transformation Match" section to
    ``output_folder_transformation/results.txt``. When no transformation was
    detected the function stops after noting that. Otherwise every pair of
    matching tables whose category and column similarity are both >= 0.50 is
    merged, written to ``output_folder_transformation/<n>t.csv`` and described
    in the results file together with the transformations applicable to the
    merged columns. Finally every matching table is listed on its own.

    Fix over the previous version: the per-pair report printed the matched
    columns of the first table twice instead of those of both tables.
    """
    results_path="output_folder_transformation/results.txt"
    (matching_tables_dict,transform_cols)=check_possible_matches_transform()
    matching_tables=list(matching_tables_dict)
    with open(results_path,'a') as f:
        print('-----------------------',file=f)
        print("Transformation Match",file=f)
        if len(transform_cols)==0:
            print("No Transformations In Input Schema",file=f)
            return
        print('Transformations detected are : ',transform_cols,file=f)
    op_str1='output_folder_transformation/'
    op_str2='t.csv'
    x=len(matching_tables)
    print(matching_tables)
    if x==0:
        with open(results_path,'a') as f:
            print("NO MATCHES FOUND",file=f)
    elif x==1:
        with open(results_path,'a') as f:
            print(matching_tables[0],file=f)
    else:
        ctr=0
        for i in range(x-1):
            for j in range(i+1,x):
                first=matching_tables[i]
                second=matching_tables[j]
                # The similarity tables may store the pair in either order;
                # as before, the reversed key is tried only when the forward
                # key is absent.
                a=first+' : '+second
                b=second+' : '+first
                if a in cat_sim:
                    pair_key=a
                elif b in cat_sim:
                    pair_key=b
                else:
                    continue
                if not (cat_sim[pair_key]>=.50 and col_sim[pair_key]>=0.50):
                    continue
                res=merge(first,second)
                ctr+=1
                op_string=op_str1+str(ctr)+op_str2
                res.to_csv(op_string,sep=',', index=False)
                # Transformations whose base column is present in the merged table.
                cols=res.columns
                l={col:tran for col,tran in transform_cols.items() if col in cols}
                with open(results_path,'a') as f:
                    print(str(ctr)+op_str2+' : '+first+' and '+second,file=f)
                    print("possible transformations are : ",l,file=f)
                    print("Columns that match with input_schema:\n "+first+' : ',matching_tables_dict[first],'\t '+second+' : ',matching_tables_dict[second],file=f)
        with open(results_path,'a') as f:
            for i in matching_tables:
                print(i,file=f)
                print("Columns that match with input_schema:\n "+i+' : ',matching_tables_dict[i],file=f)

In [82]:
# Run the transformation-aware matching; results are appended to
# output_folder_transformation/results.txt and merged tables are displayed below.
get_transform_matches()

only schema
['202_117.csv', '203_148.csv', '203_268.csv', '204_100.csv']


Unnamed: 0,Date,Ship,Nationality,Tonnage,Fate
0,19 August 1941,SS Aguila,United Kingdom,3255,Sunk
1,27 November 1941,HMAS Parramatta,Royal Australian Navy,1060,Sunk
2,23 December 1941,SS Shuntien,United Kingdom,3059,Sunk
3,26 December 1941,SS Warszawa,Poland,2487,Sunk
4,10 June 1942,MV Athene,Norway,4681,Sunk
5,10 June 1942,SS Brambleleaf,United Kingdom,5917,Damaged


Unnamed: 0,Date,Name,Nationality,Tonnage (GRT),Fate
0,18 January 1940,Flandria,Sweden,1179,Sunk
1,19 January 1940,Patria,Sweden,1188,Sunk
2,11 February 1940,Linda,Estonia,1213,Sunk
3,4 May 1940,San Tiburcio,United Kingdom,5995,Sunk (mine)
4,9 May 1940,Doris,French Navy,552,Sunk
5,11 May 1940,Tringa,United Kingdom,1930,Sunk
6,11 May 1940,Viiu,Estonia,1908,Sunk
7,23 May 1940,Sigurd Faulbaum,Belgium,3256,Sunk
8,11 May 1944,Shtorm,Soviet Union,412,Damaged


202_117.csv and 203_148.csv gives : 


Unnamed: 0,Date,Ship,Nationality,Tonnage,Fate,Name,Tonnage (GRT)
0,19 August 1941,SS Aguila,United Kingdom,3255.0,Sunk,,
1,27 November 1941,HMAS Parramatta,Royal Australian Navy,1060.0,Sunk,,
2,23 December 1941,SS Shuntien,United Kingdom,3059.0,Sunk,,
3,26 December 1941,SS Warszawa,Poland,2487.0,Sunk,,
4,10 June 1942,MV Athene,Norway,4681.0,Sunk,,
5,10 June 1942,SS Brambleleaf,United Kingdom,5917.0,Damaged,,
6,18 January 1940,,Sweden,,Sunk,Flandria,1179.0
7,19 January 1940,,Sweden,,Sunk,Patria,1188.0
8,11 February 1940,,Estonia,,Sunk,Linda,1213.0
9,4 May 1940,,United Kingdom,,Sunk (mine),San Tiburcio,5995.0


Unnamed: 0,Date,Ship,Nationality,Tonnage,Fate
0,19 August 1941,SS Aguila,United Kingdom,3255,Sunk
1,27 November 1941,HMAS Parramatta,Royal Australian Navy,1060,Sunk
2,23 December 1941,SS Shuntien,United Kingdom,3059,Sunk
3,26 December 1941,SS Warszawa,Poland,2487,Sunk
4,10 June 1942,MV Athene,Norway,4681,Sunk
5,10 June 1942,SS Brambleleaf,United Kingdom,5917,Damaged


Unnamed: 0,Date,Name,Nationality,Tonnage (GRT),Fate
0,7 October 1941,Svend Foyn,United Kingdom,14795,Damaged
1,16 February 1942,Monagas,Venezuela,2650,Sunk
2,16 February 1942,San Nicholas,United Kingdom,2391,Sunk
3,16 February 1942,Tia Juana,United Kingdom,2395,Sunk
4,22 February 1942,J.N.Pew,United States,9033,Sunk
5,23 February 1942,Sun,United States,9002,Damaged
6,23 February 1942,Thalia,Panama,8329,Sunk
7,11 May 1942,Cape of Good Hope,United Kingdom,4963,Sunk
8,24 May 1942,Gonçalves Dias,Brazil,4996,Sunk
9,28 May 1942,Alcoa Pilgrim,United States,6759,Sunk


202_117.csv and 203_268.csv gives : 


Unnamed: 0,Date,Ship,Nationality,Tonnage,Fate,Name,Tonnage (GRT)
0,19 August 1941,SS Aguila,United Kingdom,3255.0,Sunk,,
1,27 November 1941,HMAS Parramatta,Royal Australian Navy,1060.0,Sunk,,
2,23 December 1941,SS Shuntien,United Kingdom,3059.0,Sunk,,
3,26 December 1941,SS Warszawa,Poland,2487.0,Sunk,,
4,10 June 1942,MV Athene,Norway,4681.0,Sunk,,
5,10 June 1942,SS Brambleleaf,United Kingdom,5917.0,Damaged,,
6,7 October 1941,,United Kingdom,,Damaged,Svend Foyn,14795.0
7,16 February 1942,,Venezuela,,Sunk,Monagas,2650.0
8,16 February 1942,,United Kingdom,,Sunk,San Nicholas,2391.0
9,16 February 1942,,United Kingdom,,Sunk,Tia Juana,2395.0


Unnamed: 0,Date,Name,Nationality,Tonnage (GRT),Fate
0,18 January 1940,Flandria,Sweden,1179,Sunk
1,19 January 1940,Patria,Sweden,1188,Sunk
2,11 February 1940,Linda,Estonia,1213,Sunk
3,4 May 1940,San Tiburcio,United Kingdom,5995,Sunk (mine)
4,9 May 1940,Doris,French Navy,552,Sunk
5,11 May 1940,Tringa,United Kingdom,1930,Sunk
6,11 May 1940,Viiu,Estonia,1908,Sunk
7,23 May 1940,Sigurd Faulbaum,Belgium,3256,Sunk
8,11 May 1944,Shtorm,Soviet Union,412,Damaged


Unnamed: 0,Date,Name,Nationality,Tonnage (GRT),Fate
0,7 October 1941,Svend Foyn,United Kingdom,14795,Damaged
1,16 February 1942,Monagas,Venezuela,2650,Sunk
2,16 February 1942,San Nicholas,United Kingdom,2391,Sunk
3,16 February 1942,Tia Juana,United Kingdom,2395,Sunk
4,22 February 1942,J.N.Pew,United States,9033,Sunk
5,23 February 1942,Sun,United States,9002,Damaged
6,23 February 1942,Thalia,Panama,8329,Sunk
7,11 May 1942,Cape of Good Hope,United Kingdom,4963,Sunk
8,24 May 1942,Gonçalves Dias,Brazil,4996,Sunk
9,28 May 1942,Alcoa Pilgrim,United States,6759,Sunk


203_148.csv and 203_268.csv gives : 


Unnamed: 0,Date,Name,Nationality,Tonnage (GRT),Fate
0,18 January 1940,Flandria,Sweden,1179,Sunk
1,19 January 1940,Patria,Sweden,1188,Sunk
2,11 February 1940,Linda,Estonia,1213,Sunk
3,4 May 1940,San Tiburcio,United Kingdom,5995,Sunk (mine)
4,9 May 1940,Doris,French Navy,552,Sunk
5,11 May 1940,Tringa,United Kingdom,1930,Sunk
6,11 May 1940,Viiu,Estonia,1908,Sunk
7,23 May 1940,Sigurd Faulbaum,Belgium,3256,Sunk
8,11 May 1944,Shtorm,Soviet Union,412,Damaged
9,7 October 1941,Svend Foyn,United Kingdom,14795,Damaged


In [58]:
def check_possible_matches():
    """Match the raw input schema against the known tables.

    Reads ``input_tranformations.txt`` and uses its "schema" dict as is. When
    the file also has "categories" the category-aware matcher is used,
    otherwise the column-only matcher.

    Returns:
        dict mapping table file name -> matched input columns, limited to
        tables whose cosine similarity with the input columns is > 0.75.
    """
    with open("input_tranformations.txt",'r') as ip_file:
        json_object = json.load(ip_file)
    input_schema = json_object["schema"]
    input_sch_onto = generate_schema_ontology(input_schema)

    if "categories" not in json_object:
        print('only schema')
        possible_tables = col_only_list(input_schema, input_sch_onto)
    else:
        print('category and schema')
        input_categories = json_object["categories"]
        input_cat_onto = generate_list_ontology(input_categories)
        possible_tables = cat_and_col_list(
            input_categories, input_cat_onto, input_schema, input_sch_onto
        )

    # Keep only the tables that cover enough of the input columns.
    matching_tables = {}
    for table in possible_tables:
        matched = possible_tables[table]
        if cos_sim(matched, list(input_schema)) > 0.75:
            matching_tables[table] = matched
    return matching_tables

In [83]:
def get_matches():
    """Report and merge tables that directly match the input schema.

    Appends a "Direct Match" section to
    ``output_folder_transformation/results.txt``. Every pair of matching
    tables whose category and column similarity are both >= 0.50 is merged,
    written to ``output_folder_transformation/<n>.csv`` and described in the
    results file. Finally every matching table is listed on its own.

    Fixes over the previous version:
      * the reversed-key branch called ``ctr(str)`` (calling an int), which
        raised ``TypeError`` instead of formatting the counter;
      * the per-pair report printed the matched columns of the first table
        twice instead of those of both tables.
    """
    results_path="output_folder_transformation/results.txt"
    with open(results_path,'a') as f:
        print('-------------',file=f)
        print("Direct Match",file=f)
    matching_tables_dict=check_possible_matches()
    matching_tables=list(matching_tables_dict)
    op_str1='output_folder_transformation/'
    op_str2='.csv'
    x=len(matching_tables)
    print(matching_tables)
    if x==0:
        with open(results_path,'a') as f:
            print("NO MATCHES FOUND",file=f)
    elif x==1:
        with open(results_path,'a') as f:
            print(matching_tables[0],file=f)
    else:
        ctr=0
        for i in range(x-1):
            for j in range(i+1,x):
                first=matching_tables[i]
                second=matching_tables[j]
                # The similarity tables may store the pair in either order;
                # as before, the reversed key is tried only when the forward
                # key is absent.
                a=first+' : '+second
                b=second+' : '+first
                if a in cat_sim:
                    pair_key=a
                elif b in cat_sim:
                    pair_key=b
                else:
                    continue
                if not (cat_sim[pair_key]>=.50 and col_sim[pair_key]>=0.50):
                    continue
                res=merge(first,second)
                ctr+=1
                op_string=op_str1+str(ctr)+op_str2
                res.to_csv(op_string,sep=',', index=False)
                with open(results_path,'a') as f:
                    print(str(ctr)+op_str2+' : '+first+' and '+second,file=f)
                    print("Columns that match with input_schema:\n "+first+' : ',matching_tables_dict[first],'\t '+second+' : ',matching_tables_dict[second],file=f)
        with open(results_path,'a') as f:
            for i in matching_tables:
                print(i,file=f)
                print("Columns that match with input_schema:\n "+i+' : ',matching_tables_dict[i],file=f)

In [84]:
# Run the direct (no transformation) matching; results are appended to
# output_folder_transformation/results.txt and merged tables are displayed below.
get_matches()

only schema
['202_117.csv', '203_148.csv', '203_268.csv', '204_100.csv']


Unnamed: 0,Date,Ship,Nationality,Tonnage,Fate
0,19 August 1941,SS Aguila,United Kingdom,3255,Sunk
1,27 November 1941,HMAS Parramatta,Royal Australian Navy,1060,Sunk
2,23 December 1941,SS Shuntien,United Kingdom,3059,Sunk
3,26 December 1941,SS Warszawa,Poland,2487,Sunk
4,10 June 1942,MV Athene,Norway,4681,Sunk
5,10 June 1942,SS Brambleleaf,United Kingdom,5917,Damaged


Unnamed: 0,Date,Name,Nationality,Tonnage (GRT),Fate
0,18 January 1940,Flandria,Sweden,1179,Sunk
1,19 January 1940,Patria,Sweden,1188,Sunk
2,11 February 1940,Linda,Estonia,1213,Sunk
3,4 May 1940,San Tiburcio,United Kingdom,5995,Sunk (mine)
4,9 May 1940,Doris,French Navy,552,Sunk
5,11 May 1940,Tringa,United Kingdom,1930,Sunk
6,11 May 1940,Viiu,Estonia,1908,Sunk
7,23 May 1940,Sigurd Faulbaum,Belgium,3256,Sunk
8,11 May 1944,Shtorm,Soviet Union,412,Damaged


202_117.csv and 203_148.csv gives : 


Unnamed: 0,Date,Ship,Nationality,Tonnage,Fate,Name,Tonnage (GRT)
0,19 August 1941,SS Aguila,United Kingdom,3255.0,Sunk,,
1,27 November 1941,HMAS Parramatta,Royal Australian Navy,1060.0,Sunk,,
2,23 December 1941,SS Shuntien,United Kingdom,3059.0,Sunk,,
3,26 December 1941,SS Warszawa,Poland,2487.0,Sunk,,
4,10 June 1942,MV Athene,Norway,4681.0,Sunk,,
5,10 June 1942,SS Brambleleaf,United Kingdom,5917.0,Damaged,,
6,18 January 1940,,Sweden,,Sunk,Flandria,1179.0
7,19 January 1940,,Sweden,,Sunk,Patria,1188.0
8,11 February 1940,,Estonia,,Sunk,Linda,1213.0
9,4 May 1940,,United Kingdom,,Sunk (mine),San Tiburcio,5995.0


Unnamed: 0,Date,Ship,Nationality,Tonnage,Fate
0,19 August 1941,SS Aguila,United Kingdom,3255,Sunk
1,27 November 1941,HMAS Parramatta,Royal Australian Navy,1060,Sunk
2,23 December 1941,SS Shuntien,United Kingdom,3059,Sunk
3,26 December 1941,SS Warszawa,Poland,2487,Sunk
4,10 June 1942,MV Athene,Norway,4681,Sunk
5,10 June 1942,SS Brambleleaf,United Kingdom,5917,Damaged


Unnamed: 0,Date,Name,Nationality,Tonnage (GRT),Fate
0,7 October 1941,Svend Foyn,United Kingdom,14795,Damaged
1,16 February 1942,Monagas,Venezuela,2650,Sunk
2,16 February 1942,San Nicholas,United Kingdom,2391,Sunk
3,16 February 1942,Tia Juana,United Kingdom,2395,Sunk
4,22 February 1942,J.N.Pew,United States,9033,Sunk
5,23 February 1942,Sun,United States,9002,Damaged
6,23 February 1942,Thalia,Panama,8329,Sunk
7,11 May 1942,Cape of Good Hope,United Kingdom,4963,Sunk
8,24 May 1942,Gonçalves Dias,Brazil,4996,Sunk
9,28 May 1942,Alcoa Pilgrim,United States,6759,Sunk


202_117.csv and 203_268.csv gives : 


Unnamed: 0,Date,Ship,Nationality,Tonnage,Fate,Name,Tonnage (GRT)
0,19 August 1941,SS Aguila,United Kingdom,3255.0,Sunk,,
1,27 November 1941,HMAS Parramatta,Royal Australian Navy,1060.0,Sunk,,
2,23 December 1941,SS Shuntien,United Kingdom,3059.0,Sunk,,
3,26 December 1941,SS Warszawa,Poland,2487.0,Sunk,,
4,10 June 1942,MV Athene,Norway,4681.0,Sunk,,
5,10 June 1942,SS Brambleleaf,United Kingdom,5917.0,Damaged,,
6,7 October 1941,,United Kingdom,,Damaged,Svend Foyn,14795.0
7,16 February 1942,,Venezuela,,Sunk,Monagas,2650.0
8,16 February 1942,,United Kingdom,,Sunk,San Nicholas,2391.0
9,16 February 1942,,United Kingdom,,Sunk,Tia Juana,2395.0


Unnamed: 0,Date,Name,Nationality,Tonnage (GRT),Fate
0,18 January 1940,Flandria,Sweden,1179,Sunk
1,19 January 1940,Patria,Sweden,1188,Sunk
2,11 February 1940,Linda,Estonia,1213,Sunk
3,4 May 1940,San Tiburcio,United Kingdom,5995,Sunk (mine)
4,9 May 1940,Doris,French Navy,552,Sunk
5,11 May 1940,Tringa,United Kingdom,1930,Sunk
6,11 May 1940,Viiu,Estonia,1908,Sunk
7,23 May 1940,Sigurd Faulbaum,Belgium,3256,Sunk
8,11 May 1944,Shtorm,Soviet Union,412,Damaged


Unnamed: 0,Date,Name,Nationality,Tonnage (GRT),Fate
0,7 October 1941,Svend Foyn,United Kingdom,14795,Damaged
1,16 February 1942,Monagas,Venezuela,2650,Sunk
2,16 February 1942,San Nicholas,United Kingdom,2391,Sunk
3,16 February 1942,Tia Juana,United Kingdom,2395,Sunk
4,22 February 1942,J.N.Pew,United States,9033,Sunk
5,23 February 1942,Sun,United States,9002,Damaged
6,23 February 1942,Thalia,Panama,8329,Sunk
7,11 May 1942,Cape of Good Hope,United Kingdom,4963,Sunk
8,24 May 1942,Gonçalves Dias,Brazil,4996,Sunk
9,28 May 1942,Alcoa Pilgrim,United States,6759,Sunk


203_148.csv and 203_268.csv gives : 


Unnamed: 0,Date,Name,Nationality,Tonnage (GRT),Fate
0,18 January 1940,Flandria,Sweden,1179,Sunk
1,19 January 1940,Patria,Sweden,1188,Sunk
2,11 February 1940,Linda,Estonia,1213,Sunk
3,4 May 1940,San Tiburcio,United Kingdom,5995,Sunk (mine)
4,9 May 1940,Doris,French Navy,552,Sunk
5,11 May 1940,Tringa,United Kingdom,1930,Sunk
6,11 May 1940,Viiu,Estonia,1908,Sunk
7,23 May 1940,Sigurd Faulbaum,Belgium,3256,Sunk
8,11 May 1944,Shtorm,Soviet Union,412,Damaged
9,7 October 1941,Svend Foyn,United Kingdom,14795,Damaged
