In [1]:
# Libraries
import pandas as pd
import sqldf

In [2]:
# Load the master site list and keep only the identifier / status columns.
all_sites = pd.read_excel('data/all_sites.xlsx')
all_sites_reduced_columns = [
    "Site ID",
    "LMS Site ID",
    "TOMS Site ID",
    "Velocity Site ID",
    "Site Name",
    "Site Status",
    "Site Type"
]
# .copy() detaches the reduced frame from the raw one, so the column
# assignment below does not trigger pandas' SettingWithCopyWarning.
all_sites = all_sites[all_sites_reduced_columns].copy()


def _strip_site_id(site_name, site_id):
    """Return ``site_name`` with its own site ID removed and whitespace trimmed.

    A missing name is returned unchanged (still missing) instead of being
    turned into the literal string "nan". A missing site ID is skipped so
    that the text "nan" is never stripped out of a legitimate name.
    """
    if pd.isna(site_name):
        return site_name
    cleaned = str(site_name)
    if not pd.isna(site_id):
        cleaned = cleaned.replace(str(site_id), "")
    return cleaned.strip()


# Pairing the two columns with zip avoids the slow row-wise DataFrame.apply.
all_sites["Site Name"] = [
    _strip_site_id(site_name, site_id)
    for site_name, site_id in zip(all_sites["Site Name"], all_sites["Site ID"])
]
all_sites.head()


Unnamed: 0,Site ID,LMS Site ID,TOMS Site ID,Velocity Site ID,Site Name,Site Status,Site Type
0,CHI020,CHI020,CL309,CL309,ASTALDI,ACTIVE,MINING
1,AUS038,AUS038,,AU314,GLENCORE - GEORGE FISHER,ACTIVE,MINING
2,105,,,,DON'T USE - STORE CLOSED,CLOSED,ASSOCIATE NONE
3,380,380,,,QUINTETTE COAL LTD.,CLOSED,STORE
4,345,345,CA345,CA345,DETOUR GOLD CONISTON,ACTIVE,MINING


In [3]:
# Load the LMS location-name workbook and preview its first rows.
lms_location_names = pd.read_excel(io='data/lms_location_names.xlsx')
lms_location_names.head(n=5)

Unnamed: 0,LMS Site ID,Site Name
0,66,Abbotsford
1,901,Accounting
2,922,Accounts Receivable
3,613,Acheson
4,7,Advertising


In [4]:
# Load the MTG site-attribute workbook and keep the columns listed below.
mtg_site_attributes = pd.read_excel('data/mtg_site_attributes.xlsx')
mtg_site_attributes_reduced_columns = [
    "Code",
    "SiteID",
    "MineType",
    "SiteType",
    "Status",
    "Commodity",
    "Locale",
    "Organization",
    "Customer",
    "Name"
]
# .copy() detaches the reduced frame from the raw one, so the column
# assignment below does not trigger pandas' SettingWithCopyWarning.
mtg_site_attributes = mtg_site_attributes[mtg_site_attributes_reduced_columns].copy()
# Trim whitespace around every name. Missing names stay missing rather than
# becoming the literal string "nan" (which is what str(NaN) would produce).
mtg_site_attributes["Name"] = mtg_site_attributes["Name"].map(
    lambda name: name if pd.isna(name) else str(name).strip()
)
mtg_site_attributes.head()

Unnamed: 0,Code,SiteID,MineType,SiteType,Status,Commodity,Locale,Organization,Customer,Name
0,AU0020,AUS002,,Branch / Depot / Store,Active,,Australia,Kalgoorlie,Kal Tire,Kalgoorlie
1,AU0040,AUS004,,Branch / Depot / Store,Active,,Australia,Perth Branch,Kal Tire,Perth Branch
2,AU0190,AUS019,,Repair / Retread Facility,Active,,Australia,Perth Repair,Kal Tire,Perth Repair
3,AU0210,AUS021,,Regional Admin Office,Active,,Australia,Western Admin,Kal Tire,Western Admin
4,AU0240,AUS024,,Repair / Retread Facility,Active,,Australia,Muswellbrook Repair,Kal Tire,Muswellbrook Repair


In [5]:
# Load the MRC list, keeping only the code and its description.
r5_mrcs = pd.read_excel('data/r5_mrcs.xlsx')
r5_mrcs_reduced_columns = [
    "MRC_CODE",
    "MRC_DESC"
]
# .copy() detaches the reduced frame from the raw one, so the column
# assignments below do not trigger pandas' SettingWithCopyWarning.
r5_mrcs = r5_mrcs[r5_mrcs_reduced_columns].copy()
# Flag rows whose description contains the literal text "OBSOLETE".
# na=False treats a missing description as "not obsolete" instead of raising
# a TypeError the way `"OBSOLETE" in x` does on NaN.
r5_mrcs["OBSOLETE"] = r5_mrcs["MRC_DESC"].str.contains(
    "OBSOLETE", regex=False, na=False)
# The .str accessor skips missing values, so a blank description no longer
# breaks the strip.
r5_mrcs["MRC_DESC"] = r5_mrcs["MRC_DESC"].str.strip()
# Drop the obsolete rows; the original index labels are kept on purpose.
r5_mrcs = r5_mrcs[~r5_mrcs["OBSOLETE"]]
r5_mrcs.head()

Unnamed: 0,MRC_CODE,MRC_DESC,OBSOLETE
0,CA324,GREENHILLS,False
1,CA338,KEARL,False
4,*,DEFAULT / ALL DEPARTMENTS,False
5,CA316,SPARWOOD,False
6,CA381,COAL MOUNTAIN,False


In [6]:
# Load the organization list and keep the code, description and UDFCHAR07.
r5_organizations = pd.read_excel('data/r5_organizations.xlsx')
r5_organizations_reduced_columns = [
    "ORG_CODE",
    "ORG_DESC",
    "ORG_UDFCHAR07"
]
# .copy() detaches the reduced frame from the raw one, so the column
# assignments below do not trigger pandas' SettingWithCopyWarning.
r5_organizations = r5_organizations[r5_organizations_reduced_columns].copy()


def _strip_org_code(description, code):
    """Return ``description`` with the row's ORG_UDFCHAR07 value removed.

    A missing description is returned unchanged. A missing code is skipped:
    str(NaN) is "nan", and removing that text would corrupt any description
    that happens to contain it (e.g. "Finance" -> "Fice").
    """
    if pd.isna(description) or pd.isna(code):
        return description
    return str(description).replace(str(code), "")


# dtype=object keeps the .str accessor usable even when the column is empty.
r5_organizations["ORG_DESC"] = pd.Series(
    [
        _strip_org_code(description, code)
        for description, code in zip(
            r5_organizations["ORG_DESC"], r5_organizations["ORG_UDFCHAR07"])
    ],
    index=r5_organizations.index,
    dtype=object,
)
# Flag rows by the marker words embedded in the description; na=False makes
# a missing description count as "not flagged" instead of raising.
r5_organizations["RETIRED"] = r5_organizations["ORG_DESC"].str.contains(
    "RETIRED", regex=False, na=False)
r5_organizations["OBSOLETE"] = r5_organizations["ORG_DESC"].str.contains(
    "OBSOLETE", regex=False, na=False)
# Remove the RETIRED marker from the text now that it lives in its own column.
r5_organizations["ORG_DESC"] = (
    r5_organizations["ORG_DESC"]
    .str.replace("RETIRED", "", regex=False)
    .str.strip()
)
# Obsolete organizations are dropped; retired ones are kept but flagged.
r5_organizations = r5_organizations[~r5_organizations["OBSOLETE"]]
r5_organizations.head()

Unnamed: 0,ORG_CODE,ORG_DESC,ORG_UDFCHAR07,RETIRED,OBSOLETE
0,AURORA,Aurora North Mine,CA379,False,False
1,FORT HILLS,Fort Hills Oil Sands Project,CA323,False,False
2,LOMAS BAYAS,Minera Lomas Bayas,CL302,True,False
3,HGO,Houndé Gold Mine,BF300,True,False
4,FFOS-Y-FRA,Ffos-y-Fran Coal Mine,GB305,True,False


In [7]:
# Load the common-ID CSV and narrow it to three OBJ_* columns.
common_id_reduced_columns = ["OBJ_ORG", "OBJ_MRC", "OBJ_UDFCHAR15"]
common_id = pd.read_csv('data/common_id.csv').loc[:, common_id_reduced_columns]
common_id.head(n=5)

Unnamed: 0,OBJ_ORG,OBJ_MRC,OBJ_UDFCHAR15
0,FRANKE,CH0080,CH0080
1,MOATIZE,MO0012,MO0012
2,MOATIZE,MO0011,MO0011
3,DOFASCO,CA8361,CA8361
4,ROSEBEL,SU0010,SU0010


In [8]:
# Read the text of both SQL statements from disk.
with open('querys/organizations.sql', 'r') as organizations_file, \
        open('querys/sites.sql', 'r') as sites_file:
    organizations_query = organizations_file.read()
    final_query = sites_file.read()

# Stage 1: run the organizations query, then keep only rows with row_num == 1.
# NOTE(review): sqldf.run is handed only the SQL text, so it presumably
# resolves table names from the notebook's DataFrame variables -- keep these
# statements at top level and keep the variable names unchanged.
r5_organizations_filled = sqldf.run(organizations_query)
r5_organizations_filled = r5_organizations_filled.loc[
    lambda frame: frame["row_num"] == 1
]

# Stage 2: run the sites query, save the result to Excel and display it.
# NOTE(review): this query presumably reads r5_organizations_filled, which is
# why it runs only after the filter above -- confirm against querys/sites.sql.
result = sqldf.run(final_query)
result.to_excel('outputs/standard_location_name.xlsx')
result

Unnamed: 0,Site ID,LMS Site ID,TOMS Site ID,Site Status,Site Type,toms_org_site_name,toms_mrc_site_name,lms_site_name,all_sites_name,site_name
0,CHI020,CHI020,CL309,ACTIVE,MINING,,ASTALDI,,ASTALDI,ASTALDI
1,AUS038,AUS038,,ACTIVE,MINING,Redpath Mining Contractors and Engineers Austr...,,Glencore - George Fisher,GLENCORE - GEORGE FISHER,REDPATH MINING CONTRACTORS AND ENGINEERS AUSTR...
2,105,,,CLOSED,ASSOCIATE NONE,,,,DON'T USE - STORE CLOSED,DON'T USE - STORE CLOSED
3,380,380,,CLOSED,STORE,,,,QUINTETTE COAL LTD.,QUINTETTE COAL LTD.
4,345,345,CA345,ACTIVE,MINING,,DETOUR LAKE,Detour Gold,DETOUR GOLD CONISTON,DETOUR LAKE
...,...,...,...,...,...,...,...,...,...,...
828,874,,,ACTIVE,STORE,,,,MILTON STORE,MILTON STORE
829,616,,,ACTIVE,STORE,,,,AIRDRIE STORE,AIRDRIE STORE
830,646,,,ACTIVE,STORE,,,,BROOKS STORE,BROOKS STORE
831,629,,,ACTIVE,STORE,,,,CALGARY STORE,CALGARY STORE
