# Join Business data tables  
This notebook serves as a data pipeline to merge data sources for TOTA business, and check the result. This is likely a one-time operation.

In [1]:
import pandas as pd 
import numpy as np
import math

### 1.1. Load reference dataset (Sonja's list)

In [2]:
sheet_id = "1GUqBfjwIlXa_vJDSCqLAfeNwtnRulzZfzRsLA7Y5qIQ"
sheet_name = "Sonja_list"
url = f"https://docs.google.com/spreadsheets/d/{sheet_id}/gviz/tq?tqx=out:csv&sheet={sheet_name}"
sonja_df = pd.read_csv(url)
sonja_df['Company ID'] = np.int64(sonja_df['Company ID'])
sonja_df.drop(columns = list(sonja_df.filter(regex='Unnamed')), inplace=True)

sonja_df['Company ID'] = sonja_df['Company ID'].apply(lambda x: str(x))
sonja_df['Company ID'].replace(to_replace='-9223372036854775808', value=np.nan, inplace=True)

sonja_df

Unnamed: 0,Nr (Sonjas List),Company ID,Name of Business/Organization,Website,Street,City,Region,Postal Code,Country,Tourism Region,...,Tags,Indigenous Tourism,Biosphere Program Member,Accessibility,Number of Employees,Year Founded,Show_on_platform,Source,Latitude,Longitude
0,1.0,3887918904,Moon Curser Vineyards,mooncurser.com,3628 British Columbia 3,Osoyoos,British Columbia,V0H 1V6,Canada,Thompson Okanagan,...,Wineries/Destilleries,False,False,True,10.0,2003.0,True,Sonja Main Data Sheet,49.033403,-119.431551
1,2.0,4381261090,Adega On 45Th Estate Winery,www.winebc.com,7311-45Th Street,Osoyoos,British Columbia,V0H 1V6,Canada,Thompson Okanagan,...,Wineries/Destilleries,False,False,True,,2011.0,True,Sonja Main Data Sheet,49.034170,-119.433264
2,3.0,4381342221,Lastella Winery,www.lastella.ca,8123 148 Avenue,Osoyoos,British Columbia,V0H 1V0,Canada,Thompson Okanagan,...,Wineries/Destilleries,False,False,True,50.0,2005.0,True,Sonja Main Data Sheet,49.054500,-119.497677
3,4.0,4381289423,Nk'Mip Cellars,www.nkmipcellars.com,1400 Rancher Creek Road,Osoyoos,British Columbia,V0H 1V6,Canada,Thompson Okanagan,...,Wineries/Destilleries,False,False,True,,2003.0,True,Sonja Main Data Sheet,49.038404,-119.433498
4,5.0,4373591998,Bordertown Vineyards And Estate Winery,bordertownwinery.com,9140 - 92Nd Avenue,Osoyoos,British Columbia,V0H 1V2,Canada,Thompson Okanagan,...,Wineries/Destilleries,False,False,True,,,True,Sonja Main Data Sheet,49.037256,-119.477810
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
322,,5065356135,Terravista Vineyards,,,,British Columbia,,Canada,Thompson Okanagan,...,Wineries/Destilleries,False,False,True,,,True,TOTA members,49.550050,-119.562867
323,,4381282467,The Bx Press Cidery & Orchard,http://thebxpress.com/,4667 East Vernon Road,Vernon,British Columbia,,Canada,Thompson Okanagan,...,,False,False,True,,,True,TOTA members,50.279820,-119.222820
324,,4381287827,The Cove Lakeside Resort,www.covelakeside.com,4205 Gellatly Road,West Kelowna,British Columbia,V4T 2K4,Canada,Thompson Okanagan,...,,False,False,True,50.0,2006.0,True,TOTA members,49.810994,-119.624089
325,,5105018331,Tinhorn Creek Vineyards,miradoro.ca,537 Tinhorn Creek Rd,Oliver,British Columbia,V0H 1T1,Canada,Thompson Okanagan,...,Wineries/Destilleries,False,False,True,,1993.0,True,TOTA members,49.150165,-119.591051


### 1.2 Load (original) full data

In [3]:
sheet_id = "1GUqBfjwIlXa_vJDSCqLAfeNwtnRulzZfzRsLA7Y5qIQ"
sheet_name = "Original_data"
url = f"https://docs.google.com/spreadsheets/d/{sheet_id}/gviz/tq?tqx=out:csv&sheet={sheet_name}"
ori_df = pd.read_csv(url)
ori_df['Company ID'] = np.int64(ori_df['Company ID'])
ori_df['Company ID'] = ori_df['Company ID'].apply(lambda x: str(x))
ori_df['Company ID'].replace(to_replace='-9223372036854775808', value=np.nan, inplace=True)

ori_df

Unnamed: 0,Company ID,Name of Business/Organization,Website,Street,City,Region,Postal Code,Country,Tourism Region,Tourism Sub-Region,...,Accessibility,Number of Employees,Year Founded,Completed Business Type,Business Type,Business Sub-Type,Show_on_platform,Source,Latitude,Longitude
0,3339931603,Planet Bee Honey Farm & Meadery,www.PlanetBee.com,5011 Bella Vista Road,Vernon,British Columbia,V1H 1A1,Canada,Thompson Okanagan,North Okanagan,...,True,,1982.0,Activity / Attraction,Accommodation,,True,TOTA members,50.263229,-119.306363
1,3376458342,Best Western Plus Kelowna Hotel & Suites,www.bestwesternkelownahotel.com,2402 Highway 97 North,Kelowna,British Columbia,V1X 4J1,Canada,Thompson Okanagan,Central Okanagan,...,True,200.0,,Accommodation,,,True,TOTA members,49.885027,-119.429031
2,3410099603,Okanagan Lavender & Herb Farm,www.okanaganlavender.com,4380 Takla Road,Kelowna,British Columbia,V1W 3C4,Canada,Thompson Okanagan,Central Okanagan,...,True,10.0,,Activity / Attraction,Activity/Attraction,,True,TOTA members,49.823716,-119.448480
3,3410711087,Blarney Holdings Inc (Globe Cafe & The Blarney...,globedining.com,5350 Big White Road,Big White,British Columbia,V1X 4K5,Canada,Thompson Okanagan,Boundary,...,False,25.0,,Food & Beverage,Food & Beverage,,True,TOTA members,49.721623,-118.929871
4,3410743392,Lakeview Motel,christinalakeviewmotel.ca,1658 Highway 3,Christina Lake,British Columbia,,Canada,Thompson Okanagan,Boundary,...,False,,2013.0,Accommodation,Accommodation,Motel,True,TOTA members,49.043105,-118.205474
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3794,,Westbank Indian Band Siya Celebration Cultural...,http://www.wfn.ca/siya-celebration.htm,,Westbank,British Columbia,V4T 3A4,Canada,Thompson Okanagan,,...,,,,"Cultural (Museum, Gallery, Festivals, Etc)",Cultural,,True,ITBC,49.882008,-119.534219
3795,,Westbank Indian Band Sncəwips Heritage Museum,http://www.wfn.ca/siya-celebration.htm,,Westbank,British Columbia,V4T 3A4,Canada,Thompson Okanagan,,...,,,,"Cultural (Museum, Gallery, Festivals, Etc)",Cultural,,True,ITBC,49.878838,-119.535880
3796,,Zipity Doo Dah Adventures (zipline),thewildernessway.org,,Ashcroft,British Columbia,V0K 1A0,Canada,Thompson Okanagan,,...,,,,Activity / Attraction,Activity/Attraction,,True,ITBC,50.725925,-121.280474
3797,,Ashnola Campground / Snaʕsnulax̌tn Campground,https://ashnolacampground.wordpress.com/,,Keremeos,British Columbia,V0X 1N1,Canada,Thompson Okanagan,,...,,,,Accommodation,Accommodation,,True,ITBC,49.211282,-119.983779


In [4]:
col_orig = ['Company ID', 'Name of Business/Organization', 'Website', 'Street',
       'City', 'Region', 'Postal Code', 'Country', 'Tourism Region',
       'Tourism Sub-Region', 'Business Type Hubspot',
       'Business Sub-Type Hubspot', 'Business Type1', 'Business Type2',
       'Business Tags', 'Indigenous Tourism', 'New_Indig',
       'Biosphere Program Member', 'Accessibility', 'Number of Employees',
       'Year Founded', 'Completed Business Type', 'Business Type',
       'Business Sub-Type', 'Show_on_platform', 'Source', 'Latitude',
       'Longitude']
col_sonja = ['Nr (Sonjas List)', 'Company ID', 'Name of Business/Organization',
       'Website', 'Street', 'City', 'Region', 'Postal Code', 'Country',
       'Tourism Region', 'Tourism Sub-Region', 'Business Type Hubspot',
       'Business Sub-Type Hubspot', 'Tags', 'Indigenous Tourism',
       'Biosphere Program Member', 'Accessibility', 'Number of Employees',
       'Year Founded', 'Show_on_platform', 'Source', 'Latitude', 'Longitude']

[x for x in col_orig if x not in col_sonja]

['Business Type1',
 'Business Type2',
 'Business Tags',
 'New_Indig',
 'Completed Business Type',
 'Business Type',
 'Business Sub-Type']

In [5]:
#reduced_df = ori_df[ori_df['Accessibility'] == True][['Company ID', 'New_Indig', 'Completed Business Type','Business Type','Business Sub-Type']].copy()
reduced_df = ori_df[['Company ID', 'Name of Business/Organization' , 'Accessibility' ,'Business Type1',
                     'Business Type2','Business Tags','New_Indig', 'Completed Business Type','Business Type','Business Sub-Type']].copy()
reduced_df = reduced_df.rename(columns={'Accessibility': 'Old_Accessibility'})
reduced_df

Unnamed: 0,Company ID,Name of Business/Organization,Old_Accessibility,Business Type1,Business Type2,Business Tags,New_Indig,Completed Business Type,Business Type,Business Sub-Type
0,3339931603,Planet Bee Honey Farm & Meadery,True,Activity/Attraction,,,NotFound,Activity / Attraction,Accommodation,
1,3376458342,Best Western Plus Kelowna Hotel & Suites,True,Accommodation,,,NotFound,Accommodation,,
2,3410099603,Okanagan Lavender & Herb Farm,True,Activity/Attraction,,,NotFound,Activity / Attraction,Activity/Attraction,
3,3410711087,Blarney Holdings Inc (Globe Cafe & The Blarney...,False,Food & Beverage,,,NotFound,Food & Beverage,Food & Beverage,
4,3410743392,Lakeview Motel,False,Accommodation,,,TRUE,Accommodation,Accommodation,Motel
...,...,...,...,...,...,...,...,...,...,...
3794,,Westbank Indian Band Siya Celebration Cultural...,,Activity/Attraction,,,,"Cultural (Museum, Gallery, Festivals, Etc)",Cultural,
3795,,Westbank Indian Band Sncəwips Heritage Museum,,Activity/Attraction,,,,"Cultural (Museum, Gallery, Festivals, Etc)",Cultural,
3796,,Zipity Doo Dah Adventures (zipline),,Activity/Attraction,,,,Activity / Attraction,Activity/Attraction,
3797,,Ashnola Campground / Snaʕsnulax̌tn Campground,,Accommodation,,,,Accommodation,Accommodation,


In [21]:
print(f'Total rows in Sonja\'s dataset: {len(sonja_df)}')
print(f'Total rows in old dataset: {len(ori_df)}')
print(f'Matches between datasets based on COMPANY ID: {len(set(ori_df["Company ID"].dropna()) & set(sonja_df["Company ID"].dropna()))}')
print(f'Matches between datasets based on COMPANY NAME: {len(set(ori_df["Name of Business/Organization"].dropna()) & set(sonja_df["Name of Business/Organization"].dropna()))}')

Total rows in Sonja's dataset: 327
Total rows in old dataset: 3799
Matches between datasets based on COMPANY ID: 202
Matches between datasets based on COMPANY NAME: 227


In [16]:
df_joined = sonja_df.merge(reduced_df[pd.notnull(reduced_df['Company ID'])], on=['Name of Business/Organization'], how = 'inner')
df_joined

Unnamed: 0,Nr (Sonjas List),Company ID_x,Name of Business/Organization,Website,Street,City,Region,Postal Code,Country,Tourism Region,...,Show_on_platform,Source,Latitude,Longitude,Company ID_y,Old_Accessibility,New_Indig,Completed Business Type,Business Type,Business Sub-Type
0,1.0,3887918904,Moon Curser Vineyards,mooncurser.com,3628 British Columbia 3,Osoyoos,British Columbia,V0H 1V6,Canada,Thompson Okanagan,...,True,Sonja Main Data Sheet,49.033403,-119.431551,3887918904,False,NotFound,Food & Beverage,Food & Beverage,
1,2.0,4381261090,Adega On 45Th Estate Winery,www.winebc.com,7311-45Th Street,Osoyoos,British Columbia,V0H 1V6,Canada,Thompson Okanagan,...,True,Sonja Main Data Sheet,49.034170,-119.433264,4381261090,False,NotFound,Food & Beverage,Food & Beverage,
2,3.0,4381342221,Lastella Winery,www.lastella.ca,8123 148 Avenue,Osoyoos,British Columbia,V0H 1V0,Canada,Thompson Okanagan,...,True,Sonja Main Data Sheet,49.054500,-119.497677,4381342221,False,NotFound,Food & Beverage,Food & Beverage,
3,4.0,4381289423,Nk'Mip Cellars,www.nkmipcellars.com,1400 Rancher Creek Road,Osoyoos,British Columbia,V0H 1V6,Canada,Thompson Okanagan,...,True,Sonja Main Data Sheet,49.038404,-119.433498,4381289423,False,NotFound,Activity / Attraction,Activity/Attraction,
4,4.0,4381289423,Nk'Mip Cellars,www.nkmipcellars.com,1400 Rancher Creek Road,Osoyoos,British Columbia,V0H 1V6,Canada,Thompson Okanagan,...,True,Sonja Main Data Sheet,49.038404,-119.433498,5104154433,False,NotFound,Food & Beverage,Food & Beverage,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
254,,5066665472,Ramada Hotel Penticton,,,,British Columbia,,Canada,Thompson Okanagan,...,True,TOTA members,49.494342,-119.612784,5066665472,False,NotFound,Accommodation,Accommodation,Hotel
255,,4381275569,Super 8 By Wyndham Lake Country/Winfield Area,http://www.super8.com,9564 Highway 97 North,Lake Country,British Columbia,V4V 1T7,Canada,Thompson Okanagan,...,True,TOTA members,50.020787,-119.406957,4381275569,False,NotFound,Accommodation,Accommodation,
256,,4381282467,The Bx Press Cidery & Orchard,http://thebxpress.com/,4667 East Vernon Road,Vernon,British Columbia,,Canada,Thompson Okanagan,...,True,TOTA members,50.279820,-119.222820,4381282467,False,NotFound,Food & Beverage,Food & Beverage,
257,,4381287827,The Cove Lakeside Resort,www.covelakeside.com,4205 Gellatly Road,West Kelowna,British Columbia,V4T 2K4,Canada,Thompson Okanagan,...,True,TOTA members,49.810994,-119.624089,4381287827,False,NotFound,Accommodation,Accommodation,


In [20]:
print(f'Old Accessibility: {reduced_df["Old_Accessibility"].sum()}')
print(f'New Accessibility: {len(sonja_df)}')
print(f'Combined matches: {len(df_joined)}')
print(f'Old Accessibility in New dataset: {df_joined["Old_Accessibility"].sum()}')

Old Accessibility: 64
New Accessibility: 327
Combined matches: 259
Old Accessibility in New dataset: 39


In [22]:
old = 3799
new = 327
mat = 259

print('Expected total combined cases')

print(old + (new - mat))

Expected total combined cases
3867


In [13]:
df_joined_total = sonja_df.merge(reduced_df[pd.notnull(reduced_df['Name of Business/Organization'])], on=['Name of Business/Organization', 'Company ID'], how = 'outer')
df_joined_total = df_joined_total[pd.notnull(df_joined_total['Accessibility'])]
df_joined_total

Unnamed: 0,Nr (Sonjas List),Company ID,Name of Business/Organization,Website,Street,City,Region,Postal Code,Country,Tourism Region,...,Latitude,Longitude,Old_Accessibility,Business Type1,Business Type2,Business Tags,New_Indig,Completed Business Type,Business Type,Business Sub-Type
0,1.0,3887918904,Moon Curser Vineyards,mooncurser.com,3628 British Columbia 3,Osoyoos,British Columbia,V0H 1V6,Canada,Thompson Okanagan,...,49.033403,-119.431551,False,Food & Beverage,,,NotFound,Food & Beverage,Food & Beverage,
1,2.0,4381261090,Adega On 45Th Estate Winery,www.winebc.com,7311-45Th Street,Osoyoos,British Columbia,V0H 1V6,Canada,Thompson Okanagan,...,49.034170,-119.433264,False,Food & Beverage,,,NotFound,Food & Beverage,Food & Beverage,
2,3.0,4381342221,Lastella Winery,www.lastella.ca,8123 148 Avenue,Osoyoos,British Columbia,V0H 1V0,Canada,Thompson Okanagan,...,49.054500,-119.497677,False,Food & Beverage,,,NotFound,Food & Beverage,Food & Beverage,
3,4.0,4381289423,Nk'Mip Cellars,www.nkmipcellars.com,1400 Rancher Creek Road,Osoyoos,British Columbia,V0H 1V6,Canada,Thompson Okanagan,...,49.038404,-119.433498,False,Activity/Attraction,,,NotFound,Activity / Attraction,Activity/Attraction,
4,5.0,4373591998,Bordertown Vineyards And Estate Winery,bordertownwinery.com,9140 - 92Nd Avenue,Osoyoos,British Columbia,V0H 1V2,Canada,Thompson Okanagan,...,49.037256,-119.477810,False,Food & Beverage,,,NotFound,Food & Beverage,Food & Beverage,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
322,,5065356135,Terravista Vineyards,,,,British Columbia,,Canada,Thompson Okanagan,...,49.550050,-119.562867,False,Food & Beverage,,,NotFound,Food & Beverage,Food & Beverage,
323,,4381282467,The Bx Press Cidery & Orchard,http://thebxpress.com/,4667 East Vernon Road,Vernon,British Columbia,,Canada,Thompson Okanagan,...,50.279820,-119.222820,False,Food & Beverage,,,NotFound,Food & Beverage,Food & Beverage,
324,,4381287827,The Cove Lakeside Resort,www.covelakeside.com,4205 Gellatly Road,West Kelowna,British Columbia,V4T 2K4,Canada,Thompson Okanagan,...,49.810994,-119.624089,False,Accommodation,,,NotFound,Accommodation,Accommodation,
325,,5105018331,Tinhorn Creek Vineyards,miradoro.ca,537 Tinhorn Creek Rd,Oliver,British Columbia,V0H 1T1,Canada,Thompson Okanagan,...,49.150165,-119.591051,False,Food & Beverage,,,NotFound,Food & Beverage,Food & Beverage,


In [14]:
df_joined_total[df_joined_total['Name of Business/Organization'] == 'Nk\'Mip Cellars']

Unnamed: 0,Nr (Sonjas List),Company ID,Name of Business/Organization,Website,Street,City,Region,Postal Code,Country,Tourism Region,...,Latitude,Longitude,Old_Accessibility,Business Type1,Business Type2,Business Tags,New_Indig,Completed Business Type,Business Type,Business Sub-Type
3,4.0,4381289423,Nk'Mip Cellars,www.nkmipcellars.com,1400 Rancher Creek Road,Osoyoos,British Columbia,V0H 1V6,Canada,Thompson Okanagan,...,49.038404,-119.433498,False,Activity/Attraction,,,NotFound,Activity / Attraction,Activity/Attraction,
312,,5104154433,Nk'Mip Cellars,mkmip.ca,1400 Rancher Creek Road,Osoyoos,British Columbia,V0H 1V6,Canada,Thompson Okanagan,...,49.038404,-119.433498,False,,,,NotFound,Food & Beverage,Food & Beverage,


In [16]:
#Create empty DF
clean_df = pd.DataFrame(columns = df_joined_total.columns)

#iterate by row
for i in range(len(df_joined_total)-1):
    
    #check if company name is already in the clean dataframe, add if not.
    if df_joined_total['Name of Business/Organization'][i] not in clean_df['Name of Business/Organization'].values:
        #clean_df.loc[len(clean_df)] = df_joined_total.loc[i]
        clean_df = clean_df.append(df_joined_total.loc[i].copy())
    
    if i in range(0, len(df_joined_total), 30):
        print(f'{round((i/len(df_joined_total))*100, 0)}%')
    
print('100%')


0.0%
9.0%
18.0%
28.0%
37.0%
46.0%
55.0%
64.0%
73.0%
83.0%
92.0%
100%


In [17]:
clean_df

Unnamed: 0,Nr (Sonjas List),Company ID,Name of Business/Organization,Website,Street,City,Region,Postal Code,Country,Tourism Region,...,Latitude,Longitude,Old_Accessibility,Business Type1,Business Type2,Business Tags,New_Indig,Completed Business Type,Business Type,Business Sub-Type
0,1.0,3887918904,Moon Curser Vineyards,mooncurser.com,3628 British Columbia 3,Osoyoos,British Columbia,V0H 1V6,Canada,Thompson Okanagan,...,49.033403,-119.431551,False,Food & Beverage,,,NotFound,Food & Beverage,Food & Beverage,
1,2.0,4381261090,Adega On 45Th Estate Winery,www.winebc.com,7311-45Th Street,Osoyoos,British Columbia,V0H 1V6,Canada,Thompson Okanagan,...,49.034170,-119.433264,False,Food & Beverage,,,NotFound,Food & Beverage,Food & Beverage,
2,3.0,4381342221,Lastella Winery,www.lastella.ca,8123 148 Avenue,Osoyoos,British Columbia,V0H 1V0,Canada,Thompson Okanagan,...,49.054500,-119.497677,False,Food & Beverage,,,NotFound,Food & Beverage,Food & Beverage,
3,4.0,4381289423,Nk'Mip Cellars,www.nkmipcellars.com,1400 Rancher Creek Road,Osoyoos,British Columbia,V0H 1V6,Canada,Thompson Okanagan,...,49.038404,-119.433498,False,Activity/Attraction,,,NotFound,Activity / Attraction,Activity/Attraction,
4,5.0,4373591998,Bordertown Vineyards And Estate Winery,bordertownwinery.com,9140 - 92Nd Avenue,Osoyoos,British Columbia,V0H 1V2,Canada,Thompson Okanagan,...,49.037256,-119.477810,False,Food & Beverage,,,NotFound,Food & Beverage,Food & Beverage,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
316,,4381261083,Ramada Hotel & Conference Centre Kelowna,www.ramadalodgehotelkelowna.com,2170 Harvey Ave,Kelowna,British Columbia,V1Y 6G8,Canada,Thompson Okanagan,...,49.882281,-119.438052,False,Accommodation,,,NotFound,Accommodation,Accommodation,
317,,5066665472,Ramada Hotel Penticton,,,,British Columbia,,Canada,Thompson Okanagan,...,49.494342,-119.612784,False,Accommodation,,,NotFound,Accommodation,Accommodation,Hotel
320,,4381275569,Super 8 By Wyndham Lake Country/Winfield Area,http://www.super8.com,9564 Highway 97 North,Lake Country,British Columbia,V4V 1T7,Canada,Thompson Okanagan,...,50.020787,-119.406957,False,Accommodation,,,NotFound,Accommodation,Accommodation,
323,,4381282467,The Bx Press Cidery & Orchard,http://thebxpress.com/,4667 East Vernon Road,Vernon,British Columbia,,Canada,Thompson Okanagan,...,50.279820,-119.222820,False,Food & Beverage,,,NotFound,Food & Beverage,Food & Beverage,


In [23]:
final_cols = ['Nr (Sonjas List)','Company ID', 'Name of Business/Organization', 'Website', 'Street',
       'City', 'Region', 'Postal Code', 'Country', 'Tourism Region',
       'Tourism Sub-Region', 'Business Type Hubspot',
       'Business Sub-Type Hubspot', 'Business Type1', 'Business Type2',
       'Business Tags', 'Indigenous Tourism', 'New_Indig',
       'Biosphere Program Member', 'Accessibility', 'Number of Employees',
       'Year Founded', 'Completed Business Type', 'Business Type',
       'Business Sub-Type', 'Show_on_platform', 'Source', 'Latitude',
       'Longitude']

In [40]:
clean_df_select = clean_df[final_cols]
clean_df_select

Unnamed: 0,Nr (Sonjas List),Company ID,Name of Business/Organization,Website,Street,City,Region,Postal Code,Country,Tourism Region,...,Accessibility,Number of Employees,Year Founded,Completed Business Type,Business Type,Business Sub-Type,Show_on_platform,Source,Latitude,Longitude
0,1.0,3887918904,Moon Curser Vineyards,mooncurser.com,3628 British Columbia 3,Osoyoos,British Columbia,V0H 1V6,Canada,Thompson Okanagan,...,True,10.0,2003.0,Food & Beverage,Food & Beverage,,True,Sonja Main Data Sheet,49.033403,-119.431551
1,2.0,4381261090,Adega On 45Th Estate Winery,www.winebc.com,7311-45Th Street,Osoyoos,British Columbia,V0H 1V6,Canada,Thompson Okanagan,...,True,,2011.0,Food & Beverage,Food & Beverage,,True,Sonja Main Data Sheet,49.034170,-119.433264
2,3.0,4381342221,Lastella Winery,www.lastella.ca,8123 148 Avenue,Osoyoos,British Columbia,V0H 1V0,Canada,Thompson Okanagan,...,True,50.0,2005.0,Food & Beverage,Food & Beverage,,True,Sonja Main Data Sheet,49.054500,-119.497677
3,4.0,4381289423,Nk'Mip Cellars,www.nkmipcellars.com,1400 Rancher Creek Road,Osoyoos,British Columbia,V0H 1V6,Canada,Thompson Okanagan,...,True,,2003.0,Activity / Attraction,Activity/Attraction,,True,Sonja Main Data Sheet,49.038404,-119.433498
4,5.0,4373591998,Bordertown Vineyards And Estate Winery,bordertownwinery.com,9140 - 92Nd Avenue,Osoyoos,British Columbia,V0H 1V2,Canada,Thompson Okanagan,...,True,,,Food & Beverage,Food & Beverage,,True,Sonja Main Data Sheet,49.037256,-119.477810
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
316,,4381261083,Ramada Hotel & Conference Centre Kelowna,www.ramadalodgehotelkelowna.com,2170 Harvey Ave,Kelowna,British Columbia,V1Y 6G8,Canada,Thompson Okanagan,...,True,,1991.0,Accommodation,Accommodation,,True,TOTA members,49.882281,-119.438052
317,,5066665472,Ramada Hotel Penticton,,,,British Columbia,,Canada,Thompson Okanagan,...,True,,,Accommodation,Accommodation,Hotel,True,TOTA members,49.494342,-119.612784
320,,4381275569,Super 8 By Wyndham Lake Country/Winfield Area,http://www.super8.com,9564 Highway 97 North,Lake Country,British Columbia,V4V 1T7,Canada,Thompson Okanagan,...,True,10000.0,1981.0,Accommodation,Accommodation,,True,TOTA members,50.020787,-119.406957
323,,4381282467,The Bx Press Cidery & Orchard,http://thebxpress.com/,4667 East Vernon Road,Vernon,British Columbia,,Canada,Thompson Okanagan,...,True,,,Food & Beverage,Food & Beverage,,True,TOTA members,50.279820,-119.222820


In [41]:
complementary_df = ori_df[~ori_df['Name of Business/Organization'].isin(clean_df['Name of Business/Organization'])].copy()
complementary_df['Nr (Sonjas List)'] = np.nan
complementary_df = complementary_df[final_cols].reset_index()
complementary_df.drop(columns = ['index'], inplace = True)
complementary_df

Unnamed: 0,Nr (Sonjas List),Company ID,Name of Business/Organization,Website,Street,City,Region,Postal Code,Country,Tourism Region,...,Accessibility,Number of Employees,Year Founded,Completed Business Type,Business Type,Business Sub-Type,Show_on_platform,Source,Latitude,Longitude
0,,3410099603,Okanagan Lavender & Herb Farm,www.okanaganlavender.com,4380 Takla Road,Kelowna,British Columbia,V1W 3C4,Canada,Thompson Okanagan,...,True,10.0,,Activity / Attraction,Activity/Attraction,,True,TOTA members,49.823716,-119.448480
1,,3410711087,Blarney Holdings Inc (Globe Cafe & The Blarney...,globedining.com,5350 Big White Road,Big White,British Columbia,V1X 4K5,Canada,Thompson Okanagan,...,False,25.0,,Food & Beverage,Food & Beverage,,True,TOTA members,49.721623,-118.929871
2,,3410743392,Lakeview Motel,christinalakeviewmotel.ca,1658 Highway 3,Christina Lake,British Columbia,,Canada,Thompson Okanagan,...,False,,2013.0,Accommodation,Accommodation,Motel,True,TOTA members,49.043105,-118.205474
3,,3411328350,Smitty's Family Restaurant - Kelowna,smittys.ca,2170 Harvery Aenue,Kelowna,British Columbia,V1Y 6G8,Canada,Thompson Okanagan,...,False,1000.0,1960.0,Food & Beverage,Food & Beverage,,True,TOTA members,49.882281,-119.438052
4,,3415411981,Double E Sportsman Camp,doubleesportsmanscamp.com,5575 Christian Valley Road,Westbridge,British Columbia,V0H 1Y0,Canada,Thompson Okanagan,...,False,,,Accommodation,Accommodation,,True,TOTA members,49.321564,-118.880127
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3556,,,Westbank Indian Band Siya Celebration Cultural...,http://www.wfn.ca/siya-celebration.htm,,Westbank,British Columbia,V4T 3A4,Canada,Thompson Okanagan,...,,,,"Cultural (Museum, Gallery, Festivals, Etc)",Cultural,,True,ITBC,49.882008,-119.534219
3557,,,Westbank Indian Band Sncəwips Heritage Museum,http://www.wfn.ca/siya-celebration.htm,,Westbank,British Columbia,V4T 3A4,Canada,Thompson Okanagan,...,,,,"Cultural (Museum, Gallery, Festivals, Etc)",Cultural,,True,ITBC,49.878838,-119.535880
3558,,,Zipity Doo Dah Adventures (zipline),thewildernessway.org,,Ashcroft,British Columbia,V0K 1A0,Canada,Thompson Okanagan,...,,,,Activity / Attraction,Activity/Attraction,,True,ITBC,50.725925,-121.280474
3559,,,Ashnola Campground / Snaʕsnulax̌tn Campground,https://ashnolacampground.wordpress.com/,,Keremeos,British Columbia,V0X 1N1,Canada,Thompson Okanagan,...,,,,Accommodation,Accommodation,,True,ITBC,49.211282,-119.983779


In [42]:
for i in range(len(complementary_df)-1):
    
    #check if company name is already in the clean dataframe, add if not.
    if complementary_df['Name of Business/Organization'][i] not in clean_df_select['Name of Business/Organization'].values:
        #clean_df.loc[len(clean_df)] = df_joined_total.loc[i]
        clean_df_select = clean_df_select.append(complementary_df.loc[i].copy())
    
    if i in range(0, len(complementary_df), 200):
        print(f'{round((i/len(complementary_df))*100, 0)}%')
    
print('100%')

0.0%
6.0%
11.0%
17.0%
22.0%
28.0%
34.0%
39.0%
45.0%
51.0%
56.0%
62.0%
67.0%
73.0%
79.0%
84.0%
90.0%
95.0%
100%


In [43]:
clean_df_select

Unnamed: 0,Nr (Sonjas List),Company ID,Name of Business/Organization,Website,Street,City,Region,Postal Code,Country,Tourism Region,...,Accessibility,Number of Employees,Year Founded,Completed Business Type,Business Type,Business Sub-Type,Show_on_platform,Source,Latitude,Longitude
0,1.0,3887918904,Moon Curser Vineyards,mooncurser.com,3628 British Columbia 3,Osoyoos,British Columbia,V0H 1V6,Canada,Thompson Okanagan,...,True,10.0,2003.0,Food & Beverage,Food & Beverage,,True,Sonja Main Data Sheet,49.033403,-119.431551
1,2.0,4381261090,Adega On 45Th Estate Winery,www.winebc.com,7311-45Th Street,Osoyoos,British Columbia,V0H 1V6,Canada,Thompson Okanagan,...,True,,2011.0,Food & Beverage,Food & Beverage,,True,Sonja Main Data Sheet,49.034170,-119.433264
2,3.0,4381342221,Lastella Winery,www.lastella.ca,8123 148 Avenue,Osoyoos,British Columbia,V0H 1V0,Canada,Thompson Okanagan,...,True,50.0,2005.0,Food & Beverage,Food & Beverage,,True,Sonja Main Data Sheet,49.054500,-119.497677
3,4.0,4381289423,Nk'Mip Cellars,www.nkmipcellars.com,1400 Rancher Creek Road,Osoyoos,British Columbia,V0H 1V6,Canada,Thompson Okanagan,...,True,,2003.0,Activity / Attraction,Activity/Attraction,,True,Sonja Main Data Sheet,49.038404,-119.433498
4,5.0,4373591998,Bordertown Vineyards And Estate Winery,bordertownwinery.com,9140 - 92Nd Avenue,Osoyoos,British Columbia,V0H 1V2,Canada,Thompson Okanagan,...,True,,,Food & Beverage,Food & Beverage,,True,Sonja Main Data Sheet,49.037256,-119.477810
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3555,,,Westbank Indian Band - Sncewips Heritage Museum,https://www.sncewips.com/,,,British Columbia,,Canada,Thompson Okanagan,...,,,,"Cultural (Museum, Gallery, Festivals, Etc)",Cultural,,True,ITBC,49.878838,-119.535880
3556,,,Westbank Indian Band Siya Celebration Cultural...,http://www.wfn.ca/siya-celebration.htm,,Westbank,British Columbia,V4T 3A4,Canada,Thompson Okanagan,...,,,,"Cultural (Museum, Gallery, Festivals, Etc)",Cultural,,True,ITBC,49.882008,-119.534219
3557,,,Westbank Indian Band Sncəwips Heritage Museum,http://www.wfn.ca/siya-celebration.htm,,Westbank,British Columbia,V4T 3A4,Canada,Thompson Okanagan,...,,,,"Cultural (Museum, Gallery, Festivals, Etc)",Cultural,,True,ITBC,49.878838,-119.535880
3558,,,Zipity Doo Dah Adventures (zipline),thewildernessway.org,,Ashcroft,British Columbia,V0K 1A0,Canada,Thompson Okanagan,...,,,,Activity / Attraction,Activity/Attraction,,True,ITBC,50.725925,-121.280474


In [38]:
clean_df_select['Accessibility'].sum()

344

In [44]:
clean_df_select = clean_df_select[final_cols].reset_index()
clean_df_select.drop(columns = ['index'], inplace = True)
clean_df_select

Unnamed: 0,Nr (Sonjas List),Company ID,Name of Business/Organization,Website,Street,City,Region,Postal Code,Country,Tourism Region,...,Accessibility,Number of Employees,Year Founded,Completed Business Type,Business Type,Business Sub-Type,Show_on_platform,Source,Latitude,Longitude
0,1.0,3887918904,Moon Curser Vineyards,mooncurser.com,3628 British Columbia 3,Osoyoos,British Columbia,V0H 1V6,Canada,Thompson Okanagan,...,True,10.0,2003.0,Food & Beverage,Food & Beverage,,True,Sonja Main Data Sheet,49.033403,-119.431551
1,2.0,4381261090,Adega On 45Th Estate Winery,www.winebc.com,7311-45Th Street,Osoyoos,British Columbia,V0H 1V6,Canada,Thompson Okanagan,...,True,,2011.0,Food & Beverage,Food & Beverage,,True,Sonja Main Data Sheet,49.034170,-119.433264
2,3.0,4381342221,Lastella Winery,www.lastella.ca,8123 148 Avenue,Osoyoos,British Columbia,V0H 1V0,Canada,Thompson Okanagan,...,True,50.0,2005.0,Food & Beverage,Food & Beverage,,True,Sonja Main Data Sheet,49.054500,-119.497677
3,4.0,4381289423,Nk'Mip Cellars,www.nkmipcellars.com,1400 Rancher Creek Road,Osoyoos,British Columbia,V0H 1V6,Canada,Thompson Okanagan,...,True,,2003.0,Activity / Attraction,Activity/Attraction,,True,Sonja Main Data Sheet,49.038404,-119.433498
4,5.0,4373591998,Bordertown Vineyards And Estate Winery,bordertownwinery.com,9140 - 92Nd Avenue,Osoyoos,British Columbia,V0H 1V2,Canada,Thompson Okanagan,...,True,,,Food & Beverage,Food & Beverage,,True,Sonja Main Data Sheet,49.037256,-119.477810
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3746,,,Westbank Indian Band - Sncewips Heritage Museum,https://www.sncewips.com/,,,British Columbia,,Canada,Thompson Okanagan,...,,,,"Cultural (Museum, Gallery, Festivals, Etc)",Cultural,,True,ITBC,49.878838,-119.535880
3747,,,Westbank Indian Band Siya Celebration Cultural...,http://www.wfn.ca/siya-celebration.htm,,Westbank,British Columbia,V4T 3A4,Canada,Thompson Okanagan,...,,,,"Cultural (Museum, Gallery, Festivals, Etc)",Cultural,,True,ITBC,49.882008,-119.534219
3748,,,Westbank Indian Band Sncəwips Heritage Museum,http://www.wfn.ca/siya-celebration.htm,,Westbank,British Columbia,V4T 3A4,Canada,Thompson Okanagan,...,,,,"Cultural (Museum, Gallery, Festivals, Etc)",Cultural,,True,ITBC,49.878838,-119.535880
3749,,,Zipity Doo Dah Adventures (zipline),thewildernessway.org,,Ashcroft,British Columbia,V0K 1A0,Canada,Thompson Okanagan,...,,,,Activity / Attraction,Activity/Attraction,,True,ITBC,50.725925,-121.280474


In [45]:
clean_df_select.to_csv('../data/business_TOTA_merged.csv', index = False)